1
0
forked from wrenn/wrenn

feat: add env expansion, sandbox env fetching, and configurable

healthchecks

Fix ENV instructions to expand $VAR references at set time using the
current env state, preventing self-referencing values like
PATH=/opt/venv/bin:$PATH from producing recursive expansions. Remove
expandEnv from shellPrefix to avoid double expansion.

Fetch sandbox environment variables via `env` before recipe execution
so ENV steps resolve against actual runtime values from the base
template image.

Replace hardcoded healthcheck timing with a Dockerfile-like flag parser
supporting --interval, --timeout, --start-period, and --retries. Add
start-period grace window and bounded retry counting to
waitForHealthcheck.

Add python-interpreter-v0-beta recipe and healthcheck files.
This commit is contained in:
Tasnim Kabir Sadik
2026-04-07 01:15:43 +06:00
parent ab38c8372c
commit 4f340b8847
10 changed files with 537 additions and 17 deletions

View File

@ -137,7 +137,7 @@ image-minimal:
sudo bash images/templates/minimal/build.sh sudo bash images/templates/minimal/build.sh
image-python: image-python:
sudo bash images/templates/python311/build.sh sudo bash images/templates/python312/build.sh
image-node: image-node:
sudo bash images/templates/node20/build.sh sudo bash images/templates/node20/build.sh

View File

@ -20,6 +20,34 @@ echo "+cpu +memory +io" > /sys/fs/cgroup/cgroup.subtree_control 2>/dev/null || t
# Set hostname # Set hostname
hostname sandbox hostname sandbox
# Configure networking from kernel cmdline (ip=client::gw:mask:host:iface:autoconf).
# if command -v ip >/dev/null 2>&1; then
# iparg=$(cat /proc/cmdline | tr ' ' '\n' | sed -n 's/^ip=//p')
# if [ -n "$iparg" ]; then
# client=$(echo "$iparg" | cut -d: -f1)
# gw=$(echo "$iparg" | cut -d: -f2)
# mask=$(echo "$iparg" | cut -d: -f3)
# iface=$(echo "$iparg" | cut -d: -f5)
# [ -z "$iface" ] && iface=eth0
# if [ -n "$client" ]; then
# ip addr add "$client/${mask:-30}" dev "$iface" 2>/dev/null || true
# ip link set "$iface" up 2>/dev/null || true
# if [ -n "$gw" ]; then
# ip route add default via "$gw" 2>/dev/null || true
# fi
# fi
# fi
# fi
#
#
# Assign the static link-local address to eth0 unless it is already present.
# NOTE(review): the grep pattern is an unanchored regex, so it also matches
# longer addresses such as 169.254.0.210 — confirm this is acceptable.
if ! ip addr show eth0 2>/dev/null | grep -q "169.254.0.21"; then
# Bring up loopback and eth0 before assigning the address.
ip link set lo up
ip link set eth0 up
# /30 network: this end is .21, the default gateway below is .22.
ip addr add 169.254.0.21/30 dev eth0
ip route add default via 169.254.0.22
fi
# Configure DNS resolver. # Configure DNS resolver.
echo "nameserver 8.8.8.8" > /etc/resolv.conf echo "nameserver 8.8.8.8" > /etc/resolv.conf
echo "nameserver 8.8.4.4" >> /etc/resolv.conf echo "nameserver 8.8.4.4" >> /etc/resolv.conf

View File

@ -1,6 +1,8 @@
package recipe package recipe
import "strings" import (
"strings"
)
// ExecContext holds mutable state that persists across recipe steps. // ExecContext holds mutable state that persists across recipe steps.
// It is initialized empty and updated by ENV and WORKDIR steps. // It is initialized empty and updated by ENV and WORKDIR steps.
@ -56,6 +58,74 @@ func (c *ExecContext) shellPrefix() string {
return sb.String() return sb.String()
} }
// expandEnv replaces $var and ${var} placeholders in s with the corresponding
// values from vars.
//
// Rules:
//   - "$$" is an escape and yields a single literal "$".
//   - A name that is absent from vars expands to the empty string.
//   - "${" without a closing "}" is copied through literally.
//   - A "$" that is not followed by a name character, "{" or "$" (for example
//     "$ 5", "$-" or a trailing "$") is copied through literally instead of
//     being dropped.
//
// Expansion is single-pass: substituted values are never rescanned, so a
// value that itself contains "$" cannot trigger further expansion.
func expandEnv(s string, vars map[string]string) string {
	var sb strings.Builder
	sb.Grow(len(s) * 2)

	for {
		idx := strings.IndexByte(s, '$')
		if idx < 0 {
			sb.WriteString(s)
			break
		}
		sb.WriteString(s[:idx])
		s = s[idx:]

		// A trailing "$" has nothing to expand; keep it as-is.
		if len(s) == 1 {
			sb.WriteByte('$')
			break
		}

		var name string
		var advance int
		switch {
		case s[1] == '$':
			// "$$" escape.
			sb.WriteByte('$')
			s = s[2:]
			continue
		case s[1] == '{':
			end := strings.IndexByte(s[2:], '}')
			if end < 0 {
				// Unterminated "${": emit the "$" and let the rest of the
				// string be copied verbatim on the next iteration.
				sb.WriteByte('$')
				s = s[1:]
				continue
			}
			name = s[2 : 2+end]
			advance = 2 + end + 1
		default:
			j := 1
			for j < len(s) && isNameChar(s[j]) {
				j++
			}
			if j == 1 {
				// "$" is not followed by a name: treat it as a literal
				// dollar sign rather than expanding an empty name.
				sb.WriteByte('$')
				s = s[1:]
				continue
			}
			name = s[1:j]
			advance = j
		}

		// Reading from a nil map is safe and simply reports "not found".
		if v, ok := vars[name]; ok {
			sb.WriteString(v)
		}
		s = s[advance:]
	}

	return sb.String()
}

// isNameChar reports whether the byte c is a valid character for an
// environment variable name (ASCII letter, digit, or underscore).
func isNameChar(c byte) bool {
	return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
		(c >= '0' && c <= '9') || c == '_'
}
// shellescape wraps s in single quotes, escaping any embedded single quotes. // shellescape wraps s in single quotes, escaping any embedded single quotes.
// This is POSIX-safe for paths, env values, and shell commands. // This is POSIX-safe for paths, env values, and shell commands.
func shellescape(s string) string { func shellescape(s string) string {

View File

@ -45,6 +45,14 @@ func TestExecContext_WrappedCommand(t *testing.T) {
cmd: "echo $MSG", cmd: "echo $MSG",
want: "MSG='it'\\''s fine' /bin/sh -c 'echo $MSG'", want: "MSG='it'\\''s fine' /bin/sh -c 'echo $MSG'",
}, },
{
name: "env expansion with dollar sign PATH",
ctx: ExecContext{
EnvVars: map[string]string{"PATH": "/usr/bin", "FOO": "/opt/venv/bin:$PATH"},
},
cmd: "make build",
want: "FOO='/opt/venv/bin:/usr/bin' PATH='/usr/bin' /bin/sh -c 'make build'",
},
} }
for _, tc := range tests { for _, tc := range tests {
@ -94,6 +102,109 @@ func TestExecContext_StartCommand(t *testing.T) {
} }
} }
// TestExpandEnv is a table-driven check of expandEnv covering plain text,
// $VAR and ${VAR} references, unset names, the "$$" escape, and malformed
// input such as a lone "$" or an unterminated "${".
func TestExpandEnv(t *testing.T) {
	// Shared fixture for the cases that only need PATH defined.
	pathOnly := map[string]string{"PATH": "/usr/bin"}

	cases := []struct {
		input string
		env   map[string]string
		want  string
	}{
		{input: "hello", want: "hello"},
		{input: "$PATH", env: pathOnly, want: "/usr/bin"},
		{input: "${PATH}", env: pathOnly, want: "/usr/bin"},
		{input: "/opt/venv/bin:$PATH", env: pathOnly, want: "/opt/venv/bin:/usr/bin"},
		{input: "${HOME}/code", env: map[string]string{"HOME": "/root"}, want: "/root/code"},
		{input: "hello $USER", env: map[string]string{"USER": "admin"}, want: "hello admin"},
		{input: "$UNSET", env: pathOnly, want: ""},
		{input: "${UNSET}", env: pathOnly, want: ""},
		{input: "$$", env: pathOnly, want: "$"},
		{input: "price is $$100", want: "price is $100"},
		{input: "$FOO:$BAR", env: map[string]string{"FOO": "a", "BAR": "b"}, want: "a:b"},
		{input: "${FOO}_${BAR}", env: map[string]string{"FOO": "hello", "BAR": "world"}, want: "hello_world"},
		{input: "no vars here", want: "no vars here"},
		{input: "$", want: "$"},
		{input: "${", want: "${"},
		{input: "${}", want: ""},
		{input: "$VAR1$VAR2", env: map[string]string{"VAR1": "a", "VAR2": "b"}, want: "ab"},
	}

	for _, c := range cases {
		// The raw input doubles as the subtest name.
		t.Run(c.input, func(t *testing.T) {
			if got := expandEnv(c.input, c.env); got != c.want {
				t.Errorf("expandEnv(%q, %v)\n got %q\n want %q", c.input, c.env, got, c.want)
			}
		})
	}
}
func TestShellescape(t *testing.T) { func TestShellescape(t *testing.T) {
tests := []struct { tests := []struct {
input string input string

View File

@ -68,7 +68,7 @@ func Execute(
if bctx.EnvVars == nil { if bctx.EnvVars == nil {
bctx.EnvVars = make(map[string]string) bctx.EnvVars = make(map[string]string)
} }
bctx.EnvVars[st.Key] = st.Value bctx.EnvVars[st.Key] = expandEnv(st.Value, bctx.EnvVars)
entries = append(entries, BuildLogEntry{Step: step, Phase: phase, Cmd: st.Raw, Ok: true}) entries = append(entries, BuildLogEntry{Step: step, Phase: phase, Cmd: st.Raw, Ok: true})
case KindWORKDIR: case KindWORKDIR:

View File

@ -0,0 +1,94 @@
package recipe
import (
"fmt"
"strconv"
"strings"
"time"
)
// HealthcheckConfig holds the parsed configuration for a build healthcheck.
// A healthcheck is a shell command that is executed repeatedly inside the
// sandbox until it succeeds or the retry/timeout budget is exhausted.
//
// Retries of 0 means unlimited retries (bounded only by the overall deadline).
type HealthcheckConfig struct {
	Cmd         string        // command text, preserved exactly as written after the flags
	Interval    time.Duration // delay between checks; always > 0 when produced by ParseHealthcheck
	Timeout     time.Duration // per-check time limit; always > 0 when produced by ParseHealthcheck
	StartPeriod time.Duration // initial grace window; never negative when produced by ParseHealthcheck
	Retries     int           // 0 = unlimited
}

// ParseHealthcheck parses a healthcheck string with optional flag prefix into
// a HealthcheckConfig. The syntax is:
//
//	[--interval=<duration>] [--timeout=<duration>] [--start-period=<duration>]
//	[--retries=<n>] <command>
//
// Flags must use the form --flag=value. The first token that does not start
// with "--" and everything after it is treated as the command; the command
// text is kept verbatim (only surrounding whitespace is trimmed) so that
// whitespace inside quoted arguments is not altered. Defaults:
// interval=3s, timeout=10s, start-period=0, retries=0 (unlimited).
//
// An error is returned for empty input, a flag without "=", an unknown flag,
// an unparsable value, a non-positive interval or timeout, a negative
// start-period or retries count, or when no command follows the flags. On
// error the returned config is the zero value.
func ParseHealthcheck(s string) (HealthcheckConfig, error) {
	rest := strings.TrimSpace(s)
	if rest == "" {
		return HealthcheckConfig{}, fmt.Errorf("empty healthcheck")
	}

	hc := HealthcheckConfig{
		Interval: 3 * time.Second,
		Timeout:  10 * time.Second,
	}

	for _, token := range strings.Fields(rest) {
		if !strings.HasPrefix(token, "--") {
			break
		}
		// rest is whitespace-trimmed, so it always begins with the current
		// token; drop the token and the whitespace that follows it. Whatever
		// is left once the flags are consumed is the untouched command.
		rest = strings.TrimSpace(rest[len(token):])

		key, val, ok := strings.Cut(token, "=")
		if !ok {
			return HealthcheckConfig{}, fmt.Errorf("malformed flag (missing '='): %q", token)
		}

		switch key {
		case "--interval":
			d, err := time.ParseDuration(val)
			if err != nil {
				return HealthcheckConfig{}, fmt.Errorf("parse interval: %w", err)
			}
			// A non-positive interval would make time.NewTicker panic.
			if d <= 0 {
				return HealthcheckConfig{}, fmt.Errorf("interval must be positive, got %s", d)
			}
			hc.Interval = d
		case "--timeout":
			d, err := time.ParseDuration(val)
			if err != nil {
				return HealthcheckConfig{}, fmt.Errorf("parse timeout: %w", err)
			}
			// A non-positive timeout would expire every check immediately.
			if d <= 0 {
				return HealthcheckConfig{}, fmt.Errorf("timeout must be positive, got %s", d)
			}
			hc.Timeout = d
		case "--start-period":
			d, err := time.ParseDuration(val)
			if err != nil {
				return HealthcheckConfig{}, fmt.Errorf("parse start period: %w", err)
			}
			if d < 0 {
				return HealthcheckConfig{}, fmt.Errorf("start period must not be negative, got %s", d)
			}
			hc.StartPeriod = d
		case "--retries":
			r, err := strconv.Atoi(val)
			if err != nil {
				return HealthcheckConfig{}, fmt.Errorf("parse retries: %w", err)
			}
			if r < 0 {
				return HealthcheckConfig{}, fmt.Errorf("retries must not be negative, got %d", r)
			}
			hc.Retries = r
		default:
			return HealthcheckConfig{}, fmt.Errorf("unknown healthcheck flag: %q", token)
		}
	}

	if rest == "" {
		return HealthcheckConfig{}, fmt.Errorf("healthcheck has no command")
	}

	hc.Cmd = rest
	return hc, nil
}

View File

@ -0,0 +1,126 @@
package recipe
import (
"testing"
"time"
)
// TestParseHealthcheck is a table-driven check of ParseHealthcheck. Successful
// cases compare every HealthcheckConfig field; failing cases only assert that
// an error is returned.
func TestParseHealthcheck(t *testing.T) {
	// Values ParseHealthcheck is expected to apply when a flag is absent.
	const (
		defaultInterval = 3 * time.Second
		defaultTimeout  = 10 * time.Second
	)

	cases := []struct {
		name    string
		input   string
		want    HealthcheckConfig
		wantErr bool
	}{
		{
			name:  "plain command",
			input: "curl -f http://localhost:8080",
			want: HealthcheckConfig{
				Cmd:      "curl -f http://localhost:8080",
				Interval: defaultInterval,
				Timeout:  defaultTimeout,
			},
		},
		{
			name:  "all flags",
			input: "--interval=5s --timeout=2s --start-period=15s --retries=3 ping -c 1 8.8.8.8",
			want: HealthcheckConfig{
				Cmd:         "ping -c 1 8.8.8.8",
				Interval:    5 * time.Second,
				Timeout:     2 * time.Second,
				StartPeriod: 15 * time.Second,
				Retries:     3,
			},
		},
		{
			name:  "partial flags",
			input: "--timeout=5s my-custom-check --verbose",
			want: HealthcheckConfig{
				Cmd:      "my-custom-check --verbose",
				Interval: defaultInterval,
				Timeout:  5 * time.Second,
			},
		},
		{
			name:  "retries only",
			input: "--retries=5 test.sh",
			want: HealthcheckConfig{
				Cmd:      "test.sh",
				Interval: defaultInterval,
				Timeout:  defaultTimeout,
				Retries:  5,
			},
		},
		{name: "empty string", input: "", wantErr: true},
		{name: "whitespace only", input: " \t \n ", wantErr: true},
		{name: "flags but no command", input: "--interval=5s --retries=2", wantErr: true},
		{name: "unknown flag", input: "--magic=true my-check", wantErr: true},
		{name: "invalid duration", input: "--interval=5smiles check.sh", wantErr: true},
		{name: "invalid retries", input: "--retries=five check.sh", wantErr: true},
		{
			name:  "command with dashes",
			input: "--interval=2s command-with-dash --flag=value",
			want: HealthcheckConfig{
				Cmd:      "command-with-dash --flag=value",
				Interval: 2 * time.Second,
				Timeout:  defaultTimeout,
			},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got, err := ParseHealthcheck(tc.input)
			if gotErr := err != nil; gotErr != tc.wantErr {
				t.Errorf("ParseHealthcheck() error = %v, wantErr %v", err, tc.wantErr)
				return
			}
			// Nothing further to compare when an error was expected.
			if tc.wantErr {
				return
			}

			// expect reports a mismatch for a single named field.
			expect := func(field string, got, want any) {
				if got != want {
					t.Errorf("%s got = %v, want %v", field, got, want)
				}
			}
			expect("Cmd", got.Cmd, tc.want.Cmd)
			expect("Interval", got.Interval, tc.want.Interval)
			expect("Timeout", got.Timeout, tc.want.Timeout)
			expect("StartPeriod", got.StartPeriod, tc.want.StartPeriod)
			expect("Retries", got.Retries, tc.want.Retries)
		})
	}
}

View File

@ -5,6 +5,7 @@ import (
"encoding/json" "encoding/json"
"fmt" "fmt"
"log/slog" "log/slog"
"strings"
"sync" "sync"
"time" "time"
@ -23,8 +24,6 @@ import (
const ( const (
buildQueueKey = "wrenn:build_queue" buildQueueKey = "wrenn:build_queue"
buildCommandTimeout = 30 * time.Second buildCommandTimeout = 30 * time.Second
healthcheckInterval = 1 * time.Second
healthcheckTimeout = 60 * time.Second
) )
// preBuildCmds run before the user recipe to prepare the build environment. // preBuildCmds run before the user recipe to prepare the build environment.
@ -321,11 +320,18 @@ func (s *BuildService) executeBuild(ctx context.Context, buildIDStr string) {
panic(fmt.Sprintf("invalid post-build recipe: %v", err)) panic(fmt.Sprintf("invalid post-build recipe: %v", err))
} }
// Execute build phases: pre-build → user recipe → post-build.
// bctx carries working directory and env vars across all phases.
var logs []recipe.BuildLogEntry var logs []recipe.BuildLogEntry
step := 0 step := 0
bctx := &recipe.ExecContext{}
envVars, err := s.fetchSandboxEnv(buildCtx, agent, sandboxIDStr)
if err != nil {
log.Warn("failed to fetch sandbox env, using defaults", "error", err)
envVars = map[string]string{
"PATH": "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
"HOME": "/root",
}
}
bctx := &recipe.ExecContext{EnvVars: envVars}
runPhase := func(phase string, steps []recipe.Step, defaultTimeout time.Duration) bool { runPhase := func(phase string, steps []recipe.Step, defaultTimeout time.Duration) bool {
newEntries, nextStep, ok := recipe.Execute(buildCtx, phase, steps, sandboxIDStr, step, defaultTimeout, bctx, agent.Exec) newEntries, nextStep, ok := recipe.Execute(buildCtx, phase, steps, sandboxIDStr, step, defaultTimeout, bctx, agent.Exec)
@ -365,8 +371,14 @@ func (s *BuildService) executeBuild(ctx context.Context, buildIDStr string) {
// Healthcheck or direct snapshot. // Healthcheck or direct snapshot.
var sizeBytes int64 var sizeBytes int64
if build.Healthcheck != "" { if build.Healthcheck != "" {
log.Info("running healthcheck", "cmd", build.Healthcheck) hc, err := recipe.ParseHealthcheck(build.Healthcheck)
if err := s.waitForHealthcheck(buildCtx, agent, sandboxIDStr, build.Healthcheck); err != nil { if err != nil {
s.destroySandbox(buildCtx, agent, sandboxIDStr)
s.failBuild(buildCtx, buildID, fmt.Sprintf("invalid healthcheck: %v", err))
return
}
log.Info("running healthcheck", "cmd", hc.Cmd, "interval", hc.Interval, "timeout", hc.Timeout, "start_period", hc.StartPeriod, "retries", hc.Retries)
if err := s.waitForHealthcheck(buildCtx, agent, sandboxIDStr, hc); err != nil {
s.destroySandbox(buildCtx, agent, sandboxIDStr) s.destroySandbox(buildCtx, agent, sandboxIDStr)
if buildCtx.Err() != nil { if buildCtx.Err() != nil {
return return
@ -445,36 +457,61 @@ func (s *BuildService) executeBuild(ctx context.Context, buildIDStr string) {
log.Info("template build completed successfully", "name", build.Name) log.Info("template build completed successfully", "name", build.Name)
} }
func (s *BuildService) waitForHealthcheck(ctx context.Context, agent buildAgentClient, sandboxIDStr, cmd string) error { // waitForHealthcheck repeatedly executes the healthcheck command inside the
deadline := time.NewTimer(healthcheckTimeout) // sandbox according to the config's interval, timeout, start-period, and
// retries.
// During the start period, failures are not counted toward the retry budget.
// Returns nil on the first successful check, or an error if retries are
// exhausted, the deadline passes, or the context is cancelled.
func (s *BuildService) waitForHealthcheck(ctx context.Context, agent buildAgentClient, sandboxIDStr string, hc recipe.HealthcheckConfig) error {
maxAttempts := 100
if hc.Retries > 0 {
maxAttempts = hc.Retries
}
deadline := time.NewTimer(hc.StartPeriod + time.Duration(maxAttempts+1)*hc.Interval)
defer deadline.Stop() defer deadline.Stop()
ticker := time.NewTicker(healthcheckInterval) ticker := time.NewTicker(hc.Interval)
defer ticker.Stop() defer ticker.Stop()
startedAt := time.Now()
failCount := 0
for { for {
select { select {
case <-ctx.Done(): case <-ctx.Done():
return ctx.Err() return ctx.Err()
case <-deadline.C: case <-deadline.C:
return fmt.Errorf("healthcheck timed out after %s", healthcheckTimeout) return fmt.Errorf("healthcheck timed out: exceeded %d attempts over %s", failCount, time.Since(startedAt))
case <-ticker.C: case <-ticker.C:
execCtx, cancel := context.WithTimeout(ctx, 10*time.Second) execCtx, cancel := context.WithTimeout(ctx, hc.Timeout)
resp, err := agent.Exec(execCtx, connect.NewRequest(&pb.ExecRequest{ resp, err := agent.Exec(execCtx, connect.NewRequest(&pb.ExecRequest{
SandboxId: sandboxIDStr, SandboxId: sandboxIDStr,
Cmd: "/bin/sh", Cmd: "/bin/sh",
Args: []string{"-c", cmd}, Args: []string{"-c", hc.Cmd},
TimeoutSec: 10, TimeoutSec: int32(hc.Timeout.Seconds()),
})) }))
cancel() cancel()
if err != nil { if err != nil {
slog.Debug("healthcheck exec error (retrying)", "error", err) slog.Debug("healthcheck exec error (retrying)", "error", err)
if time.Since(startedAt) >= hc.StartPeriod {
failCount++
if hc.Retries > 0 && failCount >= hc.Retries {
return fmt.Errorf("healthcheck failed after %d retries: exec error: %v", failCount, err)
}
}
continue continue
} }
if resp.Msg.ExitCode == 0 { if resp.Msg.ExitCode == 0 {
return nil return nil
} }
slog.Debug("healthcheck failed (retrying)", "exit_code", resp.Msg.ExitCode) slog.Debug("healthcheck failed (retrying)", "exit_code", resp.Msg.ExitCode)
if time.Since(startedAt) >= hc.StartPeriod {
failCount++
if hc.Retries > 0 && failCount >= hc.Retries {
return fmt.Errorf("healthcheck failed after %d retries: exit code %d", failCount, resp.Msg.ExitCode)
}
}
} }
} }
} }
@ -517,3 +554,49 @@ func (s *BuildService) destroySandbox(_ context.Context, agent buildAgentClient,
slog.Warn("failed to destroy build sandbox", "sandbox_id", sandboxIDStr, "error", err) slog.Warn("failed to destroy build sandbox", "sandbox_id", sandboxIDStr, "error", err)
} }
} }
// fetchSandboxEnv runs `env` through /bin/sh inside the given sandbox via the
// build agent and returns the reported variables as a name-to-value map.
//
// An error is returned when the exec call itself fails or when the command
// exits with a non-zero code; in both cases the returned map is nil.
func (s *BuildService) fetchSandboxEnv(ctx context.Context,
	agent buildAgentClient, sandboxIDStr string) (map[string]string, error) {
	req := connect.NewRequest(&pb.ExecRequest{
		SandboxId:  sandboxIDStr,
		Cmd:        "/bin/sh",
		Args:       []string{"-c", "env"},
		TimeoutSec: 10,
	})

	resp, err := agent.Exec(ctx, req)
	switch {
	case err != nil:
		return nil, fmt.Errorf("fetch env: %w", err)
	case resp.Msg.ExitCode != 0:
		return nil, fmt.Errorf("fetch env: command exited with code %d",
			resp.Msg.ExitCode)
	}

	// Stdout carries the raw `env` listing; parsing is delegated.
	stdout := string(resp.Msg.Stdout)
	return s.parseSandboxEnv(stdout), nil
}
// parseSandboxEnv converts the raw newline-separated output of an `env`
// command into a name-to-value map.
//
// Each line is split at its first '=', so values containing '=' are kept
// intact. Values are preserved exactly, including leading and trailing
// whitespace; only a trailing carriage return is stripped. Lines without '='
// and entries whose name is not a valid shell identifier (see
// isShellEnvName) are skipped.
//
// NOTE(review): a variable whose value spans several lines cannot be
// reconstructed from plain `env` output; continuation lines are skipped
// unless they happen to look like NAME=value themselves.
func (s *BuildService) parseSandboxEnv(raw string) map[string]string {
	envVars := make(map[string]string)

	for line := range strings.SplitSeq(raw, "\n") {
		// Tolerate CRLF line endings without touching significant whitespace
		// inside the value.
		line = strings.TrimSuffix(line, "\r")

		name, value, ok := strings.Cut(line, "=")
		if !ok || !isShellEnvName(name) {
			continue
		}
		envVars[name] = value
	}

	return envVars
}

// isShellEnvName reports whether name is a non-empty identifier made of ASCII
// letters, digits and underscores that does not start with a digit.
func isShellEnvName(name string) bool {
	if name == "" {
		return false
	}
	for i := 0; i < len(name); i++ {
		c := name[i]
		switch {
		case c == '_', c >= 'a' && c <= 'z', c >= 'A' && c <= 'Z':
			// Always allowed.
		case c >= '0' && c <= '9':
			// Digits are allowed anywhere except the first position.
			if i == 0 {
				return false
			}
		default:
			return false
		}
	}
	return true
}

View File

@ -0,0 +1 @@
--interval=5s --timeout=3s --start-period=3s --retries=3 python3 -c "import urllib.request; urllib.request.urlopen('http://127.0.0.1:8888/api/status', timeout=3)"

View File

@ -0,0 +1,7 @@
RUN apt-get install -y --no-install-recommends python3 python3-pip python3-venv
RUN python3 -m venv /opt/venv
ENV PATH=/opt/venv/bin:$PATH
RUN --timeout=5m pip install --no-cache-dir notebook
START jupyter notebook --no-browser --ip=0.0.0.0 --port=8888 --ServerApp.token='' --ServerApp.allow_origin='*' --allow-root