From 4f340b8847eaa68ddd812879c8731e5b33e3a5ef Mon Sep 17 00:00:00 2001 From: Tasnim Kabir Sadik Date: Tue, 7 Apr 2026 01:15:43 +0600 Subject: [PATCH] feat: add env expansion, sandbox env fetching, and configurable healthchecks Fix ENV instructions to expand $VAR references at set time using the current env state, preventing self-referencing values like PATH=/opt/venv/bin:$PATH from producing recursive expansions. Remove expandEnv from shellPrefix to avoid double expansion. Fetch sandbox environment variables via `env` before recipe execution so ENV steps resolve against actual runtime values from the base template image. Replace hardcoded healthcheck timing with a Dockerfile-like flag parser supporting --interval, --timeout, --start-period, and --retries. Add start-period grace window and bounded retry counting to waitForHealthcheck. Add python-interpreter-v0-beta recipe and healthcheck files. --- Makefile | 2 +- images/wrenn-init.sh | 28 ++++ internal/recipe/context.go | 72 +++++++++- internal/recipe/context_test.go | 111 +++++++++++++++ internal/recipe/executor.go | 2 +- internal/recipe/healthcheck.go | 94 +++++++++++++ internal/recipe/healthcheck_test.go | 126 ++++++++++++++++++ internal/service/build.go | 111 +++++++++++++-- .../python-interpreter-v0-beta.healthcheck | 1 + recipes/python-interpreter-v0-beta.recipefile | 7 + 10 files changed, 537 insertions(+), 17 deletions(-) create mode 100644 internal/recipe/healthcheck.go create mode 100644 internal/recipe/healthcheck_test.go create mode 100644 recipes/python-interpreter-v0-beta.healthcheck create mode 100644 recipes/python-interpreter-v0-beta.recipefile diff --git a/Makefile b/Makefile index 80fbd3a..2dbcc76 100644 --- a/Makefile +++ b/Makefile @@ -137,7 +137,7 @@ image-minimal: sudo bash images/templates/minimal/build.sh image-python: - sudo bash images/templates/python311/build.sh + sudo bash images/templates/python312/build.sh image-node: sudo bash images/templates/node20/build.sh diff --git 
a/images/wrenn-init.sh b/images/wrenn-init.sh index 266e516..2a5bba4 100644 --- a/images/wrenn-init.sh +++ b/images/wrenn-init.sh @@ -20,6 +20,34 @@ echo "+cpu +memory +io" > /sys/fs/cgroup/cgroup.subtree_control 2>/dev/null || t # Set hostname hostname sandbox +# Configure networking from kernel cmdline (ip=client::gw:mask:host:iface:autoconf). +# if command -v ip >/dev/null 2>&1; then +# iparg=$(cat /proc/cmdline | tr ' ' '\n' | sed -n 's/^ip=//p') +# if [ -n "$iparg" ]; then +# client=$(echo "$iparg" | cut -d: -f1) +# gw=$(echo "$iparg" | cut -d: -f2) +# mask=$(echo "$iparg" | cut -d: -f3) +# iface=$(echo "$iparg" | cut -d: -f5) +# [ -z "$iface" ] && iface=eth0 +# if [ -n "$client" ]; then +# ip addr add "$client/${mask:-30}" dev "$iface" 2>/dev/null || true +# ip link set "$iface" up 2>/dev/null || true +# if [ -n "$gw" ]; then +# ip route add default via "$gw" 2>/dev/null || true +# fi +# fi +# fi +# fi +# +# +if ! ip addr show eth0 2>/dev/null | grep -q "169.254.0.21"; then + ip link set lo up + ip link set eth0 up + ip addr add 169.254.0.21/30 dev eth0 + ip route add default via 169.254.0.22 +fi + + # Configure DNS resolver. echo "nameserver 8.8.8.8" > /etc/resolv.conf echo "nameserver 8.8.4.4" >> /etc/resolv.conf diff --git a/internal/recipe/context.go b/internal/recipe/context.go index db4c39c..7592595 100644 --- a/internal/recipe/context.go +++ b/internal/recipe/context.go @@ -1,6 +1,8 @@ package recipe -import "strings" +import ( + "strings" +) // ExecContext holds mutable state that persists across recipe steps. // It is initialized empty and updated by ENV and WORKDIR steps. @@ -56,6 +58,74 @@ func (c *ExecContext) shellPrefix() string { return sb.String() } +// expandEnv replaces $var and ${var} placeholders in the string s with their +// corresponding values from the vars map. +// It supports escaping with $$, which is replaced by a single $. +// If a variable is not found in the vars map, it is replaced with an empty +// string. 
// expandEnv replaces $VAR and ${VAR} references in s with the corresponding
// values from vars and returns the expanded string.
//
// Rules:
//   - "$$" is an escape and yields a single literal '$'.
//   - "$NAME" consumes the longest run of name characters (see isNameChar).
//   - "${NAME}" consumes everything up to the first '}'; the text between the
//     braces is used verbatim as the lookup key (no ":-" style operators).
//   - A reference whose name is not present in vars (or vars is nil) expands
//     to the empty string.
//   - A '$' that does not start a reference — at the end of the string,
//     before an unterminated "${", or before a character that cannot start a
//     name (e.g. "$ ", "$/", "$:") — is kept as a literal '$'.
//
// Expansion is single-pass: values taken from vars are not re-scanned, so a
// value that itself contains '$' cannot trigger recursive expansion.
func expandEnv(s string, vars map[string]string) string {
	var sb strings.Builder
	sb.Grow(len(s) * 2)

	for {
		idx := strings.IndexByte(s, '$')
		if idx < 0 {
			// No more references: flush the remainder and finish.
			sb.WriteString(s)
			break
		}

		// Copy the literal text before the '$' and position s on the '$'.
		sb.WriteString(s[:idx])
		s = s[idx:]

		switch {
		case len(s) == 1:
			// Trailing '$' with nothing after it.
			sb.WriteByte('$')
			return sb.String()

		case s[1] == '$':
			// "$$" escape.
			sb.WriteByte('$')
			s = s[2:]

		case s[1] == '{':
			end := strings.IndexByte(s[2:], '}')
			if end < 0 {
				// Unterminated "${": keep the '$' and let the rest flow
				// through as literal text.
				sb.WriteByte('$')
				s = s[1:]
				continue
			}
			// Indexing a nil map or a missing key yields "", which is the
			// documented result for unset variables.
			sb.WriteString(vars[s[2:2+end]])
			s = s[2+end+1:]

		default:
			j := 1
			for j < len(s) && isNameChar(s[j]) {
				j++
			}
			if j == 1 {
				// '$' followed by a non-name character is not a reference.
				// Previously this looked up the empty name and silently
				// dropped the '$'; keep it literally, as a shell would.
				sb.WriteByte('$')
			} else {
				sb.WriteString(vars[s[1:j]])
			}
			s = s[j:]
		}
	}

	return sb.String()
}

// isNameChar reports whether the byte c is a valid character for an
// environment variable name (ASCII letter, digit, or underscore).
func isNameChar(c byte) bool {
	switch {
	case c >= 'a' && c <= 'z', c >= 'A' && c <= 'Z', c >= '0' && c <= '9', c == '_':
		return true
	}
	return false
}

// shellescape wraps s in single quotes, escaping any embedded single quotes.
// This is POSIX-safe for paths, env values, and shell commands.
func shellescape(s string) string { diff --git a/internal/recipe/context_test.go b/internal/recipe/context_test.go index b00dfce..adb1a4c 100644 --- a/internal/recipe/context_test.go +++ b/internal/recipe/context_test.go @@ -45,6 +45,14 @@ func TestExecContext_WrappedCommand(t *testing.T) { cmd: "echo $MSG", want: "MSG='it'\\''s fine' /bin/sh -c 'echo $MSG'", }, + { + name: "env expansion with dollar sign PATH", + ctx: ExecContext{ + EnvVars: map[string]string{"PATH": "/usr/bin", "FOO": "/opt/venv/bin:$PATH"}, + }, + cmd: "make build", + want: "FOO='/opt/venv/bin:/usr/bin' PATH='/usr/bin' /bin/sh -c 'make build'", + }, } for _, tc := range tests { @@ -94,6 +102,109 @@ func TestExecContext_StartCommand(t *testing.T) { } } +func TestExpandEnv(t *testing.T) { + tests := []struct { + s string + vars map[string]string + want string + }{ + { + s: "hello", + vars: nil, + want: "hello", + }, + { + s: "$PATH", + vars: map[string]string{"PATH": "/usr/bin"}, + want: "/usr/bin", + }, + { + s: "${PATH}", + vars: map[string]string{"PATH": "/usr/bin"}, + want: "/usr/bin", + }, + { + s: "/opt/venv/bin:$PATH", + vars: map[string]string{"PATH": "/usr/bin"}, + want: "/opt/venv/bin:/usr/bin", + }, + { + s: "${HOME}/code", + vars: map[string]string{"HOME": "/root"}, + want: "/root/code", + }, + { + s: "hello $USER", + vars: map[string]string{"USER": "admin"}, + want: "hello admin", + }, + { + s: "$UNSET", + vars: map[string]string{"PATH": "/usr/bin"}, + want: "", + }, + { + s: "${UNSET}", + vars: map[string]string{"PATH": "/usr/bin"}, + want: "", + }, + { + s: "$$", + vars: map[string]string{"PATH": "/usr/bin"}, + want: "$", + }, + { + s: "price is $$100", + vars: nil, + want: "price is $100", + }, + { + s: "$FOO:$BAR", + vars: map[string]string{"FOO": "a", "BAR": "b"}, + want: "a:b", + }, + { + s: "${FOO}_${BAR}", + vars: map[string]string{"FOO": "hello", "BAR": "world"}, + want: "hello_world", + }, + { + s: "no vars here", + vars: nil, + want: "no vars here", + }, + { + s: "$", + 
vars: nil, + want: "$", + }, + { + s: "${", + vars: nil, + want: "${", + }, + { + s: "${}", + vars: nil, + want: "", + }, + { + s: "$VAR1$VAR2", + vars: map[string]string{"VAR1": "a", "VAR2": "b"}, + want: "ab", + }, + } + + for _, tc := range tests { + t.Run(tc.s, func(t *testing.T) { + got := expandEnv(tc.s, tc.vars) + if got != tc.want { + t.Errorf("expandEnv(%q, %v)\n got %q\n want %q", tc.s, tc.vars, got, tc.want) + } + }) + } +} + func TestShellescape(t *testing.T) { tests := []struct { input string diff --git a/internal/recipe/executor.go b/internal/recipe/executor.go index 3df45dc..aa4b305 100644 --- a/internal/recipe/executor.go +++ b/internal/recipe/executor.go @@ -68,7 +68,7 @@ func Execute( if bctx.EnvVars == nil { bctx.EnvVars = make(map[string]string) } - bctx.EnvVars[st.Key] = st.Value + bctx.EnvVars[st.Key] = expandEnv(st.Value, bctx.EnvVars) entries = append(entries, BuildLogEntry{Step: step, Phase: phase, Cmd: st.Raw, Ok: true}) case KindWORKDIR: diff --git a/internal/recipe/healthcheck.go b/internal/recipe/healthcheck.go new file mode 100644 index 0000000..6e48f73 --- /dev/null +++ b/internal/recipe/healthcheck.go @@ -0,0 +1,94 @@ +package recipe + +import ( + "fmt" + "strconv" + "strings" + "time" +) + +// HealthcheckConfig holds the parsed configuration for a build healthcheck. +// A healthcheck is a shell command that is executed repeatedly inside the +// sandbox until it succeeds or the retry/timeout budget is exhausted. +// +// Retries of 0 means unlimited retries (bounded only by the overall deadline) +type HealthcheckConfig struct { + Cmd string + Interval time.Duration + Timeout time.Duration + StartPeriod time.Duration + Retries int // 0 = unlimited +} + +// ParseHealthcheck parses a healthcheck string with optional flag prefix into +// a HealthcheckConfig. The syntax is: +// +// [--interval=] [--timeout=] [--start-period=] +// [--retries=] +// +// Flags must use the form --flag=value. 
The first token that does not start +// with "--" and everything after it is treated as the command. Defaults: +// interval=3s, timeout=10s, start-period=0, retries=0 (unlimited) +func ParseHealthcheck(s string) (HealthcheckConfig, error) { + s = strings.TrimSpace(s) + if s == "" { + return HealthcheckConfig{}, fmt.Errorf("empty healthcheck") + } + + hc := HealthcheckConfig{ + Interval: 3 * time.Second, + Timeout: 10 * time.Second, + } + + tokens := strings.Fields(s) + cmdIndex := -1 + + for i, token := range tokens { + if !strings.HasPrefix(token, "--") { + cmdIndex = i + break + } + + parts := strings.SplitN(token, "=", 2) + if len(parts) != 2 { + return HealthcheckConfig{}, fmt.Errorf("malformed flag (missing '='): %q", token) + } + + key, val := parts[0], parts[1] + switch key { + case "--interval": + d, err := time.ParseDuration(val) + if err != nil { + return HealthcheckConfig{}, fmt.Errorf("parse interval: %w", err) + } + hc.Interval = d + case "--timeout": + d, err := time.ParseDuration(val) + if err != nil { + return HealthcheckConfig{}, fmt.Errorf("parse timeout: %w", err) + } + hc.Timeout = d + case "--start-period": + d, err := time.ParseDuration(val) + if err != nil { + return HealthcheckConfig{}, fmt.Errorf("parse start period: %w", err) + } + hc.StartPeriod = d + case "--retries": + r, err := strconv.Atoi(val) + if err != nil { + return HealthcheckConfig{}, fmt.Errorf("parse retries: %w", err) + } + hc.Retries = r + default: + return HealthcheckConfig{}, fmt.Errorf("unknown healthcheck flag: %q", token) + } + } + + if cmdIndex == -1 { + return HealthcheckConfig{}, fmt.Errorf("healthcheck has no command") + } + + hc.Cmd = strings.Join(tokens[cmdIndex:], " ") + return hc, nil +} diff --git a/internal/recipe/healthcheck_test.go b/internal/recipe/healthcheck_test.go new file mode 100644 index 0000000..528e109 --- /dev/null +++ b/internal/recipe/healthcheck_test.go @@ -0,0 +1,126 @@ +package recipe + +import ( + "testing" + "time" +) + +func 
TestParseHealthcheck(t *testing.T) { + tests := []struct { + name string + input string + want HealthcheckConfig + wantErr bool + }{ + { + name: "plain command", + input: "curl -f http://localhost:8080", + want: HealthcheckConfig{ + Cmd: "curl -f http://localhost:8080", + Interval: 3 * time.Second, + Timeout: 10 * time.Second, + }, + wantErr: false, + }, + { + name: "all flags", + input: "--interval=5s --timeout=2s --start-period=15s --retries=3 ping -c 1 8.8.8.8", + want: HealthcheckConfig{ + Cmd: "ping -c 1 8.8.8.8", + Interval: 5 * time.Second, + Timeout: 2 * time.Second, + StartPeriod: 15 * time.Second, + Retries: 3, + }, + wantErr: false, + }, + { + name: "partial flags", + input: "--timeout=5s my-custom-check --verbose", + want: HealthcheckConfig{ + Cmd: "my-custom-check --verbose", + Interval: 3 * time.Second, + Timeout: 5 * time.Second, + }, + wantErr: false, + }, + { + name: "retries only", + input: "--retries=5 test.sh", + want: HealthcheckConfig{ + Cmd: "test.sh", + Interval: 3 * time.Second, + Timeout: 10 * time.Second, + Retries: 5, + }, + wantErr: false, + }, + { + name: "empty string", + input: "", + wantErr: true, + }, + { + name: "whitespace only", + input: " \t \n ", + wantErr: true, + }, + { + name: "flags but no command", + input: "--interval=5s --retries=2", + wantErr: true, + }, + { + name: "unknown flag", + input: "--magic=true my-check", + wantErr: true, + }, + { + name: "invalid duration", + input: "--interval=5smiles check.sh", + wantErr: true, + }, + { + name: "invalid retries", + input: "--retries=five check.sh", + wantErr: true, + }, + { + name: "command with dashes", + input: "--interval=2s command-with-dash --flag=value", + want: HealthcheckConfig{ + Cmd: "command-with-dash --flag=value", + Interval: 2 * time.Second, + Timeout: 10 * time.Second, + }, + wantErr: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := ParseHealthcheck(tt.input) + if (err != nil) != tt.wantErr { + 
t.Errorf("ParseHealthcheck() error = %v, wantErr %v", err, tt.wantErr) + return + } + if !tt.wantErr { + if got.Cmd != tt.want.Cmd { + t.Errorf("Cmd got = %v, want %v", got.Cmd, tt.want.Cmd) + } + if got.Interval != tt.want.Interval { + t.Errorf("Interval got = %v, want %v", got.Interval, tt.want.Interval) + } + if got.Timeout != tt.want.Timeout { + t.Errorf("Timeout got = %v, want %v", got.Timeout, tt.want.Timeout) + } + if got.StartPeriod != tt.want.StartPeriod { + t.Errorf("StartPeriod got = %v, want %v", got.StartPeriod, tt.want.StartPeriod) + } + if got.Retries != tt.want.Retries { + t.Errorf("Retries got = %v, want %v", got.Retries, tt.want.Retries) + } + } + }) + } +} diff --git a/internal/service/build.go b/internal/service/build.go index 1108044..57ca500 100644 --- a/internal/service/build.go +++ b/internal/service/build.go @@ -5,6 +5,7 @@ import ( "encoding/json" "fmt" "log/slog" + "strings" "sync" "time" @@ -23,8 +24,6 @@ import ( const ( buildQueueKey = "wrenn:build_queue" buildCommandTimeout = 30 * time.Second - healthcheckInterval = 1 * time.Second - healthcheckTimeout = 60 * time.Second ) // preBuildCmds run before the user recipe to prepare the build environment. @@ -321,11 +320,18 @@ func (s *BuildService) executeBuild(ctx context.Context, buildIDStr string) { panic(fmt.Sprintf("invalid post-build recipe: %v", err)) } - // Execute build phases: pre-build → user recipe → post-build. - // bctx carries working directory and env vars across all phases. 
var logs []recipe.BuildLogEntry step := 0 - bctx := &recipe.ExecContext{} + + envVars, err := s.fetchSandboxEnv(buildCtx, agent, sandboxIDStr) + if err != nil { + log.Warn("failed to fetch sandbox env, using defaults", "error", err) + envVars = map[string]string{ + "PATH": "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", + "HOME": "/root", + } + } + bctx := &recipe.ExecContext{EnvVars: envVars} runPhase := func(phase string, steps []recipe.Step, defaultTimeout time.Duration) bool { newEntries, nextStep, ok := recipe.Execute(buildCtx, phase, steps, sandboxIDStr, step, defaultTimeout, bctx, agent.Exec) @@ -365,8 +371,14 @@ func (s *BuildService) executeBuild(ctx context.Context, buildIDStr string) { // Healthcheck or direct snapshot. var sizeBytes int64 if build.Healthcheck != "" { - log.Info("running healthcheck", "cmd", build.Healthcheck) - if err := s.waitForHealthcheck(buildCtx, agent, sandboxIDStr, build.Healthcheck); err != nil { + hc, err := recipe.ParseHealthcheck(build.Healthcheck) + if err != nil { + s.destroySandbox(buildCtx, agent, sandboxIDStr) + s.failBuild(buildCtx, buildID, fmt.Sprintf("invalid healthcheck: %v", err)) + return + } + log.Info("running healthcheck", "cmd", hc.Cmd, "interval", hc.Interval, "timeout", hc.Timeout, "start_period", hc.StartPeriod, "retries", hc.Retries) + if err := s.waitForHealthcheck(buildCtx, agent, sandboxIDStr, hc); err != nil { s.destroySandbox(buildCtx, agent, sandboxIDStr) if buildCtx.Err() != nil { return @@ -445,36 +457,61 @@ func (s *BuildService) executeBuild(ctx context.Context, buildIDStr string) { log.Info("template build completed successfully", "name", build.Name) } -func (s *BuildService) waitForHealthcheck(ctx context.Context, agent buildAgentClient, sandboxIDStr, cmd string) error { - deadline := time.NewTimer(healthcheckTimeout) +// waitForHealthcheck repeatedly executes the healthcheck command inside the +// sandbox according to the config's interval, timeout, start-period, and +// 
retries. +// During the start period, failures are not counted toward the retry budget. +// Returns nil on the first successful check, or an error if retries are +// exhausted, the deadline passes, or the context is cancelled. +func (s *BuildService) waitForHealthcheck(ctx context.Context, agent buildAgentClient, sandboxIDStr string, hc recipe.HealthcheckConfig) error { + maxAttempts := 100 + if hc.Retries > 0 { + maxAttempts = hc.Retries + } + deadline := time.NewTimer(hc.StartPeriod + time.Duration(maxAttempts+1)*hc.Interval) defer deadline.Stop() - ticker := time.NewTicker(healthcheckInterval) + ticker := time.NewTicker(hc.Interval) defer ticker.Stop() + startedAt := time.Now() + failCount := 0 + for { select { case <-ctx.Done(): return ctx.Err() case <-deadline.C: - return fmt.Errorf("healthcheck timed out after %s", healthcheckTimeout) + return fmt.Errorf("healthcheck timed out: exceeded %d attempts over %s", failCount, time.Since(startedAt)) case <-ticker.C: - execCtx, cancel := context.WithTimeout(ctx, 10*time.Second) + execCtx, cancel := context.WithTimeout(ctx, hc.Timeout) resp, err := agent.Exec(execCtx, connect.NewRequest(&pb.ExecRequest{ SandboxId: sandboxIDStr, Cmd: "/bin/sh", - Args: []string{"-c", cmd}, - TimeoutSec: 10, + Args: []string{"-c", hc.Cmd}, + TimeoutSec: int32(hc.Timeout.Seconds()), })) cancel() if err != nil { slog.Debug("healthcheck exec error (retrying)", "error", err) + if time.Since(startedAt) >= hc.StartPeriod { + failCount++ + if hc.Retries > 0 && failCount >= hc.Retries { + return fmt.Errorf("healthcheck failed after %d retries: exec error: %v", failCount, err) + } + } continue } if resp.Msg.ExitCode == 0 { return nil } slog.Debug("healthcheck failed (retrying)", "exit_code", resp.Msg.ExitCode) + if time.Since(startedAt) >= hc.StartPeriod { + failCount++ + if hc.Retries > 0 && failCount >= hc.Retries { + return fmt.Errorf("healthcheck failed after %d retries: exit code %d", failCount, resp.Msg.ExitCode) + } + } } } } @@ -517,3 
+554,49 @@ func (s *BuildService) destroySandbox(_ context.Context, agent buildAgentClient, slog.Warn("failed to destroy build sandbox", "sandbox_id", sandboxIDStr, "error", err) } } + +// fetchSandboxEnv executes the 'env' command inside the specified sandbox via +// the build agent and returns environment variables +func (s *BuildService) fetchSandboxEnv(ctx context.Context, + agent buildAgentClient, sandboxIDStr string) (map[string]string, error) { + resp, err := agent.Exec(ctx, connect.NewRequest(&pb.ExecRequest{ + SandboxId: sandboxIDStr, + Cmd: "/bin/sh", + Args: []string{"-c", "env"}, + TimeoutSec: 10, + })) + if err != nil { + return nil, fmt.Errorf("fetch env: %w", err) + } + + if resp.Msg.ExitCode != 0 { + return nil, fmt.Errorf("fetch env: command exited with code %d", + resp.Msg.ExitCode) + } + + return s.parseSandboxEnv(string(resp.Msg.Stdout)), nil +} + +// parseSandboxEnv converts the raw newline-separated output of an 'env' +// command into a map. +// It skips empty lines and malformed entries, and correctly handles value +// containing '='. 
+func (s *BuildService) parseSandboxEnv(raw string) map[string]string { + envVars := make(map[string]string) + + for line := range strings.SplitSeq(raw, "\n") { + line = strings.TrimSpace(line) + if line == "" { + continue + } + + parts := strings.SplitN(line, "=", 2) + if len(parts) != 2 { + continue + } + + envVars[parts[0]] = parts[1] + } + + return envVars +} diff --git a/recipes/python-interpreter-v0-beta.healthcheck b/recipes/python-interpreter-v0-beta.healthcheck new file mode 100644 index 0000000..ca2555c --- /dev/null +++ b/recipes/python-interpreter-v0-beta.healthcheck @@ -0,0 +1 @@ +--interval=5s --timeout=3s --start-period=3s --retries=3 python3 -c "import urllib.request; urllib.request.urlopen('http://127.0.0.1:8888/api/status', timeout=3)" diff --git a/recipes/python-interpreter-v0-beta.recipefile b/recipes/python-interpreter-v0-beta.recipefile new file mode 100644 index 0000000..50221b4 --- /dev/null +++ b/recipes/python-interpreter-v0-beta.recipefile @@ -0,0 +1,7 @@ +RUN apt-get install -y --no-install-recommends python3 python3-pip python3-venv +RUN python3 -m venv /opt/venv +ENV PATH=/opt/venv/bin:$PATH + +RUN --timeout=5m pip install --no-cache-dir notebook + +START jupyter notebook --no-browser --ip=0.0.0.0 --port=8888 --ServerApp.token='' --ServerApp.allow_origin='*' --allow-root