1
0
forked from wrenn/wrenn

feat: add env expansion, sandbox env fetching, and configurable healthchecks

Fix ENV instructions to expand $VAR references at set time using the
current env state, preventing self-referencing values like
PATH=/opt/venv/bin:$PATH from producing recursive expansions. Remove
expandEnv from shellPrefix to avoid double expansion.

Fetch sandbox environment variables via `env` before recipe execution
so ENV steps resolve against actual runtime values from the base
template image.

Replace hardcoded healthcheck timing with a Dockerfile-like flag parser
supporting --interval, --timeout, --start-period, and --retries. Add
start-period grace window and bounded retry counting to
waitForHealthcheck.

Add python-interpreter-v0-beta recipe and healthcheck files.
This commit is contained in:
Tasnim Kabir Sadik
2026-04-07 01:15:43 +06:00
parent ab38c8372c
commit 4f340b8847
10 changed files with 537 additions and 17 deletions

View File

@ -5,6 +5,7 @@ import (
"encoding/json"
"fmt"
"log/slog"
"strings"
"sync"
"time"
@ -23,8 +24,6 @@ import (
// Build-service constants.
const (
// buildQueueKey names the build queue.
// NOTE(review): presumably a key in an external queue store — its use is not
// visible in this section; confirm against the enqueue/dequeue code.
buildQueueKey = "wrenn:build_queue"
// buildCommandTimeout is a 30-second limit.
// NOTE(review): presumably the default timeout for a single build command —
// confirm at the call sites.
buildCommandTimeout = 30 * time.Second
// healthcheckInterval is the ticker period between healthcheck executions in
// waitForHealthcheck.
healthcheckInterval = 1 * time.Second
// healthcheckTimeout is the overall deadline waitForHealthcheck allows before
// reporting that the healthcheck timed out.
healthcheckTimeout = 60 * time.Second
)
// preBuildCmds run before the user recipe to prepare the build environment.
@ -321,11 +320,18 @@ func (s *BuildService) executeBuild(ctx context.Context, buildIDStr string) {
panic(fmt.Sprintf("invalid post-build recipe: %v", err))
}
// Execute build phases: pre-build → user recipe → post-build.
// bctx carries working directory and env vars across all phases.
var logs []recipe.BuildLogEntry
step := 0
bctx := &recipe.ExecContext{}
envVars, err := s.fetchSandboxEnv(buildCtx, agent, sandboxIDStr)
if err != nil {
log.Warn("failed to fetch sandbox env, using defaults", "error", err)
envVars = map[string]string{
"PATH": "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
"HOME": "/root",
}
}
bctx := &recipe.ExecContext{EnvVars: envVars}
runPhase := func(phase string, steps []recipe.Step, defaultTimeout time.Duration) bool {
newEntries, nextStep, ok := recipe.Execute(buildCtx, phase, steps, sandboxIDStr, step, defaultTimeout, bctx, agent.Exec)
@ -365,8 +371,14 @@ func (s *BuildService) executeBuild(ctx context.Context, buildIDStr string) {
// Healthcheck or direct snapshot.
var sizeBytes int64
if build.Healthcheck != "" {
log.Info("running healthcheck", "cmd", build.Healthcheck)
if err := s.waitForHealthcheck(buildCtx, agent, sandboxIDStr, build.Healthcheck); err != nil {
hc, err := recipe.ParseHealthcheck(build.Healthcheck)
if err != nil {
s.destroySandbox(buildCtx, agent, sandboxIDStr)
s.failBuild(buildCtx, buildID, fmt.Sprintf("invalid healthcheck: %v", err))
return
}
log.Info("running healthcheck", "cmd", hc.Cmd, "interval", hc.Interval, "timeout", hc.Timeout, "start_period", hc.StartPeriod, "retries", hc.Retries)
if err := s.waitForHealthcheck(buildCtx, agent, sandboxIDStr, hc); err != nil {
s.destroySandbox(buildCtx, agent, sandboxIDStr)
if buildCtx.Err() != nil {
return
@ -445,36 +457,61 @@ func (s *BuildService) executeBuild(ctx context.Context, buildIDStr string) {
log.Info("template build completed successfully", "name", build.Name)
}
func (s *BuildService) waitForHealthcheck(ctx context.Context, agent buildAgentClient, sandboxIDStr, cmd string) error {
deadline := time.NewTimer(healthcheckTimeout)
// waitForHealthcheck repeatedly executes the healthcheck command inside the
// sandbox until it succeeds or the configured budget is spent.
//
// A check is started on every tick of hc.Interval and is bounded by
// hc.Timeout. A check passes when the command exits with code 0. Failures
// (exec errors or non-zero exit codes) observed before hc.StartPeriod has
// elapsed are not counted; afterwards each failure counts toward hc.Retries.
// When hc.Retries is zero or negative the retry count is unbounded and only
// the overall deadline limits the wait.
//
// Returns nil on the first successful check. Returns an error if hc.Interval
// or hc.Timeout is not positive, if the retries are exhausted, or if the
// overall deadline passes. If ctx is cancelled, ctx.Err() is returned.
func (s *BuildService) waitForHealthcheck(ctx context.Context, agent buildAgentClient, sandboxIDStr string, hc recipe.HealthcheckConfig) error {
	// time.NewTicker panics on a non-positive interval, and a non-positive
	// timeout would expire every check before it could run.
	if hc.Interval <= 0 {
		return fmt.Errorf("healthcheck interval must be positive, got %s", hc.Interval)
	}
	if hc.Timeout <= 0 {
		return fmt.Errorf("healthcheck timeout must be positive, got %s", hc.Timeout)
	}

	// With unlimited retries, 100 attempts are assumed when sizing the deadline.
	maxAttempts := 100
	if hc.Retries > 0 {
		maxAttempts = hc.Retries
	}

	// Round up to whole seconds so a sub-second timeout is not truncated to 0.
	timeoutSec := int32((hc.Timeout + time.Second - 1) / time.Second)

	// One attempt can occupy up to Interval + Timeout of wall-clock time (the
	// wait for the tick plus a check that runs until it times out). Sizing the
	// deadline from both keeps slow checks from tripping it before the retry
	// budget has been used. The extra attempt is slack for a check that
	// straddles the end of the start period.
	budget := hc.StartPeriod + time.Duration(maxAttempts+1)*(hc.Interval+hc.Timeout)
	deadline := time.NewTimer(budget)
	defer deadline.Stop()
	ticker := time.NewTicker(hc.Interval)
	defer ticker.Stop()

	startedAt := time.Now()
	attempts := 0
	failCount := 0

	// recordFailure counts a failed check unless the start period is still
	// running, and reports whether the retry budget is now exhausted.
	recordFailure := func() bool {
		if time.Since(startedAt) < hc.StartPeriod {
			return false
		}
		failCount++
		return hc.Retries > 0 && failCount >= hc.Retries
	}

	for {
		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-deadline.C:
			return fmt.Errorf("healthcheck timed out after %d attempts over %s", attempts, time.Since(startedAt))
		case <-ticker.C:
			attempts++
			execCtx, cancel := context.WithTimeout(ctx, hc.Timeout)
			resp, err := agent.Exec(execCtx, connect.NewRequest(&pb.ExecRequest{
				SandboxId:  sandboxIDStr,
				Cmd:        "/bin/sh",
				Args:       []string{"-c", hc.Cmd},
				TimeoutSec: timeoutSec,
			}))
			cancel()
			if err != nil {
				slog.Debug("healthcheck exec error (retrying)", "error", err)
				if recordFailure() {
					return fmt.Errorf("healthcheck failed after %d retries: exec error: %w", failCount, err)
				}
				continue
			}
			if resp.Msg.ExitCode == 0 {
				return nil
			}
			slog.Debug("healthcheck failed (retrying)", "exit_code", resp.Msg.ExitCode)
			if recordFailure() {
				return fmt.Errorf("healthcheck failed after %d retries: exit code %d", failCount, resp.Msg.ExitCode)
			}
		}
	}
}
@ -517,3 +554,49 @@ func (s *BuildService) destroySandbox(_ context.Context, agent buildAgentClient,
slog.Warn("failed to destroy build sandbox", "sandbox_id", sandboxIDStr, "error", err)
}
}
// fetchSandboxEnv runs `env` through /bin/sh inside the given sandbox via the
// build agent and returns the reported variables as a name-to-value map.
//
// The exec request carries a 10-second TimeoutSec. An error is returned when
// the exec call itself fails or when the command exits with a non-zero code;
// otherwise the command's stdout is handed to parseSandboxEnv.
func (s *BuildService) fetchSandboxEnv(ctx context.Context, agent buildAgentClient, sandboxIDStr string) (map[string]string, error) {
	req := connect.NewRequest(&pb.ExecRequest{
		SandboxId:  sandboxIDStr,
		Cmd:        "/bin/sh",
		Args:       []string{"-c", "env"},
		TimeoutSec: 10,
	})

	result, err := agent.Exec(ctx, req)
	if err != nil {
		return nil, fmt.Errorf("fetch env: %w", err)
	}
	if code := result.Msg.ExitCode; code != 0 {
		return nil, fmt.Errorf("fetch env: command exited with code %d", code)
	}

	stdout := string(result.Msg.Stdout)
	return s.parseSandboxEnv(stdout), nil
}
// parseSandboxEnv converts the raw newline-separated output of an `env`
// command into a name-to-value map.
//
// Each line is split at its first '=', so values may themselves contain '='.
// Values are kept verbatim, including leading and trailing spaces, apart from
// a trailing carriage return left by CRLF line endings. Lines without '=' and
// lines whose name is empty are skipped. When a name repeats, the last
// occurrence wins.
//
// Values that contain newlines cannot be recovered from this format: their
// continuation lines are either skipped or parsed as separate entries.
func (s *BuildService) parseSandboxEnv(raw string) map[string]string {
	envVars := make(map[string]string)
	for line := range strings.SplitSeq(raw, "\n") {
		// Strip only the line terminator; trimming the whole line would eat
		// significant trailing whitespace in the value (e.g. PS1="$ ").
		line = strings.TrimSuffix(line, "\r")
		name, value, found := strings.Cut(line, "=")
		if !found {
			continue
		}
		// Tolerate stray whitespace around the name; an entry with no name is
		// not a usable variable.
		name = strings.TrimSpace(name)
		if name == "" {
			continue
		}
		envVars[name] = value
	}
	return envVars
}