1
0
forked from wrenn/wrenn
Files
wrenn-releases/internal/recipe/executor.go
Tasnim Kabir Sadik 4f340b8847 feat: add env expansion, sandbox env fetching, and configurable
healthchecks

Fix ENV instructions to expand $VAR references at set time using the
current env state, preventing self-referencing values like
PATH=/opt/venv/bin:$PATH from producing recursive expansions. Remove
expandEnv from shellPrefix to avoid double expansion.

Fetch sandbox environment variables via `env` before recipe execution
so ENV steps resolve against actual runtime values from the base
template image.

Replace hardcoded healthcheck timing with a Dockerfile-like flag parser
supporting --interval, --timeout, --start-period, and --retries. Add
start-period grace window and bounded retry counting to
waitForHealthcheck.

Add python-interpreter-v0-beta recipe and healthcheck files.
2026-04-07 01:15:43 +06:00

186 lines
5.1 KiB
Go

package recipe
import (
"context"
"fmt"
"log/slog"
"strings"
"time"
"connectrpc.com/connect"
pb "git.omukk.dev/wrenn/sandbox/proto/hostagent/gen"
)
// DefaultStepTimeout is the fallback timeout for RUN steps that carry no
// explicit --timeout flag.
const DefaultStepTimeout = 30 * time.Second
// BuildLogEntry is the per-step record stored in template_builds.logs (JSONB).
type BuildLogEntry struct {
Step int `json:"step"`
Phase string `json:"phase"`
Cmd string `json:"cmd"`
Stdout string `json:"stdout"`
Stderr string `json:"stderr"`
Exit int32 `json:"exit"`
Ok bool `json:"ok"`
Elapsed int64 `json:"elapsed_ms"`
}
// ExecFunc is the agent.Exec call signature used by the executor. It matches
// the method on the hostagent Connect RPC client.
type ExecFunc func(ctx context.Context, req *connect.Request[pb.ExecRequest]) (*connect.Response[pb.ExecResponse], error)
// Execute runs steps sequentially against sandboxID using execFn.
//
// - phase labels the log entries (e.g., "pre-build", "recipe", "post-build").
// - startStep is the 1-based offset so entries are globally numbered across phases.
// - defaultTimeout applies to RUN steps with no per-step --timeout; 0 → 10 minutes.
// - bctx is mutated in place as ENV/WORKDIR steps execute, and carries forward
// into subsequent phases when the caller passes the same pointer.
//
// Returns all log entries appended during this call, the next step counter
// value, and whether all steps succeeded. On false the last entry contains
// failure details; the caller is responsible for destroying the sandbox and
// recording the build error.
func Execute(
ctx context.Context,
phase string,
steps []Step,
sandboxID string,
startStep int,
defaultTimeout time.Duration,
bctx *ExecContext,
execFn ExecFunc,
) (entries []BuildLogEntry, nextStep int, ok bool) {
if defaultTimeout <= 0 {
defaultTimeout = 10 * time.Minute
}
step := startStep
for _, st := range steps {
step++
slog.Info("executing build step", "phase", phase, "step", step, "instruction", st.Raw)
switch st.Kind {
case KindENV:
if bctx.EnvVars == nil {
bctx.EnvVars = make(map[string]string)
}
bctx.EnvVars[st.Key] = expandEnv(st.Value, bctx.EnvVars)
entries = append(entries, BuildLogEntry{Step: step, Phase: phase, Cmd: st.Raw, Ok: true})
case KindWORKDIR:
bctx.WorkDir = st.Path
entries = append(entries, BuildLogEntry{Step: step, Phase: phase, Cmd: st.Raw, Ok: true})
case KindUSER, KindCOPY:
verb := strings.ToUpper(strings.Fields(st.Raw)[0])
entries = append(entries, BuildLogEntry{
Step: step,
Phase: phase,
Cmd: st.Raw,
Stderr: verb + " is not yet supported",
Ok: false,
})
return entries, step, false
case KindSTART:
entry, succeeded := execStart(ctx, st, sandboxID, phase, step, bctx, execFn)
entries = append(entries, entry)
if !succeeded {
return entries, step, false
}
case KindRUN:
timeout := defaultTimeout
if st.Timeout > 0 {
timeout = st.Timeout
}
entry, succeeded := execRun(ctx, st, sandboxID, phase, step, timeout, bctx, execFn)
entries = append(entries, entry)
if !succeeded {
return entries, step, false
}
}
}
return entries, step, true
}
func execRun(
ctx context.Context,
st Step,
sandboxID, phase string,
step int,
timeout time.Duration,
bctx *ExecContext,
execFn ExecFunc,
) (BuildLogEntry, bool) {
execCtx, cancel := context.WithTimeout(ctx, timeout)
defer cancel()
start := time.Now()
resp, err := execFn(execCtx, connect.NewRequest(&pb.ExecRequest{
SandboxId: sandboxID,
Cmd: "/bin/sh",
Args: []string{"-c", bctx.WrappedCommand(st.Shell)},
TimeoutSec: int32(timeout.Seconds()),
}))
entry := BuildLogEntry{
Step: step,
Phase: phase,
Cmd: st.Raw,
Elapsed: time.Since(start).Milliseconds(),
}
if err != nil {
entry.Stderr = fmt.Sprintf("exec error: %v", err)
return entry, false
}
entry.Stdout = string(resp.Msg.Stdout)
entry.Stderr = string(resp.Msg.Stderr)
entry.Exit = resp.Msg.ExitCode
entry.Ok = resp.Msg.ExitCode == 0
return entry, entry.Ok
}
func execStart(
ctx context.Context,
st Step,
sandboxID, phase string,
step int,
bctx *ExecContext,
execFn ExecFunc,
) (BuildLogEntry, bool) {
// START uses a short timeout: just long enough for the shell to fork and
// return. The background process itself runs indefinitely inside the VM.
execCtx, cancel := context.WithTimeout(ctx, 10*time.Second)
defer cancel()
start := time.Now()
resp, err := execFn(execCtx, connect.NewRequest(&pb.ExecRequest{
SandboxId: sandboxID,
Cmd: "/bin/sh",
Args: []string{"-c", bctx.StartCommand(st.Shell)},
TimeoutSec: 10,
}))
entry := BuildLogEntry{
Step: step,
Phase: phase,
Cmd: st.Raw,
Elapsed: time.Since(start).Milliseconds(),
}
if err != nil {
entry.Stderr = fmt.Sprintf("start error: %v", err)
return entry, false
}
entry.Exit = resp.Msg.ExitCode
entry.Ok = resp.Msg.ExitCode == 0
if !entry.Ok {
entry.Stderr = fmt.Sprintf("start failed with exit code %d: %s", resp.Msg.ExitCode, string(resp.Msg.Stderr))
}
return entry, entry.Ok
}