forked from wrenn/wrenn
Add skip_pre_post build option, cancel endpoint, and recipe package
- skip_pre_post flag on builds bypasses apt update/clean pre/post steps for
faster iteration when the recipe handles its own environment setup
- POST /v1/admin/builds/{id}/cancel endpoint marks an in-progress build as
cancelled; UpdateBuildStatus now also sets completed_at for 'cancelled'
- internal/recipe: typed recipe parser and executor (RUN/START/ENV/WORKDIR steps;
USER/COPY are parsed but not yet supported) replacing the raw string slice
approach in the build worker
- pre/post build commands prefixed with RUN to match recipe step format
This commit is contained in:
@ -36,6 +36,7 @@ type createBuildRequest struct {
|
||||
Healthcheck string `json:"healthcheck"`
|
||||
VCPUs int32 `json:"vcpus"`
|
||||
MemoryMB int32 `json:"memory_mb"`
|
||||
SkipPrePost bool `json:"skip_pre_post"`
|
||||
}
|
||||
|
||||
type buildResponse struct {
|
||||
@ -127,6 +128,7 @@ func (h *buildHandler) Create(w http.ResponseWriter, r *http.Request) {
|
||||
Healthcheck: req.Healthcheck,
|
||||
VCPUs: req.VCPUs,
|
||||
MemoryMB: req.MemoryMB,
|
||||
SkipPrePost: req.SkipPrePost,
|
||||
})
|
||||
if err != nil {
|
||||
slog.Error("failed to create build", "error", err)
|
||||
@ -254,3 +256,21 @@ func (h *buildHandler) DeleteTemplate(w http.ResponseWriter, r *http.Request) {
|
||||
|
||||
w.WriteHeader(http.StatusNoContent)
|
||||
}
|
||||
|
||||
// Cancel handles POST /v1/admin/builds/{id}/cancel.
|
||||
func (h *buildHandler) Cancel(w http.ResponseWriter, r *http.Request) {
|
||||
buildIDStr := chi.URLParam(r, "id")
|
||||
|
||||
buildID, err := id.ParseBuildID(buildIDStr)
|
||||
if err != nil {
|
||||
writeError(w, http.StatusBadRequest, "invalid_request", "invalid build ID")
|
||||
return
|
||||
}
|
||||
|
||||
if err := h.svc.Cancel(r.Context(), buildID); err != nil {
|
||||
writeError(w, http.StatusBadRequest, "invalid_request", err.Error())
|
||||
return
|
||||
}
|
||||
|
||||
w.WriteHeader(http.StatusNoContent)
|
||||
}
|
||||
|
||||
@ -12,7 +12,7 @@ import (
|
||||
)
|
||||
|
||||
const getTemplateBuild = `-- name: GetTemplateBuild :one
|
||||
SELECT id, name, base_template, recipe, healthcheck, vcpus, memory_mb, status, current_step, total_steps, logs, error, sandbox_id, host_id, created_at, started_at, completed_at, template_id, team_id FROM template_builds WHERE id = $1
|
||||
SELECT id, name, base_template, recipe, healthcheck, vcpus, memory_mb, status, current_step, total_steps, logs, error, sandbox_id, host_id, created_at, started_at, completed_at, template_id, team_id, skip_pre_post FROM template_builds WHERE id = $1
|
||||
`
|
||||
|
||||
func (q *Queries) GetTemplateBuild(ctx context.Context, id pgtype.UUID) (TemplateBuild, error) {
|
||||
@ -38,14 +38,15 @@ func (q *Queries) GetTemplateBuild(ctx context.Context, id pgtype.UUID) (Templat
|
||||
&i.CompletedAt,
|
||||
&i.TemplateID,
|
||||
&i.TeamID,
|
||||
&i.SkipPrePost,
|
||||
)
|
||||
return i, err
|
||||
}
|
||||
|
||||
const insertTemplateBuild = `-- name: InsertTemplateBuild :one
|
||||
INSERT INTO template_builds (id, name, base_template, recipe, healthcheck, vcpus, memory_mb, status, total_steps, template_id, team_id)
|
||||
VALUES ($1, $2, $3, $4, $5, $6, $7, 'pending', $8, $9, $10)
|
||||
RETURNING id, name, base_template, recipe, healthcheck, vcpus, memory_mb, status, current_step, total_steps, logs, error, sandbox_id, host_id, created_at, started_at, completed_at, template_id, team_id
|
||||
INSERT INTO template_builds (id, name, base_template, recipe, healthcheck, vcpus, memory_mb, status, total_steps, template_id, team_id, skip_pre_post)
|
||||
VALUES ($1, $2, $3, $4, $5, $6, $7, 'pending', $8, $9, $10, $11)
|
||||
RETURNING id, name, base_template, recipe, healthcheck, vcpus, memory_mb, status, current_step, total_steps, logs, error, sandbox_id, host_id, created_at, started_at, completed_at, template_id, team_id, skip_pre_post
|
||||
`
|
||||
|
||||
type InsertTemplateBuildParams struct {
|
||||
@ -59,6 +60,7 @@ type InsertTemplateBuildParams struct {
|
||||
TotalSteps int32 `json:"total_steps"`
|
||||
TemplateID pgtype.UUID `json:"template_id"`
|
||||
TeamID pgtype.UUID `json:"team_id"`
|
||||
SkipPrePost bool `json:"skip_pre_post"`
|
||||
}
|
||||
|
||||
func (q *Queries) InsertTemplateBuild(ctx context.Context, arg InsertTemplateBuildParams) (TemplateBuild, error) {
|
||||
@ -73,6 +75,7 @@ func (q *Queries) InsertTemplateBuild(ctx context.Context, arg InsertTemplateBui
|
||||
arg.TotalSteps,
|
||||
arg.TemplateID,
|
||||
arg.TeamID,
|
||||
arg.SkipPrePost,
|
||||
)
|
||||
var i TemplateBuild
|
||||
err := row.Scan(
|
||||
@ -95,12 +98,13 @@ func (q *Queries) InsertTemplateBuild(ctx context.Context, arg InsertTemplateBui
|
||||
&i.CompletedAt,
|
||||
&i.TemplateID,
|
||||
&i.TeamID,
|
||||
&i.SkipPrePost,
|
||||
)
|
||||
return i, err
|
||||
}
|
||||
|
||||
const listTemplateBuilds = `-- name: ListTemplateBuilds :many
|
||||
SELECT id, name, base_template, recipe, healthcheck, vcpus, memory_mb, status, current_step, total_steps, logs, error, sandbox_id, host_id, created_at, started_at, completed_at, template_id, team_id FROM template_builds ORDER BY created_at DESC
|
||||
SELECT id, name, base_template, recipe, healthcheck, vcpus, memory_mb, status, current_step, total_steps, logs, error, sandbox_id, host_id, created_at, started_at, completed_at, template_id, team_id, skip_pre_post FROM template_builds ORDER BY created_at DESC
|
||||
`
|
||||
|
||||
func (q *Queries) ListTemplateBuilds(ctx context.Context) ([]TemplateBuild, error) {
|
||||
@ -132,6 +136,7 @@ func (q *Queries) ListTemplateBuilds(ctx context.Context) ([]TemplateBuild, erro
|
||||
&i.CompletedAt,
|
||||
&i.TemplateID,
|
||||
&i.TeamID,
|
||||
&i.SkipPrePost,
|
||||
); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@ -196,10 +201,10 @@ func (q *Queries) UpdateBuildSandbox(ctx context.Context, arg UpdateBuildSandbox
|
||||
const updateBuildStatus = `-- name: UpdateBuildStatus :one
|
||||
UPDATE template_builds
|
||||
SET status = $2,
|
||||
started_at = CASE WHEN $2 = 'running' AND started_at IS NULL THEN NOW() ELSE started_at END,
|
||||
completed_at = CASE WHEN $2 IN ('success', 'failed') THEN NOW() ELSE completed_at END
|
||||
started_at = CASE WHEN $2 = 'running' AND started_at IS NULL THEN NOW() ELSE started_at END,
|
||||
completed_at = CASE WHEN $2 IN ('success', 'failed', 'cancelled') THEN NOW() ELSE completed_at END
|
||||
WHERE id = $1
|
||||
RETURNING id, name, base_template, recipe, healthcheck, vcpus, memory_mb, status, current_step, total_steps, logs, error, sandbox_id, host_id, created_at, started_at, completed_at, template_id, team_id
|
||||
RETURNING id, name, base_template, recipe, healthcheck, vcpus, memory_mb, status, current_step, total_steps, logs, error, sandbox_id, host_id, created_at, started_at, completed_at, template_id, team_id, skip_pre_post
|
||||
`
|
||||
|
||||
type UpdateBuildStatusParams struct {
|
||||
@ -230,6 +235,7 @@ func (q *Queries) UpdateBuildStatus(ctx context.Context, arg UpdateBuildStatusPa
|
||||
&i.CompletedAt,
|
||||
&i.TemplateID,
|
||||
&i.TeamID,
|
||||
&i.SkipPrePost,
|
||||
)
|
||||
return i, err
|
||||
}
|
||||
|
||||
63
internal/recipe/context.go
Normal file
63
internal/recipe/context.go
Normal file
@ -0,0 +1,63 @@
|
||||
package recipe
|
||||
|
||||
import (
	"sort"
	"strings"
)
|
||||
|
||||
// ExecContext holds mutable state that persists across recipe steps.
// The zero value is ready to use (no working directory, no environment);
// it is updated as ENV and WORKDIR steps execute.
type ExecContext struct {
	// WorkDir is the directory commands are run from. Empty means the
	// command is not prefixed with a cd.
	WorkDir string
	// EnvVars are the variables placed in front of each command. A nil or
	// empty map means no variables are set.
	EnvVars map[string]string
}

// WrappedCommand returns the full shell command for a RUN step with context
// applied. The result is passed as the argument to /bin/sh -c.
//
// If WORKDIR and/or ENV are set, they are prepended as a shell preamble:
//
//	cd '/the/dir' && KEY='val' /bin/sh -c 'original command'
//
// With no context set, cmd is returned unchanged.
func (c *ExecContext) WrappedCommand(cmd string) string {
	prefix := c.shellPrefix()
	if prefix == "" {
		return cmd
	}
	return prefix + "/bin/sh -c " + shellescape(cmd)
}

// StartCommand returns the shell command for a START step. The process is
// launched in the background via nohup so that the outer shell exits
// immediately, allowing the build to continue. stdout/stderr of the
// background process are redirected to /dev/null.
//
// Multiple START steps can be issued to run several background processes
// simultaneously before a healthcheck is evaluated.
func (c *ExecContext) StartCommand(cmd string) string {
	return c.shellPrefix() + "nohup /bin/sh -c " + shellescape(cmd) + " >/dev/null 2>&1 &"
}

// shellPrefix builds the "cd ... && KEY=val " preamble for a shell command.
// Returns an empty string when no context is set.
//
// Environment variables are emitted in sorted key order. Go randomizes map
// iteration, so without sorting the same context would produce a different
// command string on every call, making build logs and tests non-reproducible.
// Keys are written verbatim (not quoted); values are single-quoted.
func (c *ExecContext) shellPrefix() string {
	if c.WorkDir == "" && len(c.EnvVars) == 0 {
		return ""
	}

	var sb strings.Builder
	if c.WorkDir != "" {
		sb.WriteString("cd ")
		sb.WriteString(shellescape(c.WorkDir))
		sb.WriteString(" && ")
	}

	keys := make([]string, 0, len(c.EnvVars))
	for k := range c.EnvVars {
		keys = append(keys, k)
	}
	sort.Strings(keys)

	for _, k := range keys {
		sb.WriteString(k)
		sb.WriteByte('=')
		sb.WriteString(shellescape(c.EnvVars[k]))
		sb.WriteByte(' ')
	}
	return sb.String()
}

// shellescape wraps s in single quotes, escaping any embedded single quotes
// by closing the quoted string, emitting an escaped quote, and reopening it
// ('\''). The empty string becomes ''.
func shellescape(s string) string {
	return "'" + strings.ReplaceAll(s, "'", `'\''`) + "'"
}
|
||||
114
internal/recipe/context_test.go
Normal file
114
internal/recipe/context_test.go
Normal file
@ -0,0 +1,114 @@
|
||||
package recipe
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestExecContext_WrappedCommand(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
ctx ExecContext
|
||||
cmd string
|
||||
want string
|
||||
}{
|
||||
{
|
||||
name: "no context",
|
||||
ctx: ExecContext{},
|
||||
cmd: "apt install -y curl",
|
||||
want: "apt install -y curl",
|
||||
},
|
||||
{
|
||||
name: "workdir only",
|
||||
ctx: ExecContext{WorkDir: "/app"},
|
||||
cmd: "npm install",
|
||||
want: "cd '/app' && /bin/sh -c 'npm install'",
|
||||
},
|
||||
{
|
||||
name: "env only",
|
||||
ctx: ExecContext{EnvVars: map[string]string{"PORT": "8080"}},
|
||||
cmd: "node server.js",
|
||||
want: "PORT='8080' /bin/sh -c 'node server.js'",
|
||||
},
|
||||
{
|
||||
name: "workdir with space",
|
||||
ctx: ExecContext{WorkDir: "/my project"},
|
||||
cmd: "make build",
|
||||
want: "cd '/my project' && /bin/sh -c 'make build'",
|
||||
},
|
||||
{
|
||||
name: "command with single quotes",
|
||||
ctx: ExecContext{WorkDir: "/app"},
|
||||
cmd: "echo 'hello'",
|
||||
want: "cd '/app' && /bin/sh -c 'echo '\\''hello'\\'''",
|
||||
},
|
||||
{
|
||||
name: "env value with single quotes",
|
||||
ctx: ExecContext{EnvVars: map[string]string{"MSG": "it's fine"}},
|
||||
cmd: "echo $MSG",
|
||||
want: "MSG='it'\\''s fine' /bin/sh -c 'echo $MSG'",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range tests {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
got := tc.ctx.WrappedCommand(tc.cmd)
|
||||
if got != tc.want {
|
||||
t.Errorf("WrappedCommand(%q)\n got %q\n want %q", tc.cmd, got, tc.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestExecContext_StartCommand(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
ctx ExecContext
|
||||
cmd string
|
||||
want string
|
||||
}{
|
||||
{
|
||||
name: "no context",
|
||||
ctx: ExecContext{},
|
||||
cmd: "python3 app.py",
|
||||
want: "nohup /bin/sh -c 'python3 app.py' >/dev/null 2>&1 &",
|
||||
},
|
||||
{
|
||||
name: "with workdir",
|
||||
ctx: ExecContext{WorkDir: "/app"},
|
||||
cmd: "python3 server.py",
|
||||
want: "cd '/app' && nohup /bin/sh -c 'python3 server.py' >/dev/null 2>&1 &",
|
||||
},
|
||||
{
|
||||
name: "with env",
|
||||
ctx: ExecContext{EnvVars: map[string]string{"PORT": "9000"}},
|
||||
cmd: "node index.js",
|
||||
want: "PORT='9000' nohup /bin/sh -c 'node index.js' >/dev/null 2>&1 &",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range tests {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
got := tc.ctx.StartCommand(tc.cmd)
|
||||
if got != tc.want {
|
||||
t.Errorf("StartCommand(%q)\n got %q\n want %q", tc.cmd, got, tc.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestShellescape(t *testing.T) {
|
||||
tests := []struct {
|
||||
input string
|
||||
want string
|
||||
}{
|
||||
{"simple", "'simple'"},
|
||||
{"/path/to/dir", "'/path/to/dir'"},
|
||||
{"it's fine", "'it'\\''s fine'"},
|
||||
{"", "''"},
|
||||
{"a'b'c", "'a'\\''b'\\''c'"},
|
||||
}
|
||||
for _, tc := range tests {
|
||||
got := shellescape(tc.input)
|
||||
if got != tc.want {
|
||||
t.Errorf("shellescape(%q) = %q, want %q", tc.input, got, tc.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
185
internal/recipe/executor.go
Normal file
185
internal/recipe/executor.go
Normal file
@ -0,0 +1,185 @@
|
||||
package recipe
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"connectrpc.com/connect"
|
||||
|
||||
pb "git.omukk.dev/wrenn/sandbox/proto/hostagent/gen"
|
||||
)
|
||||
|
||||
// DefaultStepTimeout is the timeout for RUN steps that carry no explicit
// --timeout flag.
//
// Execute does not apply this constant on its own: when it is given a
// non-positive defaultTimeout it substitutes 10 minutes instead, so callers
// that want this value must pass it explicitly.
const DefaultStepTimeout time.Duration = 30 * time.Second
|
||||
|
||||
// BuildLogEntry is the per-step record stored in template_builds.logs (JSONB).
// One entry is produced for every executed recipe step, including steps that
// only update the execution context (ENV, WORKDIR).
type BuildLogEntry struct {
	// Step is the step number assigned by the executor, counted across phases.
	Step int `json:"step"`
	// Phase is the caller-supplied label, e.g. "pre-build", "recipe" or "post-build".
	Phase string `json:"phase"`
	// Cmd is the original instruction text of the step.
	Cmd string `json:"cmd"`
	// Stdout is the captured standard output, when the step produced any.
	Stdout string `json:"stdout"`
	// Stderr is the captured standard error, or an executor-generated error message.
	Stderr string `json:"stderr"`
	// Exit is the exit code reported for the command.
	Exit int32 `json:"exit"`
	// Ok reports whether the step succeeded.
	Ok bool `json:"ok"`
	// Elapsed is the wall-clock duration of the step in milliseconds.
	Elapsed int64 `json:"elapsed_ms"`
}
|
||||
|
||||
// ExecFunc is the agent.Exec call signature used by the executor. It matches
|
||||
// the method on the hostagent Connect RPC client.
|
||||
type ExecFunc func(ctx context.Context, req *connect.Request[pb.ExecRequest]) (*connect.Response[pb.ExecResponse], error)
|
||||
|
||||
// Execute runs steps sequentially against sandboxID using execFn.
|
||||
//
|
||||
// - phase labels the log entries (e.g., "pre-build", "recipe", "post-build").
|
||||
// - startStep is the 1-based offset so entries are globally numbered across phases.
|
||||
// - defaultTimeout applies to RUN steps with no per-step --timeout; 0 → 10 minutes.
|
||||
// - bctx is mutated in place as ENV/WORKDIR steps execute, and carries forward
|
||||
// into subsequent phases when the caller passes the same pointer.
|
||||
//
|
||||
// Returns all log entries appended during this call, the next step counter
|
||||
// value, and whether all steps succeeded. On false the last entry contains
|
||||
// failure details; the caller is responsible for destroying the sandbox and
|
||||
// recording the build error.
|
||||
func Execute(
|
||||
ctx context.Context,
|
||||
phase string,
|
||||
steps []Step,
|
||||
sandboxID string,
|
||||
startStep int,
|
||||
defaultTimeout time.Duration,
|
||||
bctx *ExecContext,
|
||||
execFn ExecFunc,
|
||||
) (entries []BuildLogEntry, nextStep int, ok bool) {
|
||||
if defaultTimeout <= 0 {
|
||||
defaultTimeout = 10 * time.Minute
|
||||
}
|
||||
|
||||
step := startStep
|
||||
for _, st := range steps {
|
||||
step++
|
||||
slog.Info("executing build step", "phase", phase, "step", step, "instruction", st.Raw)
|
||||
|
||||
switch st.Kind {
|
||||
case KindENV:
|
||||
if bctx.EnvVars == nil {
|
||||
bctx.EnvVars = make(map[string]string)
|
||||
}
|
||||
bctx.EnvVars[st.Key] = st.Value
|
||||
entries = append(entries, BuildLogEntry{Step: step, Phase: phase, Cmd: st.Raw, Ok: true})
|
||||
|
||||
case KindWORKDIR:
|
||||
bctx.WorkDir = st.Path
|
||||
entries = append(entries, BuildLogEntry{Step: step, Phase: phase, Cmd: st.Raw, Ok: true})
|
||||
|
||||
case KindUSER, KindCOPY:
|
||||
verb := strings.ToUpper(strings.Fields(st.Raw)[0])
|
||||
entries = append(entries, BuildLogEntry{
|
||||
Step: step,
|
||||
Phase: phase,
|
||||
Cmd: st.Raw,
|
||||
Stderr: verb + " is not yet supported",
|
||||
Ok: false,
|
||||
})
|
||||
return entries, step, false
|
||||
|
||||
case KindSTART:
|
||||
entry, succeeded := execStart(ctx, st, sandboxID, phase, step, bctx, execFn)
|
||||
entries = append(entries, entry)
|
||||
if !succeeded {
|
||||
return entries, step, false
|
||||
}
|
||||
|
||||
case KindRUN:
|
||||
timeout := defaultTimeout
|
||||
if st.Timeout > 0 {
|
||||
timeout = st.Timeout
|
||||
}
|
||||
entry, succeeded := execRun(ctx, st, sandboxID, phase, step, timeout, bctx, execFn)
|
||||
entries = append(entries, entry)
|
||||
if !succeeded {
|
||||
return entries, step, false
|
||||
}
|
||||
}
|
||||
}
|
||||
return entries, step, true
|
||||
}
|
||||
|
||||
func execRun(
|
||||
ctx context.Context,
|
||||
st Step,
|
||||
sandboxID, phase string,
|
||||
step int,
|
||||
timeout time.Duration,
|
||||
bctx *ExecContext,
|
||||
execFn ExecFunc,
|
||||
) (BuildLogEntry, bool) {
|
||||
execCtx, cancel := context.WithTimeout(ctx, timeout)
|
||||
defer cancel()
|
||||
|
||||
start := time.Now()
|
||||
resp, err := execFn(execCtx, connect.NewRequest(&pb.ExecRequest{
|
||||
SandboxId: sandboxID,
|
||||
Cmd: "/bin/sh",
|
||||
Args: []string{"-c", bctx.WrappedCommand(st.Shell)},
|
||||
TimeoutSec: int32(timeout.Seconds()),
|
||||
}))
|
||||
|
||||
entry := BuildLogEntry{
|
||||
Step: step,
|
||||
Phase: phase,
|
||||
Cmd: st.Raw,
|
||||
Elapsed: time.Since(start).Milliseconds(),
|
||||
}
|
||||
if err != nil {
|
||||
entry.Stderr = fmt.Sprintf("exec error: %v", err)
|
||||
return entry, false
|
||||
}
|
||||
entry.Stdout = string(resp.Msg.Stdout)
|
||||
entry.Stderr = string(resp.Msg.Stderr)
|
||||
entry.Exit = resp.Msg.ExitCode
|
||||
entry.Ok = resp.Msg.ExitCode == 0
|
||||
return entry, entry.Ok
|
||||
}
|
||||
|
||||
func execStart(
|
||||
ctx context.Context,
|
||||
st Step,
|
||||
sandboxID, phase string,
|
||||
step int,
|
||||
bctx *ExecContext,
|
||||
execFn ExecFunc,
|
||||
) (BuildLogEntry, bool) {
|
||||
// START uses a short timeout: just long enough for the shell to fork and
|
||||
// return. The background process itself runs indefinitely inside the VM.
|
||||
execCtx, cancel := context.WithTimeout(ctx, 10*time.Second)
|
||||
defer cancel()
|
||||
|
||||
start := time.Now()
|
||||
resp, err := execFn(execCtx, connect.NewRequest(&pb.ExecRequest{
|
||||
SandboxId: sandboxID,
|
||||
Cmd: "/bin/sh",
|
||||
Args: []string{"-c", bctx.StartCommand(st.Shell)},
|
||||
TimeoutSec: 10,
|
||||
}))
|
||||
|
||||
entry := BuildLogEntry{
|
||||
Step: step,
|
||||
Phase: phase,
|
||||
Cmd: st.Raw,
|
||||
Elapsed: time.Since(start).Milliseconds(),
|
||||
}
|
||||
if err != nil {
|
||||
entry.Stderr = fmt.Sprintf("start error: %v", err)
|
||||
return entry, false
|
||||
}
|
||||
entry.Exit = resp.Msg.ExitCode
|
||||
entry.Ok = resp.Msg.ExitCode == 0
|
||||
if !entry.Ok {
|
||||
entry.Stderr = fmt.Sprintf("start failed with exit code %d: %s", resp.Msg.ExitCode, string(resp.Msg.Stderr))
|
||||
}
|
||||
return entry, entry.Ok
|
||||
}
|
||||
129
internal/recipe/step.go
Normal file
129
internal/recipe/step.go
Normal file
@ -0,0 +1,129 @@
|
||||
package recipe
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Kind identifies the instruction type in a recipe line.
type Kind int

const (
	KindRUN     Kind = iota // Execute a command and wait for it to exit.
	KindSTART               // Start a command in the background (non-blocking).
	KindENV                 // Set an environment variable for subsequent steps.
	KindWORKDIR             // Set the working directory for subsequent steps.
	KindUSER                // Switch the unix user for subsequent steps. (stub)
	KindCOPY                // Copy files into the sandbox. (stub)
)

// Step is the parsed representation of one recipe instruction.
type Step struct {
	Kind    Kind
	Raw     string        // original string (whitespace-trimmed), preserved for logging
	Shell   string        // KindRUN, KindSTART: the shell command text
	Timeout time.Duration // KindRUN: 0 means use caller's default
	Key     string        // KindENV: variable name
	Value   string        // KindENV: variable value
	Path    string        // KindWORKDIR: directory path
}

// ParseStep parses a single recipe instruction string into a Step.
// Instructions are Dockerfile-like: a keyword followed by arguments. The
// keyword is case-insensitive and is separated from its arguments by a space
// or a tab.
//
// Supported syntax:
//
//	RUN <cmd>                — run command, wait for exit
//	RUN --timeout=<d> <cmd>  — run command with explicit timeout (e.g. --timeout=5m)
//	START <cmd>              — start command in background, return immediately
//	ENV <key>=<value>        — set environment variable
//	WORKDIR <path>           — set working directory
//	USER <name>              — not yet supported
//	COPY <src> <dst>         — not yet supported
//
// An error is returned for an empty line, an unknown keyword, or arguments
// that do not satisfy the instruction's syntax.
func ParseStep(s string) (Step, error) {
	s = strings.TrimSpace(s)
	if s == "" {
		return Step{}, fmt.Errorf("empty step")
	}

	keyword, rest := splitWord(s)

	switch strings.ToUpper(keyword) {
	case "RUN":
		return parseRUN(s, rest)
	case "START":
		return parseSTART(s, rest)
	case "ENV":
		return parseENV(s, rest)
	case "WORKDIR":
		return parseWORKDIR(s, rest)
	case "USER":
		return Step{Kind: KindUSER, Raw: s}, nil
	case "COPY":
		return Step{Kind: KindCOPY, Raw: s}, nil
	default:
		return Step{}, fmt.Errorf("unknown instruction %q (expected RUN, START, ENV, WORKDIR, USER, or COPY)", keyword)
	}
}

// ParseRecipe parses all recipe lines, returning on the first error. The
// error names the 1-based line that failed.
func ParseRecipe(lines []string) ([]Step, error) {
	steps := make([]Step, 0, len(lines))
	for i, line := range lines {
		st, err := ParseStep(line)
		if err != nil {
			return nil, fmt.Errorf("recipe line %d: %w", i+1, err)
		}
		steps = append(steps, st)
	}
	return steps, nil
}

// splitWord splits s at its first space or tab into the leading word and the
// whitespace-trimmed remainder. If s contains neither, rest is empty.
func splitWord(s string) (word, rest string) {
	i := strings.IndexAny(s, " \t")
	if i < 0 {
		return s, ""
	}
	return s[:i], strings.TrimSpace(s[i+1:])
}

// parseRUN parses the arguments of a RUN instruction: an optional leading
// --timeout=<duration> flag followed by the shell command. The duration uses
// time.ParseDuration syntax and must be positive; a zero or negative value
// would otherwise be silently replaced by the executor's default.
func parseRUN(raw, rest string) (Step, error) {
	var timeout time.Duration
	if strings.HasPrefix(rest, "--timeout=") {
		flag, cmd := splitWord(rest[len("--timeout="):])
		if cmd == "" {
			return Step{}, fmt.Errorf("RUN --timeout= flag has no command: %q", raw)
		}
		d, err := time.ParseDuration(flag)
		if err != nil {
			return Step{}, fmt.Errorf("RUN --timeout= invalid duration %q: %w", flag, err)
		}
		if d <= 0 {
			return Step{}, fmt.Errorf("RUN --timeout= duration must be positive, got %q", flag)
		}
		timeout = d
		rest = cmd
	}
	if rest == "" {
		return Step{}, fmt.Errorf("RUN requires a command: %q", raw)
	}
	return Step{Kind: KindRUN, Raw: raw, Shell: rest, Timeout: timeout}, nil
}

// parseSTART parses the arguments of a START instruction, which is just the
// shell command to launch in the background.
func parseSTART(raw, rest string) (Step, error) {
	if rest == "" {
		return Step{}, fmt.Errorf("START requires a command: %q", raw)
	}
	return Step{Kind: KindSTART, Raw: raw, Shell: rest}, nil
}

// parseENV parses KEY=VALUE. Everything after the first '=' is the value
// (which may be empty and may itself contain '=' or spaces).
//
// The key must be a valid shell variable name (ASCII letters, digits and
// underscores, not starting with a digit): the executor writes it unquoted
// in front of each command, so any other key would produce a broken command
// line for every subsequent step.
func parseENV(raw, rest string) (Step, error) {
	key, value, found := strings.Cut(rest, "=")
	if !found {
		return Step{}, fmt.Errorf("ENV requires KEY=VALUE format: %q", raw)
	}
	if key == "" {
		return Step{}, fmt.Errorf("ENV key is empty: %q", raw)
	}
	if !isEnvName(key) {
		return Step{}, fmt.Errorf("ENV key %q is not a valid variable name: %q", key, raw)
	}
	return Step{Kind: KindENV, Raw: raw, Key: key, Value: value}, nil
}

// isEnvName reports whether name matches [A-Za-z_][A-Za-z0-9_]*.
func isEnvName(name string) bool {
	if name == "" {
		return false
	}
	for i := 0; i < len(name); i++ {
		c := name[i]
		switch {
		case c == '_', 'a' <= c && c <= 'z', 'A' <= c && c <= 'Z':
		case '0' <= c && c <= '9' && i > 0:
		default:
			return false
		}
	}
	return true
}

// parseWORKDIR parses the argument of a WORKDIR instruction; the whole
// remainder of the line (spaces included) is taken as the path.
func parseWORKDIR(raw, path string) (Step, error) {
	if path == "" {
		return Step{}, fmt.Errorf("WORKDIR requires a path: %q", raw)
	}
	return Step{Kind: KindWORKDIR, Raw: raw, Path: path}, nil
}
|
||||
208
internal/recipe/step_test.go
Normal file
208
internal/recipe/step_test.go
Normal file
@ -0,0 +1,208 @@
|
||||
package recipe
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestParseStep(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
input string
|
||||
want Step
|
||||
wantErr bool
|
||||
}{
|
||||
// RUN
|
||||
{
|
||||
name: "RUN basic",
|
||||
input: "RUN apt install -y curl",
|
||||
want: Step{Kind: KindRUN, Raw: "RUN apt install -y curl", Shell: "apt install -y curl"},
|
||||
},
|
||||
{
|
||||
name: "RUN lowercase",
|
||||
input: "run echo hello",
|
||||
want: Step{Kind: KindRUN, Raw: "run echo hello", Shell: "echo hello"},
|
||||
},
|
||||
{
|
||||
name: "RUN with timeout",
|
||||
input: "RUN --timeout=5m npm install",
|
||||
want: Step{Kind: KindRUN, Raw: "RUN --timeout=5m npm install", Shell: "npm install", Timeout: 5 * time.Minute},
|
||||
},
|
||||
{
|
||||
name: "RUN with timeout seconds",
|
||||
input: "RUN --timeout=30s make build",
|
||||
want: Step{Kind: KindRUN, Raw: "RUN --timeout=30s make build", Shell: "make build", Timeout: 30 * time.Second},
|
||||
},
|
||||
{
|
||||
name: "RUN no command",
|
||||
input: "RUN",
|
||||
wantErr: true,
|
||||
},
|
||||
{
|
||||
name: "RUN timeout no command",
|
||||
input: "RUN --timeout=5m",
|
||||
wantErr: true,
|
||||
},
|
||||
{
|
||||
name: "RUN invalid timeout",
|
||||
input: "RUN --timeout=notaduration echo hi",
|
||||
wantErr: true,
|
||||
},
|
||||
// START
|
||||
{
|
||||
name: "START basic",
|
||||
input: "START python3 app.py",
|
||||
want: Step{Kind: KindSTART, Raw: "START python3 app.py", Shell: "python3 app.py"},
|
||||
},
|
||||
{
|
||||
name: "START uppercase",
|
||||
input: "START node server.js --port=8080",
|
||||
want: Step{Kind: KindSTART, Raw: "START node server.js --port=8080", Shell: "node server.js --port=8080"},
|
||||
},
|
||||
{
|
||||
name: "START no command",
|
||||
input: "START",
|
||||
wantErr: true,
|
||||
},
|
||||
// ENV
|
||||
{
|
||||
name: "ENV basic",
|
||||
input: "ENV FOO=bar",
|
||||
want: Step{Kind: KindENV, Raw: "ENV FOO=bar", Key: "FOO", Value: "bar"},
|
||||
},
|
||||
{
|
||||
name: "ENV value with spaces",
|
||||
input: "ENV GREETING=hello world",
|
||||
want: Step{Kind: KindENV, Raw: "ENV GREETING=hello world", Key: "GREETING", Value: "hello world"},
|
||||
},
|
||||
{
|
||||
name: "ENV value with equals sign",
|
||||
input: "ENV URL=http://example.com?a=1",
|
||||
want: Step{Kind: KindENV, Raw: "ENV URL=http://example.com?a=1", Key: "URL", Value: "http://example.com?a=1"},
|
||||
},
|
||||
{
|
||||
name: "ENV empty value",
|
||||
input: "ENV FOO=",
|
||||
want: Step{Kind: KindENV, Raw: "ENV FOO=", Key: "FOO", Value: ""},
|
||||
},
|
||||
{
|
||||
name: "ENV missing equals",
|
||||
input: "ENV FOO",
|
||||
wantErr: true,
|
||||
},
|
||||
{
|
||||
name: "ENV empty key",
|
||||
input: "ENV =value",
|
||||
wantErr: true,
|
||||
},
|
||||
// WORKDIR
|
||||
{
|
||||
name: "WORKDIR basic",
|
||||
input: "WORKDIR /app",
|
||||
want: Step{Kind: KindWORKDIR, Raw: "WORKDIR /app", Path: "/app"},
|
||||
},
|
||||
{
|
||||
name: "WORKDIR with spaces in path",
|
||||
input: "WORKDIR /my project",
|
||||
want: Step{Kind: KindWORKDIR, Raw: "WORKDIR /my project", Path: "/my project"},
|
||||
},
|
||||
{
|
||||
name: "WORKDIR empty",
|
||||
input: "WORKDIR",
|
||||
wantErr: true,
|
||||
},
|
||||
// USER and COPY stubs
|
||||
{
|
||||
name: "USER stub",
|
||||
input: "USER www-data",
|
||||
want: Step{Kind: KindUSER, Raw: "USER www-data"},
|
||||
},
|
||||
{
|
||||
name: "COPY stub",
|
||||
input: "COPY config.yaml /etc/app/config.yaml",
|
||||
want: Step{Kind: KindCOPY, Raw: "COPY config.yaml /etc/app/config.yaml"},
|
||||
},
|
||||
// Unknown keyword
|
||||
{
|
||||
name: "unknown keyword",
|
||||
input: "FROBNICATE something",
|
||||
wantErr: true,
|
||||
},
|
||||
// Empty input
|
||||
{
|
||||
name: "empty string",
|
||||
input: "",
|
||||
wantErr: true,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range tests {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
got, err := ParseStep(tc.input)
|
||||
if tc.wantErr {
|
||||
if err == nil {
|
||||
t.Fatalf("ParseStep(%q) expected error, got %+v", tc.input, got)
|
||||
}
|
||||
return
|
||||
}
|
||||
if err != nil {
|
||||
t.Fatalf("ParseStep(%q) unexpected error: %v", tc.input, err)
|
||||
}
|
||||
if got != tc.want {
|
||||
t.Errorf("ParseStep(%q)\n got %+v\n want %+v", tc.input, got, tc.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseRecipe(t *testing.T) {
|
||||
t.Run("valid recipe", func(t *testing.T) {
|
||||
lines := []string{
|
||||
"RUN apt update",
|
||||
"WORKDIR /app",
|
||||
"ENV PORT=8080",
|
||||
"START python3 server.py",
|
||||
"RUN --timeout=2m pip install -r requirements.txt",
|
||||
}
|
||||
steps, err := ParseRecipe(lines)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if len(steps) != 5 {
|
||||
t.Fatalf("expected 5 steps, got %d", len(steps))
|
||||
}
|
||||
if steps[0].Kind != KindRUN {
|
||||
t.Errorf("step 0: want KindRUN, got %v", steps[0].Kind)
|
||||
}
|
||||
if steps[1].Kind != KindWORKDIR {
|
||||
t.Errorf("step 1: want KindWORKDIR, got %v", steps[1].Kind)
|
||||
}
|
||||
if steps[3].Kind != KindSTART {
|
||||
t.Errorf("step 3: want KindSTART, got %v", steps[3].Kind)
|
||||
}
|
||||
if steps[4].Timeout != 2*time.Minute {
|
||||
t.Errorf("step 4: want 2m timeout, got %v", steps[4].Timeout)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("error on invalid line", func(t *testing.T) {
|
||||
lines := []string{
|
||||
"RUN apt update",
|
||||
"BADCMD something",
|
||||
}
|
||||
_, err := ParseRecipe(lines)
|
||||
if err == nil {
|
||||
t.Fatal("expected error for invalid line, got nil")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("empty recipe", func(t *testing.T) {
|
||||
steps, err := ParseRecipe(nil)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if len(steps) != 0 {
|
||||
t.Fatalf("expected 0 steps, got %d", len(steps))
|
||||
}
|
||||
})
|
||||
}
|
||||
@ -5,6 +5,7 @@ import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"connectrpc.com/connect"
|
||||
@ -14,6 +15,7 @@ import (
|
||||
"git.omukk.dev/wrenn/sandbox/internal/db"
|
||||
"git.omukk.dev/wrenn/sandbox/internal/id"
|
||||
"git.omukk.dev/wrenn/sandbox/internal/lifecycle"
|
||||
"git.omukk.dev/wrenn/sandbox/internal/recipe"
|
||||
"git.omukk.dev/wrenn/sandbox/internal/scheduler"
|
||||
pb "git.omukk.dev/wrenn/sandbox/proto/hostagent/gen"
|
||||
)
|
||||
@ -27,14 +29,14 @@ const (
|
||||
|
||||
// preBuildCmds run before the user recipe to prepare the build environment.
|
||||
var preBuildCmds = []string{
|
||||
"apt update",
|
||||
"RUN apt update",
|
||||
}
|
||||
|
||||
// postBuildCmds run after the user recipe to clean up caches and reduce image size.
|
||||
var postBuildCmds = []string{
|
||||
"apt clean",
|
||||
"apt autoremove -y",
|
||||
"rm -rf /var/lib/apt/lists/*",
|
||||
"RUN apt clean",
|
||||
"RUN apt autoremove -y",
|
||||
"RUN rm -rf /var/lib/apt/lists/*",
|
||||
}
|
||||
|
||||
// buildAgentClient is the subset of the host agent client used by the build worker.
|
||||
@ -46,24 +48,15 @@ type buildAgentClient interface {
|
||||
FlattenRootfs(ctx context.Context, req *connect.Request[pb.FlattenRootfsRequest]) (*connect.Response[pb.FlattenRootfsResponse], error)
|
||||
}
|
||||
|
||||
// BuildLogEntry represents a single entry in the build log JSONB array.
|
||||
type BuildLogEntry struct {
|
||||
Step int `json:"step"`
|
||||
Phase string `json:"phase"` // "pre-build", "recipe", or "post-build"
|
||||
Cmd string `json:"cmd"`
|
||||
Stdout string `json:"stdout"`
|
||||
Stderr string `json:"stderr"`
|
||||
Exit int32 `json:"exit"`
|
||||
Ok bool `json:"ok"`
|
||||
Elapsed int64 `json:"elapsed_ms"`
|
||||
}
|
||||
|
||||
// BuildService handles template build orchestration.
|
||||
type BuildService struct {
|
||||
DB *db.Queries
|
||||
Redis *redis.Client
|
||||
Pool *lifecycle.HostClientPool
|
||||
Scheduler scheduler.HostScheduler
|
||||
|
||||
mu sync.Mutex
|
||||
cancelMap map[string]context.CancelFunc // buildID → per-build cancel func
|
||||
}
|
||||
|
||||
// BuildCreateParams holds the parameters for creating a template build.
|
||||
@ -74,6 +67,7 @@ type BuildCreateParams struct {
|
||||
Healthcheck string
|
||||
VCPUs int32
|
||||
MemoryMB int32
|
||||
SkipPrePost bool
|
||||
}
|
||||
|
||||
// Create inserts a new build record and enqueues it to Redis.
|
||||
@ -97,6 +91,11 @@ func (s *BuildService) Create(ctx context.Context, p BuildCreateParams) (db.Temp
|
||||
buildIDStr := id.FormatBuildID(buildID)
|
||||
newTemplateID := id.NewTemplateID()
|
||||
|
||||
defaultSteps := len(preBuildCmds) + len(postBuildCmds)
|
||||
if p.SkipPrePost {
|
||||
defaultSteps = 0
|
||||
}
|
||||
|
||||
build, err := s.DB.InsertTemplateBuild(ctx, db.InsertTemplateBuildParams{
|
||||
ID: buildID,
|
||||
Name: p.Name,
|
||||
@ -105,9 +104,10 @@ func (s *BuildService) Create(ctx context.Context, p BuildCreateParams) (db.Temp
|
||||
Healthcheck: p.Healthcheck,
|
||||
Vcpus: p.VCPUs,
|
||||
MemoryMb: p.MemoryMB,
|
||||
TotalSteps: int32(len(p.Recipe) + len(preBuildCmds) + len(postBuildCmds)),
|
||||
TotalSteps: int32(len(p.Recipe) + defaultSteps),
|
||||
TemplateID: newTemplateID,
|
||||
TeamID: id.PlatformTeamID,
|
||||
SkipPrePost: p.SkipPrePost,
|
||||
})
|
||||
if err != nil {
|
||||
return db.TemplateBuild{}, fmt.Errorf("insert build: %w", err)
|
||||
@ -131,6 +131,40 @@ func (s *BuildService) List(ctx context.Context) ([]db.TemplateBuild, error) {
|
||||
return s.DB.ListTemplateBuilds(ctx)
|
||||
}
|
||||
|
||||
// Cancel cancels a pending or running build. For pending builds the status is
|
||||
// updated in the DB and the worker skips it when dequeued. For running builds
|
||||
// the per-build context is cancelled, which causes the current exec step to
|
||||
// abort; executeBuild then detects the cancellation and records the status.
|
||||
func (s *BuildService) Cancel(ctx context.Context, buildID pgtype.UUID) error {
|
||||
build, err := s.DB.GetTemplateBuild(ctx, buildID)
|
||||
if err != nil {
|
||||
return fmt.Errorf("get build: %w", err)
|
||||
}
|
||||
switch build.Status {
|
||||
case "success", "failed", "cancelled":
|
||||
return fmt.Errorf("build is already %s", build.Status)
|
||||
}
|
||||
|
||||
// Mark cancelled in DB first. This handles both pending builds (which haven't
|
||||
// been picked up yet) and acts as a flag for executeBuild to check on start.
|
||||
if _, err := s.DB.UpdateBuildStatus(ctx, db.UpdateBuildStatusParams{
|
||||
ID: buildID, Status: "cancelled",
|
||||
}); err != nil {
|
||||
return fmt.Errorf("update build status: %w", err)
|
||||
}
|
||||
|
||||
// If the build is currently running, signal its context.
|
||||
buildIDStr := id.FormatBuildID(buildID)
|
||||
s.mu.Lock()
|
||||
cancel, running := s.cancelMap[buildIDStr]
|
||||
s.mu.Unlock()
|
||||
if running {
|
||||
cancel()
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// StartWorkers launches n goroutines that consume from the Redis build queue.
|
||||
// The returned cancel function stops all workers.
|
||||
func (s *BuildService) StartWorkers(ctx context.Context, n int) context.CancelFunc {
|
||||
@ -172,14 +206,38 @@ func (s *BuildService) executeBuild(ctx context.Context, buildIDStr string) {
|
||||
return
|
||||
}
|
||||
|
||||
build, err := s.DB.GetTemplateBuild(ctx, buildID)
|
||||
// Create a per-build context so this build can be cancelled independently of
|
||||
// the worker. Register in cancelMap before fetching the build so that a
|
||||
// concurrent Cancel call can always find and signal it.
|
||||
buildCtx, buildCancel := context.WithCancel(ctx)
|
||||
defer buildCancel()
|
||||
|
||||
s.mu.Lock()
|
||||
if s.cancelMap == nil {
|
||||
s.cancelMap = make(map[string]context.CancelFunc)
|
||||
}
|
||||
s.cancelMap[buildIDStr] = buildCancel
|
||||
s.mu.Unlock()
|
||||
defer func() {
|
||||
s.mu.Lock()
|
||||
delete(s.cancelMap, buildIDStr)
|
||||
s.mu.Unlock()
|
||||
}()
|
||||
|
||||
build, err := s.DB.GetTemplateBuild(buildCtx, buildID)
|
||||
if err != nil {
|
||||
log.Error("failed to fetch build", "error", err)
|
||||
return
|
||||
}
|
||||
|
||||
// Skip if already cancelled (Cancel was called before we dequeued).
|
||||
if build.Status == "cancelled" {
|
||||
log.Info("build already cancelled, skipping")
|
||||
return
|
||||
}
|
||||
|
||||
// Mark as running.
|
||||
if _, err := s.DB.UpdateBuildStatus(ctx, db.UpdateBuildStatusParams{
|
||||
if _, err := s.DB.UpdateBuildStatus(buildCtx, db.UpdateBuildStatusParams{
|
||||
ID: buildID, Status: "running",
|
||||
}); err != nil {
|
||||
log.Error("failed to update build status", "error", err)
|
||||
@ -187,22 +245,22 @@ func (s *BuildService) executeBuild(ctx context.Context, buildIDStr string) {
|
||||
}
|
||||
|
||||
// Parse user recipe.
|
||||
var recipe []string
|
||||
if err := json.Unmarshal(build.Recipe, &recipe); err != nil {
|
||||
s.failBuild(ctx, buildID, fmt.Sprintf("invalid recipe JSON: %v", err))
|
||||
var userRecipe []string
|
||||
if err := json.Unmarshal(build.Recipe, &userRecipe); err != nil {
|
||||
s.failBuild(buildCtx, buildID, fmt.Sprintf("invalid recipe JSON: %v", err))
|
||||
return
|
||||
}
|
||||
|
||||
// Pick a platform host and create a sandbox.
|
||||
host, err := s.Scheduler.SelectHost(ctx, id.PlatformTeamID, false)
|
||||
host, err := s.Scheduler.SelectHost(buildCtx, id.PlatformTeamID, false)
|
||||
if err != nil {
|
||||
s.failBuild(ctx, buildID, fmt.Sprintf("no host available: %v", err))
|
||||
s.failBuild(buildCtx, buildID, fmt.Sprintf("no host available: %v", err))
|
||||
return
|
||||
}
|
||||
|
||||
agent, err := s.Pool.GetForHost(host)
|
||||
if err != nil {
|
||||
s.failBuild(ctx, buildID, fmt.Sprintf("agent client error: %v", err))
|
||||
s.failBuild(buildCtx, buildID, fmt.Sprintf("agent client error: %v", err))
|
||||
return
|
||||
}
|
||||
|
||||
@ -214,16 +272,16 @@ func (s *BuildService) executeBuild(ctx context.Context, buildIDStr string) {
|
||||
baseTeamID := id.PlatformTeamID
|
||||
baseTemplateID := id.MinimalTemplateID
|
||||
if build.BaseTemplate != "minimal" {
|
||||
baseTmpl, err := s.DB.GetPlatformTemplateByName(ctx, build.BaseTemplate)
|
||||
baseTmpl, err := s.DB.GetPlatformTemplateByName(buildCtx, build.BaseTemplate)
|
||||
if err != nil {
|
||||
s.failBuild(ctx, buildID, fmt.Sprintf("base template %q not found: %v", build.BaseTemplate, err))
|
||||
s.failBuild(buildCtx, buildID, fmt.Sprintf("base template %q not found: %v", build.BaseTemplate, err))
|
||||
return
|
||||
}
|
||||
baseTeamID = baseTmpl.TeamID
|
||||
baseTemplateID = baseTmpl.ID
|
||||
}
|
||||
|
||||
resp, err := agent.CreateSandbox(ctx, connect.NewRequest(&pb.CreateSandboxRequest{
|
||||
resp, err := agent.CreateSandbox(buildCtx, connect.NewRequest(&pb.CreateSandboxRequest{
|
||||
SandboxId: sandboxIDStr,
|
||||
Template: build.BaseTemplate,
|
||||
TeamId: id.UUIDString(baseTeamID),
|
||||
@ -234,129 +292,121 @@ func (s *BuildService) executeBuild(ctx context.Context, buildIDStr string) {
|
||||
DiskSizeMb: 5120, // 5 GB for template builds
|
||||
}))
|
||||
if err != nil {
|
||||
s.failBuild(ctx, buildID, fmt.Sprintf("create sandbox failed: %v", err))
|
||||
s.failBuild(buildCtx, buildID, fmt.Sprintf("create sandbox failed: %v", err))
|
||||
return
|
||||
}
|
||||
_ = resp
|
||||
|
||||
// Record sandbox/host association.
|
||||
_ = s.DB.UpdateBuildSandbox(ctx, db.UpdateBuildSandboxParams{
|
||||
_ = s.DB.UpdateBuildSandbox(buildCtx, db.UpdateBuildSandboxParams{
|
||||
ID: buildID,
|
||||
SandboxID: sandboxID,
|
||||
HostID: host.ID,
|
||||
})
|
||||
|
||||
// Parse recipe steps. preBuildCmds and postBuildCmds are hardcoded and always
|
||||
// valid; panic on error is appropriate here since it would be a programmer mistake.
|
||||
preBuildSteps, err := recipe.ParseRecipe(preBuildCmds)
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("invalid pre-build recipe: %v", err))
|
||||
}
|
||||
userRecipeSteps, err := recipe.ParseRecipe(userRecipe)
|
||||
if err != nil {
|
||||
s.destroySandbox(buildCtx, agent, sandboxIDStr)
|
||||
s.failBuild(buildCtx, buildID, fmt.Sprintf("recipe parse error: %v", err))
|
||||
return
|
||||
}
|
||||
postBuildSteps, err := recipe.ParseRecipe(postBuildCmds)
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("invalid post-build recipe: %v", err))
|
||||
}
|
||||
|
||||
// Execute build phases: pre-build → user recipe → post-build.
|
||||
var logs []BuildLogEntry
|
||||
// bctx carries working directory and env vars across all phases.
|
||||
var logs []recipe.BuildLogEntry
|
||||
step := 0
|
||||
bctx := &recipe.ExecContext{}
|
||||
|
||||
// Helper to run a list of commands in a given phase.
|
||||
// timeout=0 means no timeout (uses parent context).
|
||||
runPhase := func(phase string, cmds []string, timeout time.Duration) bool {
|
||||
for _, cmd := range cmds {
|
||||
step++
|
||||
log.Info("executing build step", "phase", phase, "step", step, "cmd", cmd)
|
||||
|
||||
execCtx := ctx
|
||||
var cancel context.CancelFunc
|
||||
// When no timeout is specified, use 10 minutes as a generous upper
|
||||
// bound. The host agent defaults TimeoutSec=0 to 30s, so we must
|
||||
// always send an explicit value.
|
||||
effectiveTimeout := timeout
|
||||
if effectiveTimeout <= 0 {
|
||||
effectiveTimeout = 10 * time.Minute
|
||||
}
|
||||
execCtx, cancel = context.WithTimeout(ctx, effectiveTimeout)
|
||||
timeoutSec := int32(effectiveTimeout.Seconds())
|
||||
|
||||
start := time.Now()
|
||||
execResp, err := agent.Exec(execCtx, connect.NewRequest(&pb.ExecRequest{
|
||||
SandboxId: sandboxIDStr,
|
||||
Cmd: "/bin/sh",
|
||||
Args: []string{"-c", cmd},
|
||||
TimeoutSec: timeoutSec,
|
||||
}))
|
||||
cancel()
|
||||
|
||||
entry := BuildLogEntry{
|
||||
Step: step,
|
||||
Phase: phase,
|
||||
Cmd: cmd,
|
||||
Elapsed: time.Since(start).Milliseconds(),
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
entry.Stderr = err.Error()
|
||||
entry.Ok = false
|
||||
logs = append(logs, entry)
|
||||
s.updateLogs(ctx, buildID, step, logs)
|
||||
s.destroySandbox(ctx, agent, sandboxIDStr)
|
||||
s.failBuild(ctx, buildID, fmt.Sprintf("%s step %d failed: %v", phase, step, err))
|
||||
runPhase := func(phase string, steps []recipe.Step, defaultTimeout time.Duration) bool {
|
||||
newEntries, nextStep, ok := recipe.Execute(buildCtx, phase, steps, sandboxIDStr, step, defaultTimeout, bctx, agent.Exec)
|
||||
logs = append(logs, newEntries...)
|
||||
step = nextStep
|
||||
s.updateLogs(buildCtx, buildID, step, logs)
|
||||
if !ok {
|
||||
s.destroySandbox(buildCtx, agent, sandboxIDStr)
|
||||
// If the build was cancelled, status is already set — don't overwrite with "failed".
|
||||
if buildCtx.Err() != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
entry.Stdout = string(execResp.Msg.Stdout)
|
||||
entry.Stderr = string(execResp.Msg.Stderr)
|
||||
entry.Exit = execResp.Msg.ExitCode
|
||||
entry.Ok = execResp.Msg.ExitCode == 0
|
||||
logs = append(logs, entry)
|
||||
s.updateLogs(ctx, buildID, step, logs)
|
||||
|
||||
if execResp.Msg.ExitCode != 0 {
|
||||
s.destroySandbox(ctx, agent, sandboxIDStr)
|
||||
s.failBuild(ctx, buildID, fmt.Sprintf("%s step %d failed with exit code %d", phase, step, execResp.Msg.ExitCode))
|
||||
return false
|
||||
last := newEntries[len(newEntries)-1]
|
||||
reason := last.Stderr
|
||||
if reason == "" {
|
||||
reason = fmt.Sprintf("exit code %d", last.Exit)
|
||||
}
|
||||
s.failBuild(buildCtx, buildID, fmt.Sprintf("%s step %d failed: %s", phase, step, reason))
|
||||
}
|
||||
return true
|
||||
return ok
|
||||
}
|
||||
|
||||
if !runPhase("pre-build", preBuildCmds, 0) {
|
||||
if !build.SkipPrePost {
|
||||
if !runPhase("pre-build", preBuildSteps, 0) {
|
||||
return
|
||||
}
|
||||
}
|
||||
if !runPhase("recipe", userRecipeSteps, buildCommandTimeout) {
|
||||
return
|
||||
}
|
||||
if !runPhase("recipe", recipe, buildCommandTimeout) {
|
||||
return
|
||||
}
|
||||
if !runPhase("post-build", postBuildCmds, 0) {
|
||||
return
|
||||
if !build.SkipPrePost {
|
||||
if !runPhase("post-build", postBuildSteps, 0) {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// Healthcheck or direct snapshot.
|
||||
var sizeBytes int64
|
||||
if build.Healthcheck != "" {
|
||||
log.Info("running healthcheck", "cmd", build.Healthcheck)
|
||||
if err := s.waitForHealthcheck(ctx, agent, sandboxIDStr, build.Healthcheck); err != nil {
|
||||
s.destroySandbox(ctx, agent, sandboxIDStr)
|
||||
s.failBuild(ctx, buildID, fmt.Sprintf("healthcheck failed: %v", err))
|
||||
if err := s.waitForHealthcheck(buildCtx, agent, sandboxIDStr, build.Healthcheck); err != nil {
|
||||
s.destroySandbox(buildCtx, agent, sandboxIDStr)
|
||||
if buildCtx.Err() != nil {
|
||||
return
|
||||
}
|
||||
s.failBuild(buildCtx, buildID, fmt.Sprintf("healthcheck failed: %v", err))
|
||||
return
|
||||
}
|
||||
|
||||
// Healthcheck passed → full snapshot (with memory/CPU state).
|
||||
log.Info("healthcheck passed, creating snapshot")
|
||||
snapResp, err := agent.CreateSnapshot(ctx, connect.NewRequest(&pb.CreateSnapshotRequest{
|
||||
snapResp, err := agent.CreateSnapshot(buildCtx, connect.NewRequest(&pb.CreateSnapshotRequest{
|
||||
SandboxId: sandboxIDStr,
|
||||
Name: build.Name,
|
||||
TeamId: id.UUIDString(build.TeamID),
|
||||
TemplateId: id.UUIDString(build.TemplateID),
|
||||
}))
|
||||
if err != nil {
|
||||
s.destroySandbox(ctx, agent, sandboxIDStr)
|
||||
s.failBuild(ctx, buildID, fmt.Sprintf("create snapshot failed: %v", err))
|
||||
s.destroySandbox(buildCtx, agent, sandboxIDStr)
|
||||
if buildCtx.Err() != nil {
|
||||
return
|
||||
}
|
||||
s.failBuild(buildCtx, buildID, fmt.Sprintf("create snapshot failed: %v", err))
|
||||
return
|
||||
}
|
||||
sizeBytes = snapResp.Msg.SizeBytes
|
||||
} else {
|
||||
// No healthcheck → image-only template (rootfs only).
|
||||
log.Info("no healthcheck, flattening rootfs")
|
||||
flatResp, err := agent.FlattenRootfs(ctx, connect.NewRequest(&pb.FlattenRootfsRequest{
|
||||
flatResp, err := agent.FlattenRootfs(buildCtx, connect.NewRequest(&pb.FlattenRootfsRequest{
|
||||
SandboxId: sandboxIDStr,
|
||||
Name: build.Name,
|
||||
TeamId: id.UUIDString(build.TeamID),
|
||||
TemplateId: id.UUIDString(build.TemplateID),
|
||||
}))
|
||||
if err != nil {
|
||||
s.destroySandbox(ctx, agent, sandboxIDStr)
|
||||
s.failBuild(ctx, buildID, fmt.Sprintf("flatten rootfs failed: %v", err))
|
||||
s.destroySandbox(buildCtx, agent, sandboxIDStr)
|
||||
if buildCtx.Err() != nil {
|
||||
return
|
||||
}
|
||||
s.failBuild(buildCtx, buildID, fmt.Sprintf("flatten rootfs failed: %v", err))
|
||||
return
|
||||
}
|
||||
sizeBytes = flatResp.Msg.SizeBytes
|
||||
@ -368,7 +418,7 @@ func (s *BuildService) executeBuild(ctx context.Context, buildIDStr string) {
|
||||
templateType = "snapshot"
|
||||
}
|
||||
|
||||
if _, err := s.DB.InsertTemplate(ctx, db.InsertTemplateParams{
|
||||
if _, err := s.DB.InsertTemplate(buildCtx, db.InsertTemplateParams{
|
||||
ID: build.TemplateID,
|
||||
Name: build.Name,
|
||||
Type: templateType,
|
||||
@ -386,7 +436,7 @@ func (s *BuildService) executeBuild(ctx context.Context, buildIDStr string) {
|
||||
// No additional destroy needed.
|
||||
|
||||
// Mark build as success.
|
||||
if _, err := s.DB.UpdateBuildStatus(ctx, db.UpdateBuildStatusParams{
|
||||
if _, err := s.DB.UpdateBuildStatus(buildCtx, db.UpdateBuildStatusParams{
|
||||
ID: buildID, Status: "success",
|
||||
}); err != nil {
|
||||
log.Error("failed to mark build as success", "error", err)
|
||||
@ -429,7 +479,7 @@ func (s *BuildService) waitForHealthcheck(ctx context.Context, agent buildAgentC
|
||||
}
|
||||
}
|
||||
|
||||
func (s *BuildService) updateLogs(ctx context.Context, buildID pgtype.UUID, step int, logs []BuildLogEntry) {
|
||||
func (s *BuildService) updateLogs(ctx context.Context, buildID pgtype.UUID, step int, logs []recipe.BuildLogEntry) {
|
||||
logsJSON, err := json.Marshal(logs)
|
||||
if err != nil {
|
||||
slog.Warn("failed to marshal build logs", "error", err)
|
||||
|
||||
Reference in New Issue
Block a user