diff --git a/db/migrations/20260330150223_build_options.sql b/db/migrations/20260330150223_build_options.sql new file mode 100644 index 0000000..981ad06 --- /dev/null +++ b/db/migrations/20260330150223_build_options.sql @@ -0,0 +1,11 @@ +-- +goose Up + +-- Allow completed_at to be set when a build is cancelled. +-- (The UpdateBuildStatus query is updated in sqlc; no schema change needed for that.) + +-- Add skip_pre_post flag: when true, the pre-build and post-build command phases +-- are skipped for this build. +ALTER TABLE template_builds ADD COLUMN skip_pre_post BOOLEAN NOT NULL DEFAULT FALSE; + +-- +goose Down +ALTER TABLE template_builds DROP COLUMN skip_pre_post; diff --git a/db/queries/template_builds.sql b/db/queries/template_builds.sql index be1c09e..1fb07be 100644 --- a/db/queries/template_builds.sql +++ b/db/queries/template_builds.sql @@ -1,6 +1,6 @@ -- name: InsertTemplateBuild :one -INSERT INTO template_builds (id, name, base_template, recipe, healthcheck, vcpus, memory_mb, status, total_steps, template_id, team_id) -VALUES ($1, $2, $3, $4, $5, $6, $7, 'pending', $8, $9, $10) +INSERT INTO template_builds (id, name, base_template, recipe, healthcheck, vcpus, memory_mb, status, total_steps, template_id, team_id, skip_pre_post) +VALUES ($1, $2, $3, $4, $5, $6, $7, 'pending', $8, $9, $10, $11) RETURNING *; -- name: GetTemplateBuild :one @@ -12,8 +12,8 @@ SELECT * FROM template_builds ORDER BY created_at DESC; -- name: UpdateBuildStatus :one UPDATE template_builds SET status = $2, - started_at = CASE WHEN $2 = 'running' AND started_at IS NULL THEN NOW() ELSE started_at END, - completed_at = CASE WHEN $2 IN ('success', 'failed') THEN NOW() ELSE completed_at END + started_at = CASE WHEN $2 = 'running' AND started_at IS NULL THEN NOW() ELSE started_at END, + completed_at = CASE WHEN $2 IN ('success', 'failed', 'cancelled') THEN NOW() ELSE completed_at END WHERE id = $1 RETURNING *; diff --git a/frontend/src/lib/api/builds.ts b/frontend/src/lib/api/builds.ts 
index 349c6e1..1de23b8 100644 --- a/frontend/src/lib/api/builds.ts +++ b/frontend/src/lib/api/builds.ts @@ -38,6 +38,7 @@ export type CreateBuildParams = { healthcheck?: string; vcpus?: number; memory_mb?: number; + skip_pre_post?: boolean; }; export async function createBuild(params: CreateBuildParams): Promise> { @@ -69,3 +70,7 @@ export async function listAdminTemplates(): Promise> export async function deleteAdminTemplate(name: string): Promise> { return apiFetch('DELETE', `/api/v1/admin/templates/${name}`); } + +export async function cancelBuild(id: string): Promise> { + return apiFetch('POST', `/api/v1/admin/builds/${id}/cancel`); +} diff --git a/frontend/src/routes/admin/templates/+page.svelte b/frontend/src/routes/admin/templates/+page.svelte index dde8fc3..4619e7b 100644 --- a/frontend/src/routes/admin/templates/+page.svelte +++ b/frontend/src/routes/admin/templates/+page.svelte @@ -6,6 +6,7 @@ import { listBuilds, createBuild, + cancelBuild, listAdminTemplates, deleteAdminTemplate, type Build, @@ -52,11 +53,15 @@ vcpus: 1, memory_mb: 512, recipe: '', - healthcheck: '' + healthcheck: '', + skip_pre_post: false }); let creating = $state(false); let createError = $state(null); + // Cancel build state + let cancelingBuildId = $state(null); + // Stats let templateCount = $derived(templates.length); let snapshotCount = $derived(templates.filter((t) => t.type === 'snapshot').length); @@ -123,12 +128,13 @@ recipe: lines, healthcheck: createForm.healthcheck.trim() || undefined, vcpus: createForm.vcpus, - memory_mb: createForm.memory_mb + memory_mb: createForm.memory_mb, + skip_pre_post: createForm.skip_pre_post }); if (result.ok) { showCreate = false; - createForm = { name: '', base_template: 'minimal', vcpus: 1, memory_mb: 512, recipe: '', healthcheck: '' }; + createForm = { name: '', base_template: 'minimal', vcpus: 1, memory_mb: 512, recipe: '', healthcheck: '', skip_pre_post: false }; builds = [result.data, ...builds]; activeTab = 'builds'; expandedBuildId = 
/**
 * Splits a recipe instruction into its keyword and the remaining argument text.
 *
 * The input is trimmed first, the keyword ends at the first whitespace
 * character (space or tab), and the remainder is trimmed as well. This mirrors
 * how the backend parser trims both the step and its argument text, so the
 * UI shows the same keyword/argument split the build executed.
 *
 * @param cmd - Instruction text as stored in the build log, e.g. `RUN apt update`.
 * @returns A `[keyword, rest]` tuple. `keyword` is upper-cased; `rest` is `''`
 *   when the instruction has no arguments.
 */
function splitInstruction(cmd: string): [string, string] {
  const trimmed = cmd.trim();
  const boundary = /\s/.exec(trimmed);
  if (boundary === null) {
    // No whitespace at all: the whole instruction is the keyword.
    return [trimmed.toUpperCase(), ''];
  }
  const keyword = trimmed.slice(0, boundary.index).toUpperCase();
  const rest = trimmed.slice(boundary.index + 1).trim();
  return [keyword, rest];
}

/**
 * Maps an upper-cased recipe keyword to the CSS colour variable used to
 * highlight it in the build log.
 *
 * @param keyword - Keyword as returned by `splitInstruction`.
 * @returns A CSS `var(...)` expression; keywords without a dedicated colour
 *   fall back to the muted text colour.
 */
function keywordColor(keyword: string): string {
  switch (keyword) {
    case 'RUN':
      return 'var(--color-blue)';
    case 'START':
      return 'var(--color-accent-bright)';
    case 'ENV':
      return 'var(--color-amber)';
    case 'WORKDIR':
      return 'var(--color-text-tertiary)';
    default:
      return 'var(--color-text-muted)';
  }
}
+ {#if build.status === 'pending' || build.status === 'running'} +
+ +
+ {/if} {#if build.error}
{build.error} @@ -524,6 +576,7 @@ {@const isInternal = log.phase === 'pre-build' || log.phase === 'post-build'} {@const recipeIdx = log.phase === 'recipe' ? build.logs.filter(l => l.phase === 'recipe' && l.step <= log.step).length : 0} {@const phaseLabel = isInternal ? (log.phase === 'pre-build' ? 'Pre-build' : 'Post-build') : `Step ${recipeIdx}`} + {@const [kw, kwRest] = splitInstruction(log.cmd)}
+ +
diff --git a/internal/api/handlers_builds.go b/internal/api/handlers_builds.go index 3b96400..282c3f4 100644 --- a/internal/api/handlers_builds.go +++ b/internal/api/handlers_builds.go @@ -36,6 +36,7 @@ type createBuildRequest struct { Healthcheck string `json:"healthcheck"` VCPUs int32 `json:"vcpus"` MemoryMB int32 `json:"memory_mb"` + SkipPrePost bool `json:"skip_pre_post"` } type buildResponse struct { @@ -127,6 +128,7 @@ func (h *buildHandler) Create(w http.ResponseWriter, r *http.Request) { Healthcheck: req.Healthcheck, VCPUs: req.VCPUs, MemoryMB: req.MemoryMB, + SkipPrePost: req.SkipPrePost, }) if err != nil { slog.Error("failed to create build", "error", err) @@ -254,3 +256,21 @@ func (h *buildHandler) DeleteTemplate(w http.ResponseWriter, r *http.Request) { w.WriteHeader(http.StatusNoContent) } + +// Cancel handles POST /v1/admin/builds/{id}/cancel. +func (h *buildHandler) Cancel(w http.ResponseWriter, r *http.Request) { + buildIDStr := chi.URLParam(r, "id") + + buildID, err := id.ParseBuildID(buildIDStr) + if err != nil { + writeError(w, http.StatusBadRequest, "invalid_request", "invalid build ID") + return + } + + if err := h.svc.Cancel(r.Context(), buildID); err != nil { + writeError(w, http.StatusBadRequest, "invalid_request", err.Error()) + return + } + + w.WriteHeader(http.StatusNoContent) +} diff --git a/internal/db/template_builds.sql.go b/internal/db/template_builds.sql.go index 7aa1b67..facfb19 100644 --- a/internal/db/template_builds.sql.go +++ b/internal/db/template_builds.sql.go @@ -12,7 +12,7 @@ import ( ) const getTemplateBuild = `-- name: GetTemplateBuild :one -SELECT id, name, base_template, recipe, healthcheck, vcpus, memory_mb, status, current_step, total_steps, logs, error, sandbox_id, host_id, created_at, started_at, completed_at, template_id, team_id FROM template_builds WHERE id = $1 +SELECT id, name, base_template, recipe, healthcheck, vcpus, memory_mb, status, current_step, total_steps, logs, error, sandbox_id, host_id, created_at, 
started_at, completed_at, template_id, team_id, skip_pre_post FROM template_builds WHERE id = $1 ` func (q *Queries) GetTemplateBuild(ctx context.Context, id pgtype.UUID) (TemplateBuild, error) { @@ -38,14 +38,15 @@ func (q *Queries) GetTemplateBuild(ctx context.Context, id pgtype.UUID) (Templat &i.CompletedAt, &i.TemplateID, &i.TeamID, + &i.SkipPrePost, ) return i, err } const insertTemplateBuild = `-- name: InsertTemplateBuild :one -INSERT INTO template_builds (id, name, base_template, recipe, healthcheck, vcpus, memory_mb, status, total_steps, template_id, team_id) -VALUES ($1, $2, $3, $4, $5, $6, $7, 'pending', $8, $9, $10) -RETURNING id, name, base_template, recipe, healthcheck, vcpus, memory_mb, status, current_step, total_steps, logs, error, sandbox_id, host_id, created_at, started_at, completed_at, template_id, team_id +INSERT INTO template_builds (id, name, base_template, recipe, healthcheck, vcpus, memory_mb, status, total_steps, template_id, team_id, skip_pre_post) +VALUES ($1, $2, $3, $4, $5, $6, $7, 'pending', $8, $9, $10, $11) +RETURNING id, name, base_template, recipe, healthcheck, vcpus, memory_mb, status, current_step, total_steps, logs, error, sandbox_id, host_id, created_at, started_at, completed_at, template_id, team_id, skip_pre_post ` type InsertTemplateBuildParams struct { @@ -59,6 +60,7 @@ type InsertTemplateBuildParams struct { TotalSteps int32 `json:"total_steps"` TemplateID pgtype.UUID `json:"template_id"` TeamID pgtype.UUID `json:"team_id"` + SkipPrePost bool `json:"skip_pre_post"` } func (q *Queries) InsertTemplateBuild(ctx context.Context, arg InsertTemplateBuildParams) (TemplateBuild, error) { @@ -73,6 +75,7 @@ func (q *Queries) InsertTemplateBuild(ctx context.Context, arg InsertTemplateBui arg.TotalSteps, arg.TemplateID, arg.TeamID, + arg.SkipPrePost, ) var i TemplateBuild err := row.Scan( @@ -95,12 +98,13 @@ func (q *Queries) InsertTemplateBuild(ctx context.Context, arg InsertTemplateBui &i.CompletedAt, &i.TemplateID, &i.TeamID, + 
&i.SkipPrePost, ) return i, err } const listTemplateBuilds = `-- name: ListTemplateBuilds :many -SELECT id, name, base_template, recipe, healthcheck, vcpus, memory_mb, status, current_step, total_steps, logs, error, sandbox_id, host_id, created_at, started_at, completed_at, template_id, team_id FROM template_builds ORDER BY created_at DESC +SELECT id, name, base_template, recipe, healthcheck, vcpus, memory_mb, status, current_step, total_steps, logs, error, sandbox_id, host_id, created_at, started_at, completed_at, template_id, team_id, skip_pre_post FROM template_builds ORDER BY created_at DESC ` func (q *Queries) ListTemplateBuilds(ctx context.Context) ([]TemplateBuild, error) { @@ -132,6 +136,7 @@ func (q *Queries) ListTemplateBuilds(ctx context.Context) ([]TemplateBuild, erro &i.CompletedAt, &i.TemplateID, &i.TeamID, + &i.SkipPrePost, ); err != nil { return nil, err } @@ -196,10 +201,10 @@ func (q *Queries) UpdateBuildSandbox(ctx context.Context, arg UpdateBuildSandbox const updateBuildStatus = `-- name: UpdateBuildStatus :one UPDATE template_builds SET status = $2, - started_at = CASE WHEN $2 = 'running' AND started_at IS NULL THEN NOW() ELSE started_at END, - completed_at = CASE WHEN $2 IN ('success', 'failed') THEN NOW() ELSE completed_at END + started_at = CASE WHEN $2 = 'running' AND started_at IS NULL THEN NOW() ELSE started_at END, + completed_at = CASE WHEN $2 IN ('success', 'failed', 'cancelled') THEN NOW() ELSE completed_at END WHERE id = $1 -RETURNING id, name, base_template, recipe, healthcheck, vcpus, memory_mb, status, current_step, total_steps, logs, error, sandbox_id, host_id, created_at, started_at, completed_at, template_id, team_id +RETURNING id, name, base_template, recipe, healthcheck, vcpus, memory_mb, status, current_step, total_steps, logs, error, sandbox_id, host_id, created_at, started_at, completed_at, template_id, team_id, skip_pre_post ` type UpdateBuildStatusParams struct { @@ -230,6 +235,7 @@ func (q *Queries) 
// ExecContext holds mutable state that persists across recipe steps.
// It starts out empty and is updated by ENV and WORKDIR steps.
type ExecContext struct {
	WorkDir string            // directory to cd into before each command; "" means none
	EnvVars map[string]string // variables passed to each command as KEY='value' assignments
}

// WrappedCommand returns the full shell command for a RUN step with context
// applied. The result is passed as the argument to /bin/sh -c.
//
// With no WORKDIR and no ENV set, cmd is returned unchanged. Otherwise the
// context is prepended as a shell preamble and cmd runs in a nested shell:
//
//	cd '/the/dir' && KEY='val' /bin/sh -c 'original command'
func (c *ExecContext) WrappedCommand(cmd string) string {
	prefix := c.shellPrefix()
	if prefix == "" {
		return cmd
	}
	return prefix + "/bin/sh -c " + shellescape(cmd)
}

// StartCommand returns the shell command for a START step. The process is
// launched in the background via nohup with stdout/stderr redirected to
// /dev/null, so the outer shell returns immediately and the build can
// continue while the process keeps running.
//
// Multiple START steps can be issued to run several background processes
// side by side.
func (c *ExecContext) StartCommand(cmd string) string {
	return c.shellPrefix() + "nohup /bin/sh -c " + shellescape(cmd) + " >/dev/null 2>&1 &"
}

// shellPrefix builds the "cd ... && KEY='val' " preamble for a shell command.
// It returns an empty string when no context is set.
//
// Variables are emitted in sorted key order. Go randomizes map iteration
// order, so without sorting the generated command would differ from run to
// run whenever more than one variable is set.
func (c *ExecContext) shellPrefix() string {
	if c.WorkDir == "" && len(c.EnvVars) == 0 {
		return ""
	}

	var sb strings.Builder
	if c.WorkDir != "" {
		sb.WriteString("cd ")
		sb.WriteString(shellescape(c.WorkDir))
		sb.WriteString(" && ")
	}

	keys := make([]string, 0, len(c.EnvVars))
	for k := range c.EnvVars {
		keys = append(keys, k)
	}
	// In-place insertion sort: env maps hold a handful of entries, and this
	// keeps the file's import set ("strings" only) unchanged.
	for i := 1; i < len(keys); i++ {
		for j := i; j > 0 && keys[j] < keys[j-1]; j-- {
			keys[j], keys[j-1] = keys[j-1], keys[j]
		}
	}

	for _, k := range keys {
		// The key is written verbatim; only the value is quoted.
		sb.WriteString(k)
		sb.WriteByte('=')
		sb.WriteString(shellescape(c.EnvVars[k]))
		sb.WriteByte(' ')
	}
	return sb.String()
}

// shellescape wraps s in single quotes, replacing every embedded single quote
// with '\'' (close quote, escaped quote, reopen quote). The result is a single
// POSIX shell word, suitable for paths, env values, and whole commands.
func shellescape(s string) string {
	return "'" + strings.ReplaceAll(s, "'", `'\''`) + "'"
}
t.Run(tc.name, func(t *testing.T) { + got := tc.ctx.WrappedCommand(tc.cmd) + if got != tc.want { + t.Errorf("WrappedCommand(%q)\n got %q\n want %q", tc.cmd, got, tc.want) + } + }) + } +} + +func TestExecContext_StartCommand(t *testing.T) { + tests := []struct { + name string + ctx ExecContext + cmd string + want string + }{ + { + name: "no context", + ctx: ExecContext{}, + cmd: "python3 app.py", + want: "nohup /bin/sh -c 'python3 app.py' >/dev/null 2>&1 &", + }, + { + name: "with workdir", + ctx: ExecContext{WorkDir: "/app"}, + cmd: "python3 server.py", + want: "cd '/app' && nohup /bin/sh -c 'python3 server.py' >/dev/null 2>&1 &", + }, + { + name: "with env", + ctx: ExecContext{EnvVars: map[string]string{"PORT": "9000"}}, + cmd: "node index.js", + want: "PORT='9000' nohup /bin/sh -c 'node index.js' >/dev/null 2>&1 &", + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + got := tc.ctx.StartCommand(tc.cmd) + if got != tc.want { + t.Errorf("StartCommand(%q)\n got %q\n want %q", tc.cmd, got, tc.want) + } + }) + } +} + +func TestShellescape(t *testing.T) { + tests := []struct { + input string + want string + }{ + {"simple", "'simple'"}, + {"/path/to/dir", "'/path/to/dir'"}, + {"it's fine", "'it'\\''s fine'"}, + {"", "''"}, + {"a'b'c", "'a'\\''b'\\''c'"}, + } + for _, tc := range tests { + got := shellescape(tc.input) + if got != tc.want { + t.Errorf("shellescape(%q) = %q, want %q", tc.input, got, tc.want) + } + } +} diff --git a/internal/recipe/executor.go b/internal/recipe/executor.go new file mode 100644 index 0000000..3df45dc --- /dev/null +++ b/internal/recipe/executor.go @@ -0,0 +1,185 @@ +package recipe + +import ( + "context" + "fmt" + "log/slog" + "strings" + "time" + + "connectrpc.com/connect" + + pb "git.omukk.dev/wrenn/sandbox/proto/hostagent/gen" +) + +// DefaultStepTimeout is the fallback timeout for RUN steps that carry no +// explicit --timeout flag. 
+const DefaultStepTimeout = 30 * time.Second + +// BuildLogEntry is the per-step record stored in template_builds.logs (JSONB). +type BuildLogEntry struct { + Step int `json:"step"` + Phase string `json:"phase"` + Cmd string `json:"cmd"` + Stdout string `json:"stdout"` + Stderr string `json:"stderr"` + Exit int32 `json:"exit"` + Ok bool `json:"ok"` + Elapsed int64 `json:"elapsed_ms"` +} + +// ExecFunc is the agent.Exec call signature used by the executor. It matches +// the method on the hostagent Connect RPC client. +type ExecFunc func(ctx context.Context, req *connect.Request[pb.ExecRequest]) (*connect.Response[pb.ExecResponse], error) + +// Execute runs steps sequentially against sandboxID using execFn. +// +// - phase labels the log entries (e.g., "pre-build", "recipe", "post-build"). +// - startStep is the 1-based offset so entries are globally numbered across phases. +// - defaultTimeout applies to RUN steps with no per-step --timeout; 0 → 10 minutes. +// - bctx is mutated in place as ENV/WORKDIR steps execute, and carries forward +// into subsequent phases when the caller passes the same pointer. +// +// Returns all log entries appended during this call, the next step counter +// value, and whether all steps succeeded. On false the last entry contains +// failure details; the caller is responsible for destroying the sandbox and +// recording the build error. 
+func Execute( + ctx context.Context, + phase string, + steps []Step, + sandboxID string, + startStep int, + defaultTimeout time.Duration, + bctx *ExecContext, + execFn ExecFunc, +) (entries []BuildLogEntry, nextStep int, ok bool) { + if defaultTimeout <= 0 { + defaultTimeout = 10 * time.Minute + } + + step := startStep + for _, st := range steps { + step++ + slog.Info("executing build step", "phase", phase, "step", step, "instruction", st.Raw) + + switch st.Kind { + case KindENV: + if bctx.EnvVars == nil { + bctx.EnvVars = make(map[string]string) + } + bctx.EnvVars[st.Key] = st.Value + entries = append(entries, BuildLogEntry{Step: step, Phase: phase, Cmd: st.Raw, Ok: true}) + + case KindWORKDIR: + bctx.WorkDir = st.Path + entries = append(entries, BuildLogEntry{Step: step, Phase: phase, Cmd: st.Raw, Ok: true}) + + case KindUSER, KindCOPY: + verb := strings.ToUpper(strings.Fields(st.Raw)[0]) + entries = append(entries, BuildLogEntry{ + Step: step, + Phase: phase, + Cmd: st.Raw, + Stderr: verb + " is not yet supported", + Ok: false, + }) + return entries, step, false + + case KindSTART: + entry, succeeded := execStart(ctx, st, sandboxID, phase, step, bctx, execFn) + entries = append(entries, entry) + if !succeeded { + return entries, step, false + } + + case KindRUN: + timeout := defaultTimeout + if st.Timeout > 0 { + timeout = st.Timeout + } + entry, succeeded := execRun(ctx, st, sandboxID, phase, step, timeout, bctx, execFn) + entries = append(entries, entry) + if !succeeded { + return entries, step, false + } + } + } + return entries, step, true +} + +func execRun( + ctx context.Context, + st Step, + sandboxID, phase string, + step int, + timeout time.Duration, + bctx *ExecContext, + execFn ExecFunc, +) (BuildLogEntry, bool) { + execCtx, cancel := context.WithTimeout(ctx, timeout) + defer cancel() + + start := time.Now() + resp, err := execFn(execCtx, connect.NewRequest(&pb.ExecRequest{ + SandboxId: sandboxID, + Cmd: "/bin/sh", + Args: []string{"-c", 
bctx.WrappedCommand(st.Shell)}, + TimeoutSec: int32(timeout.Seconds()), + })) + + entry := BuildLogEntry{ + Step: step, + Phase: phase, + Cmd: st.Raw, + Elapsed: time.Since(start).Milliseconds(), + } + if err != nil { + entry.Stderr = fmt.Sprintf("exec error: %v", err) + return entry, false + } + entry.Stdout = string(resp.Msg.Stdout) + entry.Stderr = string(resp.Msg.Stderr) + entry.Exit = resp.Msg.ExitCode + entry.Ok = resp.Msg.ExitCode == 0 + return entry, entry.Ok +} + +func execStart( + ctx context.Context, + st Step, + sandboxID, phase string, + step int, + bctx *ExecContext, + execFn ExecFunc, +) (BuildLogEntry, bool) { + // START uses a short timeout: just long enough for the shell to fork and + // return. The background process itself runs indefinitely inside the VM. + execCtx, cancel := context.WithTimeout(ctx, 10*time.Second) + defer cancel() + + start := time.Now() + resp, err := execFn(execCtx, connect.NewRequest(&pb.ExecRequest{ + SandboxId: sandboxID, + Cmd: "/bin/sh", + Args: []string{"-c", bctx.StartCommand(st.Shell)}, + TimeoutSec: 10, + })) + + entry := BuildLogEntry{ + Step: step, + Phase: phase, + Cmd: st.Raw, + Elapsed: time.Since(start).Milliseconds(), + } + if err != nil { + entry.Stderr = fmt.Sprintf("start error: %v", err) + return entry, false + } + entry.Exit = resp.Msg.ExitCode + entry.Ok = resp.Msg.ExitCode == 0 + if !entry.Ok { + entry.Stderr = fmt.Sprintf("start failed with exit code %d: %s", resp.Msg.ExitCode, string(resp.Msg.Stderr)) + } + return entry, entry.Ok +} diff --git a/internal/recipe/step.go b/internal/recipe/step.go new file mode 100644 index 0000000..7d51036 --- /dev/null +++ b/internal/recipe/step.go @@ -0,0 +1,129 @@ +package recipe + +import ( + "fmt" + "strings" + "time" +) + +// Kind identifies the instruction type in a recipe line. +type Kind int + +const ( + KindRUN Kind = iota // Execute a command and wait for it to exit. + KindSTART // Start a command in the background (non-blocking). 
// Kind identifies the instruction type in a recipe line.
type Kind int

const (
	KindRUN     Kind = iota // Execute a command and wait for it to exit.
	KindSTART               // Start a command in the background (non-blocking).
	KindENV                 // Set an environment variable for subsequent steps.
	KindWORKDIR             // Set the working directory for subsequent steps.
	KindUSER                // Switch the unix user for subsequent steps. (stub)
	KindCOPY                // Copy files into the sandbox. (stub)
)

// Step is the parsed representation of one recipe instruction.
type Step struct {
	Kind    Kind
	Raw     string        // original string, preserved for logging
	Shell   string        // KindRUN, KindSTART: the shell command text
	Timeout time.Duration // KindRUN: 0 means use caller's default
	Key     string        // KindENV: variable name
	Value   string        // KindENV: variable value
	Path    string        // KindWORKDIR: directory path
}

// ParseStep parses a single recipe instruction string into a Step.
// Instructions are Dockerfile-like: a keyword followed by arguments. The
// keyword is case-insensitive and is separated from its arguments by a space
// or a tab.
//
// Supported syntax:
//
//	RUN <command>                      — run command, wait for exit
//	RUN --timeout=<duration> <command> — run command with explicit timeout (e.g. --timeout=5m)
//	START <command>                    — start command in background, return immediately
//	ENV <KEY>=<VALUE>                  — set environment variable
//	WORKDIR <path>                     — set working directory
//	USER <name>                        — not yet supported
//	COPY <src> <dst>                   — not yet supported
//
// An error is returned for empty input, an unknown keyword, or arguments that
// do not match the syntax above.
func ParseStep(s string) (Step, error) {
	s = strings.TrimSpace(s)
	if s == "" {
		return Step{}, fmt.Errorf("empty step")
	}

	keyword, rest := cutWhitespace(s)

	switch strings.ToUpper(keyword) {
	case "RUN":
		return parseRUN(s, rest)
	case "START":
		return parseSTART(s, rest)
	case "ENV":
		return parseENV(s, rest)
	case "WORKDIR":
		return parseWORKDIR(s, rest)
	case "USER":
		return Step{Kind: KindUSER, Raw: s}, nil
	case "COPY":
		return Step{Kind: KindCOPY, Raw: s}, nil
	default:
		return Step{}, fmt.Errorf("unknown instruction %q (expected RUN, START, ENV, WORKDIR, USER, or COPY)", keyword)
	}
}

// ParseRecipe parses all recipe lines in order and stops at the first invalid
// one; the returned error names the 1-based line number. A nil or empty input
// yields an empty, non-nil slice.
func ParseRecipe(lines []string) ([]Step, error) {
	steps := make([]Step, 0, len(lines))
	for i, line := range lines {
		st, err := ParseStep(line)
		if err != nil {
			return nil, fmt.Errorf("recipe line %d: %w", i+1, err)
		}
		steps = append(steps, st)
	}
	return steps, nil
}

// cutWhitespace splits s at its first space or tab and returns the text
// before it and the whitespace-trimmed text after it. tail is empty when s
// contains no separator.
func cutWhitespace(s string) (head, tail string) {
	i := strings.IndexAny(s, " \t")
	if i < 0 {
		return s, ""
	}
	return s[:i], strings.TrimSpace(s[i+1:])
}

// isValidEnvKey reports whether key is a valid shell variable name: a letter
// or underscore followed by letters, digits, or underscores.
func isValidEnvKey(key string) bool {
	for i := 0; i < len(key); i++ {
		c := key[i]
		isLetter := c == '_' || ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z')
		isDigit := '0' <= c && c <= '9'
		if !isLetter && !(isDigit && i > 0) {
			return false
		}
	}
	return key != ""
}

// parseRUN parses the arguments of a RUN instruction: an optional leading
// --timeout=<duration> flag followed by the command text. The duration must
// parse with time.ParseDuration and be positive.
func parseRUN(raw, rest string) (Step, error) {
	var timeout time.Duration
	if strings.HasPrefix(rest, "--timeout=") {
		flag, cmd := cutWhitespace(rest[len("--timeout="):])
		if cmd == "" {
			return Step{}, fmt.Errorf("RUN --timeout= flag has no command: %q", raw)
		}
		d, err := time.ParseDuration(flag)
		if err != nil {
			return Step{}, fmt.Errorf("RUN --timeout= invalid duration %q: %w", flag, err)
		}
		if d <= 0 {
			// A zero Timeout means "use the default", so an explicit
			// non-positive value would be silently ignored; reject it instead.
			return Step{}, fmt.Errorf("RUN --timeout= must be positive, got %q: %q", flag, raw)
		}
		timeout = d
		rest = cmd
	}
	if rest == "" {
		return Step{}, fmt.Errorf("RUN requires a command: %q", raw)
	}
	return Step{Kind: KindRUN, Raw: raw, Shell: rest, Timeout: timeout}, nil
}

// parseSTART parses the arguments of a START instruction, which is the
// command text to launch in the background.
func parseSTART(raw, rest string) (Step, error) {
	if rest == "" {
		return Step{}, fmt.Errorf("START requires a command: %q", raw)
	}
	return Step{Kind: KindSTART, Raw: raw, Shell: rest}, nil
}

// parseENV parses KEY=VALUE. Everything after the first '=' is the value, so
// values may contain spaces and further '=' characters. The key must be a
// valid shell variable name because it is later written unquoted into the
// command line built for RUN and START steps.
func parseENV(raw, rest string) (Step, error) {
	key, value, found := strings.Cut(rest, "=")
	if !found {
		return Step{}, fmt.Errorf("ENV requires KEY=VALUE format: %q", raw)
	}
	if key == "" {
		return Step{}, fmt.Errorf("ENV key is empty: %q", raw)
	}
	if !isValidEnvKey(key) {
		return Step{}, fmt.Errorf("ENV key %q is not a valid variable name: %q", key, raw)
	}
	return Step{Kind: KindENV, Raw: raw, Key: key, Value: value}, nil
}

// parseWORKDIR parses the argument of a WORKDIR instruction; the whole
// remainder is the path, so paths containing spaces are accepted.
func parseWORKDIR(raw, path string) (Step, error) {
	if path == "" {
		return Step{}, fmt.Errorf("WORKDIR requires a path: %q", raw)
	}
	return Step{Kind: KindWORKDIR, Raw: raw, Path: path}, nil
}
{ + name string + input string + want Step + wantErr bool + }{ + // RUN + { + name: "RUN basic", + input: "RUN apt install -y curl", + want: Step{Kind: KindRUN, Raw: "RUN apt install -y curl", Shell: "apt install -y curl"}, + }, + { + name: "RUN lowercase", + input: "run echo hello", + want: Step{Kind: KindRUN, Raw: "run echo hello", Shell: "echo hello"}, + }, + { + name: "RUN with timeout", + input: "RUN --timeout=5m npm install", + want: Step{Kind: KindRUN, Raw: "RUN --timeout=5m npm install", Shell: "npm install", Timeout: 5 * time.Minute}, + }, + { + name: "RUN with timeout seconds", + input: "RUN --timeout=30s make build", + want: Step{Kind: KindRUN, Raw: "RUN --timeout=30s make build", Shell: "make build", Timeout: 30 * time.Second}, + }, + { + name: "RUN no command", + input: "RUN", + wantErr: true, + }, + { + name: "RUN timeout no command", + input: "RUN --timeout=5m", + wantErr: true, + }, + { + name: "RUN invalid timeout", + input: "RUN --timeout=notaduration echo hi", + wantErr: true, + }, + // START + { + name: "START basic", + input: "START python3 app.py", + want: Step{Kind: KindSTART, Raw: "START python3 app.py", Shell: "python3 app.py"}, + }, + { + name: "START uppercase", + input: "START node server.js --port=8080", + want: Step{Kind: KindSTART, Raw: "START node server.js --port=8080", Shell: "node server.js --port=8080"}, + }, + { + name: "START no command", + input: "START", + wantErr: true, + }, + // ENV + { + name: "ENV basic", + input: "ENV FOO=bar", + want: Step{Kind: KindENV, Raw: "ENV FOO=bar", Key: "FOO", Value: "bar"}, + }, + { + name: "ENV value with spaces", + input: "ENV GREETING=hello world", + want: Step{Kind: KindENV, Raw: "ENV GREETING=hello world", Key: "GREETING", Value: "hello world"}, + }, + { + name: "ENV value with equals sign", + input: "ENV URL=http://example.com?a=1", + want: Step{Kind: KindENV, Raw: "ENV URL=http://example.com?a=1", Key: "URL", Value: "http://example.com?a=1"}, + }, + { + name: "ENV empty value", + input: 
"ENV FOO=", + want: Step{Kind: KindENV, Raw: "ENV FOO=", Key: "FOO", Value: ""}, + }, + { + name: "ENV missing equals", + input: "ENV FOO", + wantErr: true, + }, + { + name: "ENV empty key", + input: "ENV =value", + wantErr: true, + }, + // WORKDIR + { + name: "WORKDIR basic", + input: "WORKDIR /app", + want: Step{Kind: KindWORKDIR, Raw: "WORKDIR /app", Path: "/app"}, + }, + { + name: "WORKDIR with spaces in path", + input: "WORKDIR /my project", + want: Step{Kind: KindWORKDIR, Raw: "WORKDIR /my project", Path: "/my project"}, + }, + { + name: "WORKDIR empty", + input: "WORKDIR", + wantErr: true, + }, + // USER and COPY stubs + { + name: "USER stub", + input: "USER www-data", + want: Step{Kind: KindUSER, Raw: "USER www-data"}, + }, + { + name: "COPY stub", + input: "COPY config.yaml /etc/app/config.yaml", + want: Step{Kind: KindCOPY, Raw: "COPY config.yaml /etc/app/config.yaml"}, + }, + // Unknown keyword + { + name: "unknown keyword", + input: "FROBNICATE something", + wantErr: true, + }, + // Empty input + { + name: "empty string", + input: "", + wantErr: true, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + got, err := ParseStep(tc.input) + if tc.wantErr { + if err == nil { + t.Fatalf("ParseStep(%q) expected error, got %+v", tc.input, got) + } + return + } + if err != nil { + t.Fatalf("ParseStep(%q) unexpected error: %v", tc.input, err) + } + if got != tc.want { + t.Errorf("ParseStep(%q)\n got %+v\n want %+v", tc.input, got, tc.want) + } + }) + } +} + +func TestParseRecipe(t *testing.T) { + t.Run("valid recipe", func(t *testing.T) { + lines := []string{ + "RUN apt update", + "WORKDIR /app", + "ENV PORT=8080", + "START python3 server.py", + "RUN --timeout=2m pip install -r requirements.txt", + } + steps, err := ParseRecipe(lines) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(steps) != 5 { + t.Fatalf("expected 5 steps, got %d", len(steps)) + } + if steps[0].Kind != KindRUN { + t.Errorf("step 0: want KindRUN, 
got %v", steps[0].Kind) + } + if steps[1].Kind != KindWORKDIR { + t.Errorf("step 1: want KindWORKDIR, got %v", steps[1].Kind) + } + if steps[3].Kind != KindSTART { + t.Errorf("step 3: want KindSTART, got %v", steps[3].Kind) + } + if steps[4].Timeout != 2*time.Minute { + t.Errorf("step 4: want 2m timeout, got %v", steps[4].Timeout) + } + }) + + t.Run("error on invalid line", func(t *testing.T) { + lines := []string{ + "RUN apt update", + "BADCMD something", + } + _, err := ParseRecipe(lines) + if err == nil { + t.Fatal("expected error for invalid line, got nil") + } + }) + + t.Run("empty recipe", func(t *testing.T) { + steps, err := ParseRecipe(nil) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(steps) != 0 { + t.Fatalf("expected 0 steps, got %d", len(steps)) + } + }) +} diff --git a/internal/service/build.go b/internal/service/build.go index 2592a6d..1108044 100644 --- a/internal/service/build.go +++ b/internal/service/build.go @@ -5,6 +5,7 @@ import ( "encoding/json" "fmt" "log/slog" + "sync" "time" "connectrpc.com/connect" @@ -14,6 +15,7 @@ import ( "git.omukk.dev/wrenn/sandbox/internal/db" "git.omukk.dev/wrenn/sandbox/internal/id" "git.omukk.dev/wrenn/sandbox/internal/lifecycle" + "git.omukk.dev/wrenn/sandbox/internal/recipe" "git.omukk.dev/wrenn/sandbox/internal/scheduler" pb "git.omukk.dev/wrenn/sandbox/proto/hostagent/gen" ) @@ -27,14 +29,14 @@ const ( // preBuildCmds run before the user recipe to prepare the build environment. var preBuildCmds = []string{ - "apt update", + "RUN apt update", } // postBuildCmds run after the user recipe to clean up caches and reduce image size. var postBuildCmds = []string{ - "apt clean", - "apt autoremove -y", - "rm -rf /var/lib/apt/lists/*", + "RUN apt clean", + "RUN apt autoremove -y", + "RUN rm -rf /var/lib/apt/lists/*", } // buildAgentClient is the subset of the host agent client used by the build worker. 
@@ -46,24 +48,15 @@ type buildAgentClient interface { FlattenRootfs(ctx context.Context, req *connect.Request[pb.FlattenRootfsRequest]) (*connect.Response[pb.FlattenRootfsResponse], error) } -// BuildLogEntry represents a single entry in the build log JSONB array. -type BuildLogEntry struct { - Step int `json:"step"` - Phase string `json:"phase"` // "pre-build", "recipe", or "post-build" - Cmd string `json:"cmd"` - Stdout string `json:"stdout"` - Stderr string `json:"stderr"` - Exit int32 `json:"exit"` - Ok bool `json:"ok"` - Elapsed int64 `json:"elapsed_ms"` -} - // BuildService handles template build orchestration. type BuildService struct { DB *db.Queries Redis *redis.Client Pool *lifecycle.HostClientPool Scheduler scheduler.HostScheduler + + mu sync.Mutex // guards cancelMap + cancelMap map[string]context.CancelFunc // formatted build ID → per-build cancel func; allocated lazily by executeBuild, which also deletes its own entry when it returns } // BuildCreateParams holds the parameters for creating a template build. @@ -74,6 +67,7 @@ type BuildCreateParams struct { Healthcheck string VCPUs int32 MemoryMB int32 + SkipPrePost bool // when true, the pre-build and post-build phases are skipped and not counted in TotalSteps } // Create inserts a new build record and enqueues it to Redis. 
@@ -97,6 +91,11 @@ func (s *BuildService) Create(ctx context.Context, p BuildCreateParams) (db.Temp buildIDStr := id.FormatBuildID(buildID) newTemplateID := id.NewTemplateID() + defaultSteps := len(preBuildCmds) + len(postBuildCmds) + if p.SkipPrePost { + defaultSteps = 0 + } + build, err := s.DB.InsertTemplateBuild(ctx, db.InsertTemplateBuildParams{ ID: buildID, Name: p.Name, @@ -105,9 +104,10 @@ func (s *BuildService) Create(ctx context.Context, p BuildCreateParams) (db.Temp Healthcheck: p.Healthcheck, Vcpus: p.VCPUs, MemoryMb: p.MemoryMB, - TotalSteps: int32(len(p.Recipe) + len(preBuildCmds) + len(postBuildCmds)), + TotalSteps: int32(len(p.Recipe) + defaultSteps), TemplateID: newTemplateID, TeamID: id.PlatformTeamID, + SkipPrePost: p.SkipPrePost, }) if err != nil { return db.TemplateBuild{}, fmt.Errorf("insert build: %w", err) @@ -131,6 +131,40 @@ func (s *BuildService) List(ctx context.Context) ([]db.TemplateBuild, error) { return s.DB.ListTemplateBuilds(ctx) } +// Cancel cancels a pending or running build. It fails if the build cannot be +// loaded, is already success/failed/cancelled, or the status update fails. +// Otherwise it writes status "cancelled" itself (a worker that dequeues the +// build later skips it) and cancels the build's context if one is registered. +func (s *BuildService) Cancel(ctx context.Context, buildID pgtype.UUID) error { + build, err := s.DB.GetTemplateBuild(ctx, buildID) + if err != nil { + return fmt.Errorf("get build: %w", err) + } + switch build.Status { + case "success", "failed", "cancelled": + return fmt.Errorf("build is already %s", build.Status) + } + + // Mark cancelled in DB first. This handles both pending builds (which haven't + // been picked up yet) and acts as a flag for executeBuild to check on start. NOTE(review): the status read above and this write are separate queries — confirm a concurrent transition cannot be overwritten. 
+ if _, err := s.DB.UpdateBuildStatus(ctx, db.UpdateBuildStatusParams{ + ID: buildID, Status: "cancelled", + }); err != nil { + return fmt.Errorf("update build status: %w", err) + } + + // If the build is currently running, signal its context. + buildIDStr := id.FormatBuildID(buildID) + s.mu.Lock() + cancel, running := s.cancelMap[buildIDStr] // indexing a nil map is safe; running is false when no worker has registered this build + s.mu.Unlock() + if running { + cancel() // invoked after unlocking, so mu is not held while signalling the build + } + + return nil +} + // StartWorkers launches n goroutines that consume from the Redis build queue. // The returned cancel function stops all workers. func (s *BuildService) StartWorkers(ctx context.Context, n int) context.CancelFunc { @@ -172,14 +206,38 @@ func (s *BuildService) executeBuild(ctx context.Context, buildIDStr string) { return } - build, err := s.DB.GetTemplateBuild(ctx, buildID) + // Create a per-build context so this build can be cancelled independently of + // the worker. Register in cancelMap before fetching the build so that a + // concurrent Cancel call can always find and signal it. + buildCtx, buildCancel := context.WithCancel(ctx) + defer buildCancel() + + s.mu.Lock() + if s.cancelMap == nil { + s.cancelMap = make(map[string]context.CancelFunc) + } + s.cancelMap[buildIDStr] = buildCancel + s.mu.Unlock() + defer func() { + s.mu.Lock() + delete(s.cancelMap, buildIDStr) + s.mu.Unlock() + }() + + build, err := s.DB.GetTemplateBuild(buildCtx, buildID) if err != nil { log.Error("failed to fetch build", "error", err) return } + // Skip if already cancelled (Cancel was called before we dequeued). NOTE(review): a Cancel that commits after this check can still be overwritten by the "running" update below — confirm. + if build.Status == "cancelled" { + log.Info("build already cancelled, skipping") + return + } + // Mark as running. 
- if _, err := s.DB.UpdateBuildStatus(ctx, db.UpdateBuildStatusParams{ + if _, err := s.DB.UpdateBuildStatus(buildCtx, db.UpdateBuildStatusParams{ ID: buildID, Status: "running", }); err != nil { log.Error("failed to update build status", "error", err) @@ -187,22 +245,22 @@ func (s *BuildService) executeBuild(ctx context.Context, buildIDStr string) { } // Parse user recipe. - var recipe []string - if err := json.Unmarshal(build.Recipe, &recipe); err != nil { - s.failBuild(ctx, buildID, fmt.Sprintf("invalid recipe JSON: %v", err)) + var userRecipe []string + if err := json.Unmarshal(build.Recipe, &userRecipe); err != nil { + s.failBuild(buildCtx, buildID, fmt.Sprintf("invalid recipe JSON: %v", err)) return } // Pick a platform host and create a sandbox. - host, err := s.Scheduler.SelectHost(ctx, id.PlatformTeamID, false) + host, err := s.Scheduler.SelectHost(buildCtx, id.PlatformTeamID, false) if err != nil { - s.failBuild(ctx, buildID, fmt.Sprintf("no host available: %v", err)) + s.failBuild(buildCtx, buildID, fmt.Sprintf("no host available: %v", err)) return } agent, err := s.Pool.GetForHost(host) if err != nil { - s.failBuild(ctx, buildID, fmt.Sprintf("agent client error: %v", err)) + s.failBuild(buildCtx, buildID, fmt.Sprintf("agent client error: %v", err)) return } @@ -214,16 +272,16 @@ func (s *BuildService) executeBuild(ctx context.Context, buildIDStr string) { baseTeamID := id.PlatformTeamID baseTemplateID := id.MinimalTemplateID if build.BaseTemplate != "minimal" { - baseTmpl, err := s.DB.GetPlatformTemplateByName(ctx, build.BaseTemplate) + baseTmpl, err := s.DB.GetPlatformTemplateByName(buildCtx, build.BaseTemplate) if err != nil { - s.failBuild(ctx, buildID, fmt.Sprintf("base template %q not found: %v", build.BaseTemplate, err)) + s.failBuild(buildCtx, buildID, fmt.Sprintf("base template %q not found: %v", build.BaseTemplate, err)) return } baseTeamID = baseTmpl.TeamID baseTemplateID = baseTmpl.ID } - resp, err := agent.CreateSandbox(ctx, 
connect.NewRequest(&pb.CreateSandboxRequest{ + resp, err := agent.CreateSandbox(buildCtx, connect.NewRequest(&pb.CreateSandboxRequest{ SandboxId: sandboxIDStr, Template: build.BaseTemplate, TeamId: id.UUIDString(baseTeamID), @@ -234,129 +292,121 @@ func (s *BuildService) executeBuild(ctx context.Context, buildIDStr string) { DiskSizeMb: 5120, // 5 GB for template builds })) if err != nil { - s.failBuild(ctx, buildID, fmt.Sprintf("create sandbox failed: %v", err)) + s.failBuild(buildCtx, buildID, fmt.Sprintf("create sandbox failed: %v", err)) return } _ = resp // Record sandbox/host association. - _ = s.DB.UpdateBuildSandbox(ctx, db.UpdateBuildSandboxParams{ + _ = s.DB.UpdateBuildSandbox(buildCtx, db.UpdateBuildSandboxParams{ ID: buildID, SandboxID: sandboxID, HostID: host.ID, }) + // Parse recipe steps. preBuildCmds and postBuildCmds are hardcoded and always + // valid; panic on error is appropriate here since it would be a programmer mistake. + preBuildSteps, err := recipe.ParseRecipe(preBuildCmds) + if err != nil { + panic(fmt.Sprintf("invalid pre-build recipe: %v", err)) + } + userRecipeSteps, err := recipe.ParseRecipe(userRecipe) + if err != nil { + s.destroySandbox(buildCtx, agent, sandboxIDStr) + s.failBuild(buildCtx, buildID, fmt.Sprintf("recipe parse error: %v", err)) + return + } + postBuildSteps, err := recipe.ParseRecipe(postBuildCmds) + if err != nil { + panic(fmt.Sprintf("invalid post-build recipe: %v", err)) + } + // Execute build phases: pre-build → user recipe → post-build. - var logs []BuildLogEntry + // bctx carries working directory and env vars across all phases. + var logs []recipe.BuildLogEntry step := 0 + bctx := &recipe.ExecContext{} - // Helper to run a list of commands in a given phase. - // timeout=0 means no timeout (uses parent context). 
- runPhase := func(phase string, cmds []string, timeout time.Duration) bool { - for _, cmd := range cmds { - step++ - log.Info("executing build step", "phase", phase, "step", step, "cmd", cmd) - - execCtx := ctx - var cancel context.CancelFunc - // When no timeout is specified, use 10 minutes as a generous upper - // bound. The host agent defaults TimeoutSec=0 to 30s, so we must - // always send an explicit value. - effectiveTimeout := timeout - if effectiveTimeout <= 0 { - effectiveTimeout = 10 * time.Minute - } - execCtx, cancel = context.WithTimeout(ctx, effectiveTimeout) - timeoutSec := int32(effectiveTimeout.Seconds()) - - start := time.Now() - execResp, err := agent.Exec(execCtx, connect.NewRequest(&pb.ExecRequest{ - SandboxId: sandboxIDStr, - Cmd: "/bin/sh", - Args: []string{"-c", cmd}, - TimeoutSec: timeoutSec, - })) - cancel() - - entry := BuildLogEntry{ - Step: step, - Phase: phase, - Cmd: cmd, - Elapsed: time.Since(start).Milliseconds(), - } - - if err != nil { - entry.Stderr = err.Error() - entry.Ok = false - logs = append(logs, entry) - s.updateLogs(ctx, buildID, step, logs) - s.destroySandbox(ctx, agent, sandboxIDStr) - s.failBuild(ctx, buildID, fmt.Sprintf("%s step %d failed: %v", phase, step, err)) + runPhase := func(phase string, steps []recipe.Step, defaultTimeout time.Duration) bool { // runs one phase via recipe.Execute, appends and persists its log entries, and reports whether the phase succeeded + newEntries, nextStep, ok := recipe.Execute(buildCtx, phase, steps, sandboxIDStr, step, defaultTimeout, bctx, agent.Exec) // NOTE(review): the failure path indexes the last entry — confirm Execute never returns ok=false with zero entries + logs = append(logs, newEntries...) + step = nextStep + s.updateLogs(buildCtx, buildID, step, logs) + if !ok { + s.destroySandbox(buildCtx, agent, sandboxIDStr) // NOTE(review): after Cancel, buildCtx is already cancelled here — confirm destroySandbox still reaches the agent + // If the build was cancelled, status is already set — don't overwrite with "failed". NOTE(review): buildCtx is derived from the worker ctx, so Err() is also non-nil when the workers are stopped; no status is written on that path — confirm. 
+ if buildCtx.Err() != nil { return false } - - entry.Stdout = string(execResp.Msg.Stdout) - entry.Stderr = string(execResp.Msg.Stderr) - entry.Exit = execResp.Msg.ExitCode - entry.Ok = execResp.Msg.ExitCode == 0 - logs = append(logs, entry) - s.updateLogs(ctx, buildID, step, logs) - - if execResp.Msg.ExitCode != 0 { - s.destroySandbox(ctx, agent, sandboxIDStr) - s.failBuild(ctx, buildID, fmt.Sprintf("%s step %d failed with exit code %d", phase, step, execResp.Msg.ExitCode)) - return false + last := newEntries[len(newEntries)-1] + reason := last.Stderr + if reason == "" { + reason = fmt.Sprintf("exit code %d", last.Exit) } + s.failBuild(buildCtx, buildID, fmt.Sprintf("%s step %d failed: %s", phase, step, reason)) } - return true + return ok } - if !runPhase("pre-build", preBuildCmds, 0) { + if !build.SkipPrePost { + if !runPhase("pre-build", preBuildSteps, 0) { + return + } + } + if !runPhase("recipe", userRecipeSteps, buildCommandTimeout) { return } - if !runPhase("recipe", recipe, buildCommandTimeout) { - return - } - if !runPhase("post-build", postBuildCmds, 0) { - return + if !build.SkipPrePost { + if !runPhase("post-build", postBuildSteps, 0) { + return + } } // Healthcheck or direct snapshot. var sizeBytes int64 if build.Healthcheck != "" { log.Info("running healthcheck", "cmd", build.Healthcheck) - if err := s.waitForHealthcheck(ctx, agent, sandboxIDStr, build.Healthcheck); err != nil { - s.destroySandbox(ctx, agent, sandboxIDStr) - s.failBuild(ctx, buildID, fmt.Sprintf("healthcheck failed: %v", err)) + if err := s.waitForHealthcheck(buildCtx, agent, sandboxIDStr, build.Healthcheck); err != nil { + s.destroySandbox(buildCtx, agent, sandboxIDStr) + if buildCtx.Err() != nil { + return + } + s.failBuild(buildCtx, buildID, fmt.Sprintf("healthcheck failed: %v", err)) return } // Healthcheck passed → full snapshot (with memory/CPU state). 
log.Info("healthcheck passed, creating snapshot") - snapResp, err := agent.CreateSnapshot(ctx, connect.NewRequest(&pb.CreateSnapshotRequest{ + snapResp, err := agent.CreateSnapshot(buildCtx, connect.NewRequest(&pb.CreateSnapshotRequest{ SandboxId: sandboxIDStr, Name: build.Name, TeamId: id.UUIDString(build.TeamID), TemplateId: id.UUIDString(build.TemplateID), })) if err != nil { - s.destroySandbox(ctx, agent, sandboxIDStr) - s.failBuild(ctx, buildID, fmt.Sprintf("create snapshot failed: %v", err)) + s.destroySandbox(buildCtx, agent, sandboxIDStr) + if buildCtx.Err() != nil { + return + } + s.failBuild(buildCtx, buildID, fmt.Sprintf("create snapshot failed: %v", err)) return } sizeBytes = snapResp.Msg.SizeBytes } else { // No healthcheck → image-only template (rootfs only). log.Info("no healthcheck, flattening rootfs") - flatResp, err := agent.FlattenRootfs(ctx, connect.NewRequest(&pb.FlattenRootfsRequest{ + flatResp, err := agent.FlattenRootfs(buildCtx, connect.NewRequest(&pb.FlattenRootfsRequest{ SandboxId: sandboxIDStr, Name: build.Name, TeamId: id.UUIDString(build.TeamID), TemplateId: id.UUIDString(build.TemplateID), })) if err != nil { - s.destroySandbox(ctx, agent, sandboxIDStr) - s.failBuild(ctx, buildID, fmt.Sprintf("flatten rootfs failed: %v", err)) + s.destroySandbox(buildCtx, agent, sandboxIDStr) + if buildCtx.Err() != nil { + return + } + s.failBuild(buildCtx, buildID, fmt.Sprintf("flatten rootfs failed: %v", err)) return } sizeBytes = flatResp.Msg.SizeBytes @@ -368,7 +418,7 @@ func (s *BuildService) executeBuild(ctx context.Context, buildIDStr string) { templateType = "snapshot" } - if _, err := s.DB.InsertTemplate(ctx, db.InsertTemplateParams{ + if _, err := s.DB.InsertTemplate(buildCtx, db.InsertTemplateParams{ ID: build.TemplateID, Name: build.Name, Type: templateType, @@ -386,7 +436,7 @@ func (s *BuildService) executeBuild(ctx context.Context, buildIDStr string) { // No additional destroy needed. // Mark build as success. 
- if _, err := s.DB.UpdateBuildStatus(ctx, db.UpdateBuildStatusParams{ + if _, err := s.DB.UpdateBuildStatus(buildCtx, db.UpdateBuildStatusParams{ ID: buildID, Status: "success", }); err != nil { log.Error("failed to mark build as success", "error", err) @@ -429,7 +479,7 @@ func (s *BuildService) waitForHealthcheck(ctx context.Context, agent buildAgentC } } -func (s *BuildService) updateLogs(ctx context.Context, buildID pgtype.UUID, step int, logs []BuildLogEntry) { +func (s *BuildService) updateLogs(ctx context.Context, buildID pgtype.UUID, step int, logs []recipe.BuildLogEntry) { logsJSON, err := json.Marshal(logs) if err != nil { slog.Warn("failed to marshal build logs", "error", err)