1
0
forked from wrenn/wrenn

Add skip_pre_post build option, cancel endpoint, and recipe package

- skip_pre_post flag on builds bypasses apt update/clean pre/post steps for
  faster iteration when the recipe handles its own environment setup
- POST /v1/admin/builds/{id}/cancel endpoint marks an in-progress build as
  cancelled; UpdateBuildStatus now also sets completed_at for 'cancelled'
- internal/recipe: typed recipe parser and executor (RUN/START/ENV/WORKDIR
  steps; USER and COPY are parsed but not yet supported), replacing the raw
  string slice approach in the build worker
- pre/post build commands prefixed with RUN to match recipe step format
This commit is contained in:
2026-03-30 21:24:52 +06:00
parent 25ce0729d5
commit 948db13bed
12 changed files with 981 additions and 134 deletions

View File

@ -0,0 +1,11 @@
-- +goose Up
-- Setting completed_at for cancelled builds is handled by the UpdateBuildStatus
-- query (regenerated via sqlc), so it needs no schema change here.
--
-- This migration only adds the skip_pre_post flag: when true, the pre-build and
-- post-build command phases are skipped for that build. The column is NOT NULL
-- with DEFAULT FALSE, so builds that do not set it keep both phases.
ALTER TABLE template_builds ADD COLUMN skip_pre_post BOOLEAN NOT NULL DEFAULT FALSE;
-- +goose Down
-- Reverting drops the column together with any stored flag values.
ALTER TABLE template_builds DROP COLUMN skip_pre_post;

View File

@ -1,6 +1,6 @@
-- name: InsertTemplateBuild :one -- name: InsertTemplateBuild :one
INSERT INTO template_builds (id, name, base_template, recipe, healthcheck, vcpus, memory_mb, status, total_steps, template_id, team_id) INSERT INTO template_builds (id, name, base_template, recipe, healthcheck, vcpus, memory_mb, status, total_steps, template_id, team_id, skip_pre_post)
VALUES ($1, $2, $3, $4, $5, $6, $7, 'pending', $8, $9, $10) VALUES ($1, $2, $3, $4, $5, $6, $7, 'pending', $8, $9, $10, $11)
RETURNING *; RETURNING *;
-- name: GetTemplateBuild :one -- name: GetTemplateBuild :one
@ -13,7 +13,7 @@ SELECT * FROM template_builds ORDER BY created_at DESC;
UPDATE template_builds UPDATE template_builds
SET status = $2, SET status = $2,
started_at = CASE WHEN $2 = 'running' AND started_at IS NULL THEN NOW() ELSE started_at END, started_at = CASE WHEN $2 = 'running' AND started_at IS NULL THEN NOW() ELSE started_at END,
completed_at = CASE WHEN $2 IN ('success', 'failed') THEN NOW() ELSE completed_at END completed_at = CASE WHEN $2 IN ('success', 'failed', 'cancelled') THEN NOW() ELSE completed_at END
WHERE id = $1 WHERE id = $1
RETURNING *; RETURNING *;

View File

@ -38,6 +38,7 @@ export type CreateBuildParams = {
healthcheck?: string; healthcheck?: string;
vcpus?: number; vcpus?: number;
memory_mb?: number; memory_mb?: number;
skip_pre_post?: boolean;
}; };
export async function createBuild(params: CreateBuildParams): Promise<ApiResult<Build>> { export async function createBuild(params: CreateBuildParams): Promise<ApiResult<Build>> {
@ -69,3 +70,7 @@ export async function listAdminTemplates(): Promise<ApiResult<AdminTemplate[]>>
export async function deleteAdminTemplate(name: string): Promise<ApiResult<void>> { export async function deleteAdminTemplate(name: string): Promise<ApiResult<void>> {
return apiFetch('DELETE', `/api/v1/admin/templates/${name}`); return apiFetch('DELETE', `/api/v1/admin/templates/${name}`);
} }
/**
 * Request cancellation of a build via the admin API.
 *
 * @param id - Identifier of the build to cancel.
 * @returns The API result; there is no payload on success.
 */
export async function cancelBuild(id: string): Promise<ApiResult<void>> {
	const endpoint = `/api/v1/admin/builds/${id}/cancel`;
	return apiFetch('POST', endpoint);
}

View File

@ -6,6 +6,7 @@
import { import {
listBuilds, listBuilds,
createBuild, createBuild,
cancelBuild,
listAdminTemplates, listAdminTemplates,
deleteAdminTemplate, deleteAdminTemplate,
type Build, type Build,
@ -52,11 +53,15 @@
vcpus: 1, vcpus: 1,
memory_mb: 512, memory_mb: 512,
recipe: '', recipe: '',
healthcheck: '' healthcheck: '',
skip_pre_post: false
}); });
let creating = $state(false); let creating = $state(false);
let createError = $state<string | null>(null); let createError = $state<string | null>(null);
// Cancel build state
let cancelingBuildId = $state<string | null>(null);
// Stats // Stats
let templateCount = $derived(templates.length); let templateCount = $derived(templates.length);
let snapshotCount = $derived(templates.filter((t) => t.type === 'snapshot').length); let snapshotCount = $derived(templates.filter((t) => t.type === 'snapshot').length);
@ -123,12 +128,13 @@
recipe: lines, recipe: lines,
healthcheck: createForm.healthcheck.trim() || undefined, healthcheck: createForm.healthcheck.trim() || undefined,
vcpus: createForm.vcpus, vcpus: createForm.vcpus,
memory_mb: createForm.memory_mb memory_mb: createForm.memory_mb,
skip_pre_post: createForm.skip_pre_post
}); });
if (result.ok) { if (result.ok) {
showCreate = false; showCreate = false;
createForm = { name: '', base_template: 'minimal', vcpus: 1, memory_mb: 512, recipe: '', healthcheck: '' }; createForm = { name: '', base_template: 'minimal', vcpus: 1, memory_mb: 512, recipe: '', healthcheck: '', skip_pre_post: false };
builds = [result.data, ...builds]; builds = [result.data, ...builds];
activeTab = 'builds'; activeTab = 'builds';
expandedBuildId = result.data.id; expandedBuildId = result.data.id;
@ -156,6 +162,18 @@
deleting = false; deleting = false;
} }
/**
 * Cancel a build through the API and reflect the outcome locally.
 *
 * On success the matching entry in `builds` is marked `cancelled` and a
 * success toast is shown; on failure the API error (or a generic message)
 * is shown instead.
 *
 * @param buildId - Identifier of the build to cancel.
 */
async function handleCancelBuild(buildId: string) {
	cancelingBuildId = buildId;
	try {
		const result = await cancelBuild(buildId);
		if (result.ok) {
			builds = builds.map((b) => (b.id === buildId ? { ...b, status: 'cancelled' } : b));
			toast.success('Build cancelled');
		} else {
			toast.error(result.error ?? 'Failed to cancel build');
		}
	} finally {
		// Always release the spinner, even if the request throws, but do not
		// clear it when another cancel has since taken over the slot.
		if (cancelingBuildId === buildId) {
			cancelingBuildId = null;
		}
	}
}
function toggleBuildExpand(buildId: string) { function toggleBuildExpand(buildId: string) {
if (expandedBuildId === buildId) { if (expandedBuildId === buildId) {
expandedBuildId = null; expandedBuildId = null;
@ -198,6 +216,24 @@
case 'success': return 'var(--color-accent-bright)'; case 'success': return 'var(--color-accent-bright)';
case 'failed': return 'var(--color-red)'; case 'failed': return 'var(--color-red)';
case 'running': return 'var(--color-blue)'; case 'running': return 'var(--color-blue)';
case 'cancelled': return 'var(--color-amber)';
default: return 'var(--color-text-muted)';
}
}
/**
 * Split a recipe instruction into its keyword and arguments.
 *
 * Surrounding whitespace is ignored and any run of whitespace (spaces or
 * tabs) separates the keyword from the rest, so ` run\tfoo` yields
 * `['RUN', 'foo']`. The keyword is upper-cased; the rest is returned as-is.
 * An empty or blank string yields `['', '']`.
 *
 * @param cmd - Raw instruction text, e.g. `RUN apt-get update`.
 * @returns A `[keyword, rest]` tuple.
 */
function splitInstruction(cmd: string): [string, string] {
	const match = /^(\S+)\s*([\s\S]*)$/.exec(cmd.trim());
	if (match === null) return ['', ''];
	const [, keyword = '', rest = ''] = match;
	return [keyword.toUpperCase(), rest];
}
function keywordColor(keyword: string): string {
switch (keyword) {
case 'RUN': return 'var(--color-blue)';
case 'START': return 'var(--color-accent-bright)';
case 'ENV': return 'var(--color-amber)';
case 'WORKDIR': return 'var(--color-text-tertiary)';
default: return 'var(--color-text-muted)'; default: return 'var(--color-text-muted)';
} }
} }
@ -512,6 +548,22 @@
<tr> <tr>
<td colspan="7" class="border-b border-[var(--color-border)] last:border-0"> <td colspan="7" class="border-b border-[var(--color-border)] last:border-0">
<div class="bg-[var(--color-bg-0)] px-6 py-4" style="animation: fadeUp 0.15s ease both"> <div class="bg-[var(--color-bg-0)] px-6 py-4" style="animation: fadeUp 0.15s ease both">
{#if build.status === 'pending' || build.status === 'running'}
<div class="mb-4 flex justify-end">
<button
onclick={(e) => { e.stopPropagation(); handleCancelBuild(build.id); }}
disabled={cancelingBuildId === build.id}
class="flex items-center gap-1.5 rounded-[var(--radius-button)] border border-[var(--color-red)]/30 bg-[var(--color-red)]/8 px-3 py-1.5 text-meta text-[var(--color-red)] transition-colors duration-150 hover:bg-[var(--color-red)]/15 disabled:opacity-50"
>
{#if cancelingBuildId === build.id}
<svg class="animate-spin" width="11" height="11" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"><path d="M21 12a9 9 0 1 1-6.219-8.56"/></svg>
{:else}
<svg width="11" height="11" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2.5" stroke-linecap="round" stroke-linejoin="round"><line x1="18" y1="6" x2="6" y2="18"/><line x1="6" y1="6" x2="18" y2="18"/></svg>
{/if}
Cancel build
</button>
</div>
{/if}
{#if build.error} {#if build.error}
<div class="mb-4 rounded-[var(--radius-input)] border border-[var(--color-red)]/30 bg-[var(--color-red)]/5 px-3 py-2 text-meta text-[var(--color-red)]"> <div class="mb-4 rounded-[var(--radius-input)] border border-[var(--color-red)]/30 bg-[var(--color-red)]/5 px-3 py-2 text-meta text-[var(--color-red)]">
{build.error} {build.error}
@ -524,6 +576,7 @@
{@const isInternal = log.phase === 'pre-build' || log.phase === 'post-build'} {@const isInternal = log.phase === 'pre-build' || log.phase === 'post-build'}
{@const recipeIdx = log.phase === 'recipe' ? build.logs.filter(l => l.phase === 'recipe' && l.step <= log.step).length : 0} {@const recipeIdx = log.phase === 'recipe' ? build.logs.filter(l => l.phase === 'recipe' && l.step <= log.step).length : 0}
{@const phaseLabel = isInternal ? (log.phase === 'pre-build' ? 'Pre-build' : 'Post-build') : `Step ${recipeIdx}`} {@const phaseLabel = isInternal ? (log.phase === 'pre-build' ? 'Pre-build' : 'Post-build') : `Step ${recipeIdx}`}
{@const [kw, kwRest] = splitInstruction(log.cmd)}
<div class="rounded-[var(--radius-input)] border border-[var(--color-border)] bg-[var(--color-bg-1)] overflow-hidden"> <div class="rounded-[var(--radius-input)] border border-[var(--color-border)] bg-[var(--color-bg-1)] overflow-hidden">
<!-- Step header --> <!-- Step header -->
<button <button
@ -536,16 +589,8 @@
{:else} {:else}
<svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="var(--color-red)" stroke-width="2.5" stroke-linecap="round" stroke-linejoin="round" class="shrink-0"><line x1="18" y1="6" x2="6" y2="18"/><line x1="6" y1="6" x2="18" y2="18"/></svg> <svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="var(--color-red)" stroke-width="2.5" stroke-linecap="round" stroke-linejoin="round" class="shrink-0"><line x1="18" y1="6" x2="6" y2="18"/><line x1="6" y1="6" x2="18" y2="18"/></svg>
{/if} {/if}
{#if isInternal} <span class="shrink-0 text-label font-semibold text-[var(--color-text-tertiary)]">{phaseLabel}</span>
<span class="flex-1 text-label font-semibold text-[var(--color-text-tertiary)]"> <code class="flex-1 truncate font-mono text-meta"><span style="color: {keywordColor(kw)}">{kw}</span>{#if kwRest}<span class="text-[var(--color-text-secondary)]"> {kwRest}</span>{/if}</code>
{phaseLabel}
</span>
{:else}
<span class="text-label font-semibold text-[var(--color-text-tertiary)]">
{phaseLabel}
</span>
<code class="flex-1 truncate font-mono text-meta text-[var(--color-text-primary)]">{log.cmd}</code>
{/if}
<span class="shrink-0 font-mono text-label text-[var(--color-text-muted)]">{log.elapsed_ms}ms</span> <span class="shrink-0 font-mono text-label text-[var(--color-text-muted)]">{log.elapsed_ms}ms</span>
{#if log.exit !== 0} {#if log.exit !== 0}
<span class="shrink-0 rounded-full bg-[var(--color-red)]/10 px-1.5 py-0.5 font-mono text-label text-[var(--color-red)]"> <span class="shrink-0 rounded-full bg-[var(--color-red)]/10 px-1.5 py-0.5 font-mono text-label text-[var(--color-red)]">
@ -601,9 +646,10 @@
<span class="text-label font-semibold uppercase tracking-[0.06em] text-[var(--color-text-tertiary)]">Recipe</span> <span class="text-label font-semibold uppercase tracking-[0.06em] text-[var(--color-text-tertiary)]">Recipe</span>
<div class="mt-2 rounded-[var(--radius-input)] bg-[var(--color-bg-1)] border border-[var(--color-border)] px-3 py-2"> <div class="mt-2 rounded-[var(--radius-input)] bg-[var(--color-bg-1)] border border-[var(--color-border)] px-3 py-2">
{#each build.recipe as cmd, i} {#each build.recipe as cmd, i}
{@const [kw, kwRest] = splitInstruction(cmd)}
<div class="flex gap-2 py-0.5"> <div class="flex gap-2 py-0.5">
<span class="shrink-0 font-mono text-label text-[var(--color-text-muted)] tabular-nums">{i + 1}.</span> <span class="shrink-0 font-mono text-label text-[var(--color-text-muted)] tabular-nums">{i + 1}.</span>
<code class="font-mono text-meta text-[var(--color-text-secondary)]">{cmd}</code> <code class="font-mono text-meta"><span style="color: {keywordColor(kw)}">{kw}</span>{#if kwRest}<span class="text-[var(--color-text-secondary)]"> {kwRest}</span>{/if}</code>
</div> </div>
{/each} {/each}
</div> </div>
@ -712,18 +758,18 @@
<div> <div>
<label class="mb-1.5 block text-label font-semibold uppercase tracking-[0.05em] text-[var(--color-text-tertiary)]" for="tmpl-recipe"> <label class="mb-1.5 block text-label font-semibold uppercase tracking-[0.05em] text-[var(--color-text-tertiary)]" for="tmpl-recipe">
Recipe <span class="normal-case font-normal text-[var(--color-text-muted)]">(one command per line)</span> Recipe <span class="normal-case font-normal text-[var(--color-text-muted)]">(one instruction per line)</span>
</label> </label>
<textarea <textarea
id="tmpl-recipe" id="tmpl-recipe"
rows="6" rows="7"
placeholder={"apt-get update\napt-get install -y python3 python3-pip\npip3 install numpy pandas"} placeholder={"RUN apt-get install -y python3 python3-pip\nWORKDIR /app\nENV PORT=8080\nRUN pip3 install numpy pandas\nSTART python3 server.py"}
bind:value={createForm.recipe} bind:value={createForm.recipe}
disabled={creating} disabled={creating}
class="w-full resize-y rounded-[var(--radius-input)] border border-[var(--color-border)] bg-[var(--color-bg-4)] px-3 py-2 font-mono text-meta leading-relaxed text-[var(--color-text-bright)] outline-none placeholder:text-[var(--color-text-muted)] transition-colors duration-150 focus:border-[var(--color-accent)] disabled:opacity-60" class="w-full resize-y rounded-[var(--radius-input)] border border-[var(--color-border)] bg-[var(--color-bg-4)] px-3 py-2 font-mono text-meta leading-relaxed text-[var(--color-text-bright)] outline-none placeholder:text-[var(--color-text-muted)] transition-colors duration-150 focus:border-[var(--color-accent)] disabled:opacity-60"
></textarea> ></textarea>
<p class="mt-1 text-label text-[var(--color-text-muted)]"> <p class="mt-1 text-label text-[var(--color-text-muted)]">
Each command runs with a 30s timeout. Non-zero exit codes abort the build. Supports <code class="font-mono">RUN</code>, <code class="font-mono">START</code>, <code class="font-mono">WORKDIR</code>, <code class="font-mono">ENV key=value</code>. RUN steps have a 30s timeout; override with <code class="font-mono">RUN --timeout=5m</code>.
</p> </p>
</div> </div>
@ -743,6 +789,16 @@
If set, the build will poll this command every 1s (up to 60s) after the recipe completes. On success, a full snapshot (with memory state) is created. Without a healthcheck, only the rootfs is saved. If set, the build will poll this command every 1s (up to 60s) after the recipe completes. On success, a full snapshot (with memory state) is created. Without a healthcheck, only the rootfs is saved.
</p> </p>
</div> </div>
<label class="flex cursor-pointer items-center gap-2.5">
<input
type="checkbox"
bind:checked={createForm.skip_pre_post}
disabled={creating}
class="h-4 w-4 cursor-pointer rounded border border-[var(--color-border)] bg-[var(--color-bg-4)] accent-[var(--color-accent)]"
/>
<span class="text-ui text-[var(--color-text-secondary)]">Skip pre-build and post-build steps</span>
</label>
</div> </div>
<div class="mt-6 flex justify-end gap-3"> <div class="mt-6 flex justify-end gap-3">

View File

@ -36,6 +36,7 @@ type createBuildRequest struct {
Healthcheck string `json:"healthcheck"` Healthcheck string `json:"healthcheck"`
VCPUs int32 `json:"vcpus"` VCPUs int32 `json:"vcpus"`
MemoryMB int32 `json:"memory_mb"` MemoryMB int32 `json:"memory_mb"`
SkipPrePost bool `json:"skip_pre_post"`
} }
type buildResponse struct { type buildResponse struct {
@ -127,6 +128,7 @@ func (h *buildHandler) Create(w http.ResponseWriter, r *http.Request) {
Healthcheck: req.Healthcheck, Healthcheck: req.Healthcheck,
VCPUs: req.VCPUs, VCPUs: req.VCPUs,
MemoryMB: req.MemoryMB, MemoryMB: req.MemoryMB,
SkipPrePost: req.SkipPrePost,
}) })
if err != nil { if err != nil {
slog.Error("failed to create build", "error", err) slog.Error("failed to create build", "error", err)
@ -254,3 +256,21 @@ func (h *buildHandler) DeleteTemplate(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusNoContent) w.WriteHeader(http.StatusNoContent)
} }
// Cancel handles POST /v1/admin/builds/{id}/cancel.
//
// It parses the build ID from the URL, asks the service to cancel that build,
// and replies 204 No Content on success. An unparseable ID and any service
// error are both reported as 400 "invalid_request".
//
// NOTE(review): every service failure is mapped to 400 and its message is
// returned verbatim — confirm the service only returns client-facing errors.
func (h *buildHandler) Cancel(w http.ResponseWriter, r *http.Request) {
	buildID, parseErr := id.ParseBuildID(chi.URLParam(r, "id"))
	if parseErr != nil {
		writeError(w, http.StatusBadRequest, "invalid_request", "invalid build ID")
		return
	}

	cancelErr := h.svc.Cancel(r.Context(), buildID)
	if cancelErr != nil {
		writeError(w, http.StatusBadRequest, "invalid_request", cancelErr.Error())
		return
	}

	w.WriteHeader(http.StatusNoContent)
}

View File

@ -12,7 +12,7 @@ import (
) )
const getTemplateBuild = `-- name: GetTemplateBuild :one const getTemplateBuild = `-- name: GetTemplateBuild :one
SELECT id, name, base_template, recipe, healthcheck, vcpus, memory_mb, status, current_step, total_steps, logs, error, sandbox_id, host_id, created_at, started_at, completed_at, template_id, team_id FROM template_builds WHERE id = $1 SELECT id, name, base_template, recipe, healthcheck, vcpus, memory_mb, status, current_step, total_steps, logs, error, sandbox_id, host_id, created_at, started_at, completed_at, template_id, team_id, skip_pre_post FROM template_builds WHERE id = $1
` `
func (q *Queries) GetTemplateBuild(ctx context.Context, id pgtype.UUID) (TemplateBuild, error) { func (q *Queries) GetTemplateBuild(ctx context.Context, id pgtype.UUID) (TemplateBuild, error) {
@ -38,14 +38,15 @@ func (q *Queries) GetTemplateBuild(ctx context.Context, id pgtype.UUID) (Templat
&i.CompletedAt, &i.CompletedAt,
&i.TemplateID, &i.TemplateID,
&i.TeamID, &i.TeamID,
&i.SkipPrePost,
) )
return i, err return i, err
} }
const insertTemplateBuild = `-- name: InsertTemplateBuild :one const insertTemplateBuild = `-- name: InsertTemplateBuild :one
INSERT INTO template_builds (id, name, base_template, recipe, healthcheck, vcpus, memory_mb, status, total_steps, template_id, team_id) INSERT INTO template_builds (id, name, base_template, recipe, healthcheck, vcpus, memory_mb, status, total_steps, template_id, team_id, skip_pre_post)
VALUES ($1, $2, $3, $4, $5, $6, $7, 'pending', $8, $9, $10) VALUES ($1, $2, $3, $4, $5, $6, $7, 'pending', $8, $9, $10, $11)
RETURNING id, name, base_template, recipe, healthcheck, vcpus, memory_mb, status, current_step, total_steps, logs, error, sandbox_id, host_id, created_at, started_at, completed_at, template_id, team_id RETURNING id, name, base_template, recipe, healthcheck, vcpus, memory_mb, status, current_step, total_steps, logs, error, sandbox_id, host_id, created_at, started_at, completed_at, template_id, team_id, skip_pre_post
` `
type InsertTemplateBuildParams struct { type InsertTemplateBuildParams struct {
@ -59,6 +60,7 @@ type InsertTemplateBuildParams struct {
TotalSteps int32 `json:"total_steps"` TotalSteps int32 `json:"total_steps"`
TemplateID pgtype.UUID `json:"template_id"` TemplateID pgtype.UUID `json:"template_id"`
TeamID pgtype.UUID `json:"team_id"` TeamID pgtype.UUID `json:"team_id"`
SkipPrePost bool `json:"skip_pre_post"`
} }
func (q *Queries) InsertTemplateBuild(ctx context.Context, arg InsertTemplateBuildParams) (TemplateBuild, error) { func (q *Queries) InsertTemplateBuild(ctx context.Context, arg InsertTemplateBuildParams) (TemplateBuild, error) {
@ -73,6 +75,7 @@ func (q *Queries) InsertTemplateBuild(ctx context.Context, arg InsertTemplateBui
arg.TotalSteps, arg.TotalSteps,
arg.TemplateID, arg.TemplateID,
arg.TeamID, arg.TeamID,
arg.SkipPrePost,
) )
var i TemplateBuild var i TemplateBuild
err := row.Scan( err := row.Scan(
@ -95,12 +98,13 @@ func (q *Queries) InsertTemplateBuild(ctx context.Context, arg InsertTemplateBui
&i.CompletedAt, &i.CompletedAt,
&i.TemplateID, &i.TemplateID,
&i.TeamID, &i.TeamID,
&i.SkipPrePost,
) )
return i, err return i, err
} }
const listTemplateBuilds = `-- name: ListTemplateBuilds :many const listTemplateBuilds = `-- name: ListTemplateBuilds :many
SELECT id, name, base_template, recipe, healthcheck, vcpus, memory_mb, status, current_step, total_steps, logs, error, sandbox_id, host_id, created_at, started_at, completed_at, template_id, team_id FROM template_builds ORDER BY created_at DESC SELECT id, name, base_template, recipe, healthcheck, vcpus, memory_mb, status, current_step, total_steps, logs, error, sandbox_id, host_id, created_at, started_at, completed_at, template_id, team_id, skip_pre_post FROM template_builds ORDER BY created_at DESC
` `
func (q *Queries) ListTemplateBuilds(ctx context.Context) ([]TemplateBuild, error) { func (q *Queries) ListTemplateBuilds(ctx context.Context) ([]TemplateBuild, error) {
@ -132,6 +136,7 @@ func (q *Queries) ListTemplateBuilds(ctx context.Context) ([]TemplateBuild, erro
&i.CompletedAt, &i.CompletedAt,
&i.TemplateID, &i.TemplateID,
&i.TeamID, &i.TeamID,
&i.SkipPrePost,
); err != nil { ); err != nil {
return nil, err return nil, err
} }
@ -197,9 +202,9 @@ const updateBuildStatus = `-- name: UpdateBuildStatus :one
UPDATE template_builds UPDATE template_builds
SET status = $2, SET status = $2,
started_at = CASE WHEN $2 = 'running' AND started_at IS NULL THEN NOW() ELSE started_at END, started_at = CASE WHEN $2 = 'running' AND started_at IS NULL THEN NOW() ELSE started_at END,
completed_at = CASE WHEN $2 IN ('success', 'failed') THEN NOW() ELSE completed_at END completed_at = CASE WHEN $2 IN ('success', 'failed', 'cancelled') THEN NOW() ELSE completed_at END
WHERE id = $1 WHERE id = $1
RETURNING id, name, base_template, recipe, healthcheck, vcpus, memory_mb, status, current_step, total_steps, logs, error, sandbox_id, host_id, created_at, started_at, completed_at, template_id, team_id RETURNING id, name, base_template, recipe, healthcheck, vcpus, memory_mb, status, current_step, total_steps, logs, error, sandbox_id, host_id, created_at, started_at, completed_at, template_id, team_id, skip_pre_post
` `
type UpdateBuildStatusParams struct { type UpdateBuildStatusParams struct {
@ -230,6 +235,7 @@ func (q *Queries) UpdateBuildStatus(ctx context.Context, arg UpdateBuildStatusPa
&i.CompletedAt, &i.CompletedAt,
&i.TemplateID, &i.TemplateID,
&i.TeamID, &i.TeamID,
&i.SkipPrePost,
) )
return i, err return i, err
} }

View File

@ -0,0 +1,63 @@
package recipe
import (
	"sort"
	"strings"
)
// ExecContext holds mutable state that persists across recipe steps.
// It is initialized empty and updated by ENV and WORKDIR steps.
type ExecContext struct {
	// WorkDir is the directory commands are run from. When empty, no "cd"
	// is added to the generated shell command.
	WorkDir string
	// EnvVars maps variable names to the values prefixed onto each command.
	// A nil map behaves like an empty one when building the prefix.
	EnvVars map[string]string
}
// WrappedCommand builds the string passed to /bin/sh -c for a RUN step.
//
// When neither a working directory nor environment variables are recorded,
// cmd is returned unchanged. Otherwise the context is prepended as a shell
// preamble and the original command is run in a nested, quoted shell:
//
//	cd '/the/dir' && KEY='val' /bin/sh -c 'original command'
func (c *ExecContext) WrappedCommand(cmd string) string {
	if preamble := c.shellPrefix(); preamble != "" {
		return preamble + "/bin/sh -c " + shellescape(cmd)
	}
	return cmd
}
// StartCommand builds the shell command for a START step.
//
// The command is wrapped in "nohup /bin/sh -c '...'" with stdout and stderr
// redirected to /dev/null and a trailing "&", so the outer shell returns
// immediately while the process keeps running in the VM. Any working
// directory / environment preamble is prepended first.
//
// Several START steps may be issued to run multiple background processes
// before a healthcheck is evaluated.
func (c *ExecContext) StartCommand(cmd string) string {
	var out strings.Builder
	out.WriteString(c.shellPrefix())
	out.WriteString("nohup /bin/sh -c ")
	out.WriteString(shellescape(cmd))
	out.WriteString(" >/dev/null 2>&1 &")
	return out.String()
}
// shellPrefix builds the "cd ... && KEY=val " preamble for a shell command.
// Returns an empty string when no context is set.
//
// Environment assignments are emitted in sorted key order. Go randomizes map
// iteration, so without sorting the same context could produce differently
// ordered commands from one call to the next.
//
// Values and the working directory are single-quoted via shellescape. Keys
// are written verbatim.
// NOTE(review): presumably keys are validated when ENV steps are parsed —
// confirm, since an unusual key would be interpreted by the shell.
func (c *ExecContext) shellPrefix() string {
	if c.WorkDir == "" && len(c.EnvVars) == 0 {
		return ""
	}

	var sb strings.Builder
	if c.WorkDir != "" {
		sb.WriteString("cd ")
		sb.WriteString(shellescape(c.WorkDir))
		sb.WriteString(" && ")
	}

	keys := make([]string, 0, len(c.EnvVars))
	for k := range c.EnvVars {
		keys = append(keys, k)
	}
	sort.Strings(keys)

	for _, k := range keys {
		sb.WriteString(k)
		sb.WriteByte('=')
		sb.WriteString(shellescape(c.EnvVars[k]))
		sb.WriteByte(' ')
	}
	return sb.String()
}
// shellescape returns s enclosed in single quotes for use as one POSIX shell
// word. Each embedded single quote is rewritten as '\'' (close the quote,
// emit an escaped quote, reopen). An empty input yields ''.
func shellescape(s string) string {
	segments := strings.Split(s, "'")
	return "'" + strings.Join(segments, `'\''`) + "'"
}

View File

@ -0,0 +1,114 @@
package recipe
import "testing"
// TestExecContext_WrappedCommand checks the exact string WrappedCommand
// produces for an empty context, a working directory, a single environment
// variable, and inputs containing spaces or single quotes.
func TestExecContext_WrappedCommand(t *testing.T) {
	type wrapCase struct {
		name string
		ctx  ExecContext
		cmd  string
		want string
	}

	cases := []wrapCase{
		{"no context", ExecContext{}, "apt install -y curl", "apt install -y curl"},
		{"workdir only", ExecContext{WorkDir: "/app"}, "npm install", "cd '/app' && /bin/sh -c 'npm install'"},
		{"env only", ExecContext{EnvVars: map[string]string{"PORT": "8080"}}, "node server.js", "PORT='8080' /bin/sh -c 'node server.js'"},
		{"workdir with space", ExecContext{WorkDir: "/my project"}, "make build", "cd '/my project' && /bin/sh -c 'make build'"},
		{"command with single quotes", ExecContext{WorkDir: "/app"}, "echo 'hello'", "cd '/app' && /bin/sh -c 'echo '\\''hello'\\'''"},
		{"env value with single quotes", ExecContext{EnvVars: map[string]string{"MSG": "it's fine"}}, "echo $MSG", "MSG='it'\\''s fine' /bin/sh -c 'echo $MSG'"},
	}

	for i := range cases {
		c := cases[i]
		t.Run(c.name, func(t *testing.T) {
			if got := c.ctx.WrappedCommand(c.cmd); got != c.want {
				t.Errorf("WrappedCommand(%q)\n got %q\n want %q", c.cmd, got, c.want)
			}
		})
	}
}
// TestExecContext_StartCommand checks the background-launch command string
// produced by StartCommand with no context, a working directory, and a
// single environment variable.
func TestExecContext_StartCommand(t *testing.T) {
	type startCase struct {
		name string
		ctx  ExecContext
		cmd  string
		want string
	}

	cases := []startCase{
		{"no context", ExecContext{}, "python3 app.py", "nohup /bin/sh -c 'python3 app.py' >/dev/null 2>&1 &"},
		{"with workdir", ExecContext{WorkDir: "/app"}, "python3 server.py", "cd '/app' && nohup /bin/sh -c 'python3 server.py' >/dev/null 2>&1 &"},
		{"with env", ExecContext{EnvVars: map[string]string{"PORT": "9000"}}, "node index.js", "PORT='9000' nohup /bin/sh -c 'node index.js' >/dev/null 2>&1 &"},
	}

	for i := range cases {
		c := cases[i]
		t.Run(c.name, func(t *testing.T) {
			if got := c.ctx.StartCommand(c.cmd); got != c.want {
				t.Errorf("StartCommand(%q)\n got %q\n want %q", c.cmd, got, c.want)
			}
		})
	}
}
// TestShellescape checks single-quote wrapping for plain strings, paths,
// the empty string, and inputs with one or more embedded single quotes.
func TestShellescape(t *testing.T) {
	// Each pair is {input, expected output}.
	pairs := [][2]string{
		{"simple", "'simple'"},
		{"/path/to/dir", "'/path/to/dir'"},
		{"it's fine", "'it'\\''s fine'"},
		{"", "''"},
		{"a'b'c", "'a'\\''b'\\''c'"},
	}

	for _, pair := range pairs {
		input, want := pair[0], pair[1]
		if got := shellescape(input); got != want {
			t.Errorf("shellescape(%q) = %q, want %q", input, got, want)
		}
	}
}

185
internal/recipe/executor.go Normal file
View File

@ -0,0 +1,185 @@
package recipe
import (
"context"
"fmt"
"log/slog"
"strings"
"time"
"connectrpc.com/connect"
pb "git.omukk.dev/wrenn/sandbox/proto/hostagent/gen"
)
// DefaultStepTimeout is the fallback timeout for RUN steps that carry no
// explicit --timeout flag.
//
// Execute does not read this constant itself: it substitutes 10 minutes when
// given a non-positive defaultTimeout, so this value only applies when a
// caller passes it in explicitly.
const DefaultStepTimeout = 30 * time.Second
// BuildLogEntry is the per-step record stored in template_builds.logs (JSONB).
type BuildLogEntry struct {
	// Step is the step number assigned by Execute (continues across phases).
	Step int `json:"step"`
	// Phase is the label passed to Execute, e.g. "pre-build", "recipe", "post-build".
	Phase string `json:"phase"`
	// Cmd is the original instruction text (Step.Raw), not the wrapped shell command.
	Cmd string `json:"cmd"`
	// Stdout is the captured standard output; only RUN steps populate it.
	Stdout string `json:"stdout"`
	// Stderr is the captured standard error, or an executor-generated message
	// when the step could not be run or is unsupported.
	Stderr string `json:"stderr"`
	// Exit is the process exit code; it stays 0 for steps that run no process.
	Exit int32 `json:"exit"`
	// Ok reports whether the step succeeded.
	Ok bool `json:"ok"`
	// Elapsed is the wall-clock duration of the exec call in milliseconds;
	// ENV and WORKDIR entries leave it at 0.
	Elapsed int64 `json:"elapsed_ms"`
}
// ExecFunc is the agent.Exec call signature used by the executor. It matches
// the method on the hostagent Connect RPC client.
//
// Taking it as a function value lets Execute run against any implementation
// with this signature rather than a concrete client.
type ExecFunc func(ctx context.Context, req *connect.Request[pb.ExecRequest]) (*connect.Response[pb.ExecResponse], error)
// Execute runs steps sequentially against sandboxID using execFn.
//
//   - phase labels the log entries (e.g., "pre-build", "recipe", "post-build").
//   - startStep is the number of steps already executed in earlier phases; the
//     first entry produced here is numbered startStep+1, so entries are
//     globally numbered across phases.
//   - defaultTimeout applies to RUN steps with no per-step --timeout; a
//     non-positive value falls back to 10 minutes.
//   - bctx is mutated in place as ENV/WORKDIR steps execute, and carries forward
//     into subsequent phases when the caller passes the same pointer. A nil
//     bctx is tolerated: a throwaway context is used for this call only.
//
// ENV and WORKDIR steps only update bctx and always succeed. USER and COPY
// steps fail immediately with a "not yet supported" entry. START and RUN steps
// are sent to the sandbox through execFn.
//
// Returns all log entries appended during this call, the next step counter
// value, and whether all steps succeeded. On false the last entry contains
// failure details and nextStep is the number of the failing step; the caller
// is responsible for destroying the sandbox and recording the build error.
func Execute(
	ctx context.Context,
	phase string,
	steps []Step,
	sandboxID string,
	startStep int,
	defaultTimeout time.Duration,
	bctx *ExecContext,
	execFn ExecFunc,
) (entries []BuildLogEntry, nextStep int, ok bool) {
	if defaultTimeout <= 0 {
		defaultTimeout = 10 * time.Minute
	}
	if bctx == nil {
		// Avoid a nil dereference for callers that do not track context.
		bctx = &ExecContext{}
	}

	step := startStep
	for _, st := range steps {
		step++
		slog.Info("executing build step", "phase", phase, "step", step, "instruction", st.Raw)

		switch st.Kind {
		case KindENV:
			if bctx.EnvVars == nil {
				bctx.EnvVars = make(map[string]string)
			}
			bctx.EnvVars[st.Key] = st.Value
			entries = append(entries, BuildLogEntry{Step: step, Phase: phase, Cmd: st.Raw, Ok: true})

		case KindWORKDIR:
			bctx.WorkDir = st.Path
			entries = append(entries, BuildLogEntry{Step: step, Phase: phase, Cmd: st.Raw, Ok: true})

		case KindUSER, KindCOPY:
			// Name the instruction from its raw text; fall back to a generic
			// word so a Step built without Raw cannot cause an index panic.
			verb := "instruction"
			if fields := strings.Fields(st.Raw); len(fields) > 0 {
				verb = strings.ToUpper(fields[0])
			}
			entries = append(entries, BuildLogEntry{
				Step:   step,
				Phase:  phase,
				Cmd:    st.Raw,
				Stderr: verb + " is not yet supported",
				Ok:     false,
			})
			return entries, step, false

		case KindSTART:
			entry, succeeded := execStart(ctx, st, sandboxID, phase, step, bctx, execFn)
			entries = append(entries, entry)
			if !succeeded {
				return entries, step, false
			}

		case KindRUN:
			timeout := defaultTimeout
			if st.Timeout > 0 {
				timeout = st.Timeout
			}
			entry, succeeded := execRun(ctx, st, sandboxID, phase, step, timeout, bctx, execFn)
			entries = append(entries, entry)
			if !succeeded {
				return entries, step, false
			}

		default:
			// A Kind this executor does not know must fail loudly instead of
			// being skipped without a log entry.
			entries = append(entries, BuildLogEntry{
				Step:   step,
				Phase:  phase,
				Cmd:    st.Raw,
				Stderr: fmt.Sprintf("unsupported step kind %d", st.Kind),
				Ok:     false,
			})
			return entries, step, false
		}
	}
	return entries, step, true
}
// execRun executes one RUN step in the sandbox and waits for it to exit.
//
// The command is wrapped with the current working directory / environment
// from bctx and run via /bin/sh -c. The call is bounded by timeout both
// through the request context and through the request's TimeoutSec field.
//
// Returns the log entry for the step and whether it succeeded (the exec call
// returned no error and the exit code was 0). On an exec error the entry's
// Stderr carries the error text and Stdout is empty.
func execRun(
	ctx context.Context,
	st Step,
	sandboxID, phase string,
	step int,
	timeout time.Duration,
	bctx *ExecContext,
	execFn ExecFunc,
) (BuildLogEntry, bool) {
	execCtx, cancel := context.WithTimeout(ctx, timeout)
	defer cancel()

	// Convert to whole seconds rounding UP, so a sub-second timeout such as
	// 500ms is sent as 1 rather than truncated to 0, and clamp to int32 so
	// very large durations cannot wrap around.
	// NOTE(review): how the agent interprets TimeoutSec == 0 is not visible
	// here — confirm; the context deadline above still enforces the exact value.
	const maxTimeoutSec = 1<<31 - 1
	secs := int64(timeout / time.Second)
	if timeout%time.Second > 0 {
		secs++
	}
	if secs > maxTimeoutSec {
		secs = maxTimeoutSec
	}

	start := time.Now()
	resp, err := execFn(execCtx, connect.NewRequest(&pb.ExecRequest{
		SandboxId:  sandboxID,
		Cmd:        "/bin/sh",
		Args:       []string{"-c", bctx.WrappedCommand(st.Shell)},
		TimeoutSec: int32(secs),
	}))

	entry := BuildLogEntry{
		Step:    step,
		Phase:   phase,
		Cmd:     st.Raw,
		Elapsed: time.Since(start).Milliseconds(),
	}
	if err != nil {
		entry.Stderr = fmt.Sprintf("exec error: %v", err)
		return entry, false
	}

	entry.Stdout = string(resp.Msg.Stdout)
	entry.Stderr = string(resp.Msg.Stderr)
	entry.Exit = resp.Msg.ExitCode
	entry.Ok = resp.Msg.ExitCode == 0
	return entry, entry.Ok
}
// execStart launches one START step in the sandbox.
//
// The command is sent through execFn wrapped by bctx.StartCommand. Only a
// short, fixed 10s budget is allowed: just long enough for the shell to fork
// and return; the background process itself keeps running inside the VM.
//
// Returns the log entry and whether the launch succeeded. Stdout is never
// recorded; Stderr is filled only with a failure description.
func execStart(
	ctx context.Context,
	st Step,
	sandboxID, phase string,
	step int,
	bctx *ExecContext,
	execFn ExecFunc,
) (BuildLogEntry, bool) {
	const launchTimeout = 10 * time.Second

	execCtx, cancel := context.WithTimeout(ctx, launchTimeout)
	defer cancel()

	began := time.Now()
	resp, err := execFn(execCtx, connect.NewRequest(&pb.ExecRequest{
		SandboxId:  sandboxID,
		Cmd:        "/bin/sh",
		Args:       []string{"-c", bctx.StartCommand(st.Shell)},
		TimeoutSec: int32(launchTimeout / time.Second),
	}))

	entry := BuildLogEntry{
		Step:    step,
		Phase:   phase,
		Cmd:     st.Raw,
		Elapsed: time.Since(began).Milliseconds(),
	}

	if err != nil {
		entry.Stderr = fmt.Sprintf("start error: %v", err)
		return entry, false
	}

	code := resp.Msg.ExitCode
	entry.Exit = code
	if code != 0 {
		entry.Stderr = fmt.Sprintf("start failed with exit code %d: %s", code, string(resp.Msg.Stderr))
		return entry, false
	}

	entry.Ok = true
	return entry, true
}

129
internal/recipe/step.go Normal file
View File

@ -0,0 +1,129 @@
package recipe
import (
"fmt"
"strings"
"time"
)
// Kind identifies the instruction type in a recipe line.
//
// KindRUN is the zero value, so a Step whose Kind was never set is handled
// as a RUN step by the executor.
type Kind int
const (
	KindRUN Kind = iota // Execute a command and wait for it to exit.
	KindSTART // Start a command in the background (non-blocking).
	KindENV // Set an environment variable for subsequent steps.
	KindWORKDIR // Set the working directory for subsequent steps.
	KindUSER // Switch the unix user for subsequent steps. (stub)
	KindCOPY // Copy files into the sandbox. (stub)
)
// Step is the parsed representation of one recipe instruction.
//
// Only the fields that apply to Kind are populated by the parser; all other
// fields keep their zero values. Every field is a comparable type, so two
// Steps can be compared directly with == (the tests rely on this).
type Step struct {
Kind Kind // which instruction this step represents
Raw string // original string (whitespace-trimmed by ParseStep), preserved for logging
Shell string // KindRUN, KindSTART: the shell command text
Timeout time.Duration // KindRUN: value of --timeout=; 0 means use caller's default
Key string // KindENV: variable name (text before the first '=')
Value string // KindENV: variable value (text after the first '='; may be empty)
Path string // KindWORKDIR: directory path
}
// ParseStep parses a single recipe instruction string into a Step.
// Instructions are Dockerfile-like: a keyword followed by arguments.
//
// Supported syntax:
//
//	RUN <cmd>                 — run command, wait for exit
//	RUN --timeout=<d> <cmd>   — run command with explicit timeout (e.g. --timeout=5m)
//	START <cmd>               — start command in background, return immediately
//	ENV <key>=<value>         — set environment variable
//	WORKDIR <path>            — set working directory
//	USER <name>               — not yet supported
//	COPY <src> <dst>          — not yet supported
//
// Leading and trailing whitespace is ignored, and the keyword is matched
// case-insensitively. The keyword may be separated from its arguments by any
// whitespace (space, tab, CR or LF), not only a single space, so an
// instruction such as "RUN\techo hi" parses the same as "RUN echo hi".
//
// The returned Step's Raw field holds the trimmed input. USER and COPY are
// accepted as stubs: only Kind and Raw are set and their arguments are not
// validated.
//
// An error is returned for an empty string, an unknown keyword, or when the
// keyword-specific parser rejects the arguments.
func ParseStep(s string) (Step, error) {
	s = strings.TrimSpace(s)
	if s == "" {
		return Step{}, fmt.Errorf("empty step")
	}

	// The keyword runs up to the first whitespace character. Every separator
	// in the set is a single byte, so slicing at i+1 is safe.
	keyword, rest := s, ""
	if i := strings.IndexAny(s, " \t\r\n"); i >= 0 {
		keyword = s[:i]
		rest = strings.TrimSpace(s[i+1:])
	}

	switch strings.ToUpper(keyword) {
	case "RUN":
		return parseRUN(s, rest)
	case "START":
		return parseSTART(s, rest)
	case "ENV":
		return parseENV(s, rest)
	case "WORKDIR":
		return parseWORKDIR(s, rest)
	case "USER":
		return Step{Kind: KindUSER, Raw: s}, nil
	case "COPY":
		return Step{Kind: KindCOPY, Raw: s}, nil
	default:
		return Step{}, fmt.Errorf("unknown instruction %q (expected RUN, START, ENV, WORKDIR, USER, or COPY)", keyword)
	}
}
// ParseRecipe converts every recipe line into a Step, preserving order.
//
// Parsing stops at the first line ParseStep rejects; the returned error wraps
// the cause and is prefixed with the 1-based line number. On success the
// result is a non-nil slice with exactly one Step per input line (empty when
// lines is empty or nil).
func ParseRecipe(lines []string) ([]Step, error) {
	parsed := make([]Step, len(lines))
	for idx := range lines {
		step, err := ParseStep(lines[idx])
		if err != nil {
			return nil, fmt.Errorf("recipe line %d: %w", idx+1, err)
		}
		parsed[idx] = step
	}
	return parsed, nil
}
// parseRUN builds a KindRUN step from the argument text following the RUN
// keyword.
//
// raw is the full trimmed instruction (stored in Step.Raw and used in error
// messages); rest is everything after the keyword, already trimmed.
//
// An optional leading "--timeout=<duration>" flag is recognised. The duration
// uses time.ParseDuration syntax (e.g. "30s", "5m") and must be followed by
// whitespace and a non-empty command. A zero duration is allowed and keeps the
// "use caller's default" meaning of Step.Timeout; a negative duration is
// rejected because it can never describe a usable deadline.
//
// Errors are returned when the flag has no command after it, the duration is
// malformed or negative, or no command is given at all.
func parseRUN(raw, rest string) (Step, error) {
	const timeoutFlag = "--timeout="

	var timeout time.Duration
	if strings.HasPrefix(rest, timeoutFlag) {
		arg := rest[len(timeoutFlag):]

		// The flag value ends at the first whitespace character of any kind,
		// so a tab between the duration and the command is accepted too.
		end := strings.IndexAny(arg, " \t\r\n")
		if end < 0 || strings.TrimSpace(arg[end:]) == "" {
			return Step{}, fmt.Errorf("RUN --timeout= flag has no command: %q", raw)
		}

		flag := arg[:end]
		d, err := time.ParseDuration(flag)
		if err != nil {
			return Step{}, fmt.Errorf("RUN --timeout= invalid duration %q: %w", flag, err)
		}
		if d < 0 {
			return Step{}, fmt.Errorf("RUN --timeout= duration must not be negative: %q", flag)
		}

		timeout = d
		rest = strings.TrimSpace(arg[end:])
	}

	if rest == "" {
		return Step{}, fmt.Errorf("RUN requires a command: %q", raw)
	}
	return Step{Kind: KindRUN, Raw: raw, Shell: rest, Timeout: timeout}, nil
}
// parseSTART builds a KindSTART step whose Shell is the argument text after
// the START keyword. raw is the full instruction, kept in Step.Raw and quoted
// in the error returned when no command was supplied.
func parseSTART(raw, rest string) (Step, error) {
	if len(rest) > 0 {
		return Step{Kind: KindSTART, Raw: raw, Shell: rest}, nil
	}
	return Step{}, fmt.Errorf("START requires a command: %q", raw)
}
// parseENV builds a KindENV step from "KEY=VALUE" argument text.
//
// The split happens at the first '=' only, so the value may itself contain
// '=' characters or spaces and may be empty. An error is returned when there
// is no '=' at all or when nothing precedes it.
func parseENV(raw, rest string) (Step, error) {
	eq := strings.IndexByte(rest, '=')
	switch {
	case eq < 0:
		return Step{}, fmt.Errorf("ENV requires KEY=VALUE format: %q", raw)
	case eq == 0:
		return Step{}, fmt.Errorf("ENV key is empty: %q", raw)
	}
	return Step{Kind: KindENV, Raw: raw, Key: rest[:eq], Value: rest[eq+1:]}, nil
}
// parseWORKDIR builds a KindWORKDIR step. The whole argument text is taken as
// the path verbatim (spaces included); an empty path is an error.
func parseWORKDIR(raw, path string) (Step, error) {
	if len(path) != 0 {
		return Step{Kind: KindWORKDIR, Raw: raw, Path: path}, nil
	}
	return Step{}, fmt.Errorf("WORKDIR requires a path: %q", raw)
}

View File

@ -0,0 +1,208 @@
package recipe
import (
"testing"
"time"
)
// TestParseStep drives ParseStep with a table of instruction strings. Each
// case either expects an error or expects the returned Step to equal want
// exactly (Step is comparable, so == is used).
func TestParseStep(t *testing.T) {
	type parseCase struct {
		name    string
		input   string
		want    Step
		wantErr bool
	}

	cases := []parseCase{
		// RUN
		{
			name:  "RUN basic",
			input: "RUN apt install -y curl",
			want:  Step{Kind: KindRUN, Raw: "RUN apt install -y curl", Shell: "apt install -y curl"},
		},
		{
			name:  "RUN lowercase",
			input: "run echo hello",
			want:  Step{Kind: KindRUN, Raw: "run echo hello", Shell: "echo hello"},
		},
		{
			name:  "RUN with timeout",
			input: "RUN --timeout=5m npm install",
			want:  Step{Kind: KindRUN, Raw: "RUN --timeout=5m npm install", Shell: "npm install", Timeout: 5 * time.Minute},
		},
		{
			name:  "RUN with timeout seconds",
			input: "RUN --timeout=30s make build",
			want:  Step{Kind: KindRUN, Raw: "RUN --timeout=30s make build", Shell: "make build", Timeout: 30 * time.Second},
		},
		{name: "RUN no command", input: "RUN", wantErr: true},
		{name: "RUN timeout no command", input: "RUN --timeout=5m", wantErr: true},
		{name: "RUN invalid timeout", input: "RUN --timeout=notaduration echo hi", wantErr: true},

		// START
		{
			name:  "START basic",
			input: "START python3 app.py",
			want:  Step{Kind: KindSTART, Raw: "START python3 app.py", Shell: "python3 app.py"},
		},
		{
			name:  "START uppercase",
			input: "START node server.js --port=8080",
			want:  Step{Kind: KindSTART, Raw: "START node server.js --port=8080", Shell: "node server.js --port=8080"},
		},
		{name: "START no command", input: "START", wantErr: true},

		// ENV
		{
			name:  "ENV basic",
			input: "ENV FOO=bar",
			want:  Step{Kind: KindENV, Raw: "ENV FOO=bar", Key: "FOO", Value: "bar"},
		},
		{
			name:  "ENV value with spaces",
			input: "ENV GREETING=hello world",
			want:  Step{Kind: KindENV, Raw: "ENV GREETING=hello world", Key: "GREETING", Value: "hello world"},
		},
		{
			name:  "ENV value with equals sign",
			input: "ENV URL=http://example.com?a=1",
			want:  Step{Kind: KindENV, Raw: "ENV URL=http://example.com?a=1", Key: "URL", Value: "http://example.com?a=1"},
		},
		{
			name:  "ENV empty value",
			input: "ENV FOO=",
			want:  Step{Kind: KindENV, Raw: "ENV FOO=", Key: "FOO", Value: ""},
		},
		{name: "ENV missing equals", input: "ENV FOO", wantErr: true},
		{name: "ENV empty key", input: "ENV =value", wantErr: true},

		// WORKDIR
		{
			name:  "WORKDIR basic",
			input: "WORKDIR /app",
			want:  Step{Kind: KindWORKDIR, Raw: "WORKDIR /app", Path: "/app"},
		},
		{
			name:  "WORKDIR with spaces in path",
			input: "WORKDIR /my project",
			want:  Step{Kind: KindWORKDIR, Raw: "WORKDIR /my project", Path: "/my project"},
		},
		{name: "WORKDIR empty", input: "WORKDIR", wantErr: true},

		// USER and COPY stubs
		{
			name:  "USER stub",
			input: "USER www-data",
			want:  Step{Kind: KindUSER, Raw: "USER www-data"},
		},
		{
			name:  "COPY stub",
			input: "COPY config.yaml /etc/app/config.yaml",
			want:  Step{Kind: KindCOPY, Raw: "COPY config.yaml /etc/app/config.yaml"},
		},

		// Unknown keyword
		{name: "unknown keyword", input: "FROBNICATE something", wantErr: true},

		// Empty input
		{name: "empty string", input: "", wantErr: true},
	}

	for i := range cases {
		c := cases[i]
		t.Run(c.name, func(t *testing.T) {
			got, err := ParseStep(c.input)
			switch {
			case c.wantErr && err == nil:
				t.Fatalf("ParseStep(%q) expected error, got %+v", c.input, got)
			case c.wantErr:
				// An error was expected and one was returned; nothing more to check.
			case err != nil:
				t.Fatalf("ParseStep(%q) unexpected error: %v", c.input, err)
			case got != c.want:
				t.Errorf("ParseStep(%q)\n got %+v\n want %+v", c.input, got, c.want)
			}
		})
	}
}
// TestParseRecipe covers the three ParseRecipe outcomes: a fully valid recipe
// (spot-checking kinds and a parsed timeout), a recipe containing an invalid
// line, and a nil recipe.
func TestParseRecipe(t *testing.T) {
	t.Run("valid recipe", func(t *testing.T) {
		steps, err := ParseRecipe([]string{
			"RUN apt update",
			"WORKDIR /app",
			"ENV PORT=8080",
			"START python3 server.py",
			"RUN --timeout=2m pip install -r requirements.txt",
		})
		if err != nil {
			t.Fatalf("unexpected error: %v", err)
		}
		if n := len(steps); n != 5 {
			t.Fatalf("expected 5 steps, got %d", n)
		}

		if got := steps[0].Kind; got != KindRUN {
			t.Errorf("step 0: want KindRUN, got %v", got)
		}
		if got := steps[1].Kind; got != KindWORKDIR {
			t.Errorf("step 1: want KindWORKDIR, got %v", got)
		}
		if got := steps[3].Kind; got != KindSTART {
			t.Errorf("step 3: want KindSTART, got %v", got)
		}
		if got := steps[4].Timeout; got != 2*time.Minute {
			t.Errorf("step 4: want 2m timeout, got %v", got)
		}
	})

	t.Run("error on invalid line", func(t *testing.T) {
		if _, err := ParseRecipe([]string{
			"RUN apt update",
			"BADCMD something",
		}); err == nil {
			t.Fatal("expected error for invalid line, got nil")
		}
	})

	t.Run("empty recipe", func(t *testing.T) {
		steps, err := ParseRecipe(nil)
		if err != nil {
			t.Fatalf("unexpected error: %v", err)
		}
		if n := len(steps); n != 0 {
			t.Fatalf("expected 0 steps, got %d", n)
		}
	})
}

View File

@ -5,6 +5,7 @@ import (
"encoding/json" "encoding/json"
"fmt" "fmt"
"log/slog" "log/slog"
"sync"
"time" "time"
"connectrpc.com/connect" "connectrpc.com/connect"
@ -14,6 +15,7 @@ import (
"git.omukk.dev/wrenn/sandbox/internal/db" "git.omukk.dev/wrenn/sandbox/internal/db"
"git.omukk.dev/wrenn/sandbox/internal/id" "git.omukk.dev/wrenn/sandbox/internal/id"
"git.omukk.dev/wrenn/sandbox/internal/lifecycle" "git.omukk.dev/wrenn/sandbox/internal/lifecycle"
"git.omukk.dev/wrenn/sandbox/internal/recipe"
"git.omukk.dev/wrenn/sandbox/internal/scheduler" "git.omukk.dev/wrenn/sandbox/internal/scheduler"
pb "git.omukk.dev/wrenn/sandbox/proto/hostagent/gen" pb "git.omukk.dev/wrenn/sandbox/proto/hostagent/gen"
) )
@ -27,14 +29,14 @@ const (
// preBuildCmds run before the user recipe to prepare the build environment. // preBuildCmds run before the user recipe to prepare the build environment.
var preBuildCmds = []string{ var preBuildCmds = []string{
"apt update", "RUN apt update",
} }
// postBuildCmds run after the user recipe to clean up caches and reduce image size. // postBuildCmds run after the user recipe to clean up caches and reduce image size.
var postBuildCmds = []string{ var postBuildCmds = []string{
"apt clean", "RUN apt clean",
"apt autoremove -y", "RUN apt autoremove -y",
"rm -rf /var/lib/apt/lists/*", "RUN rm -rf /var/lib/apt/lists/*",
} }
// buildAgentClient is the subset of the host agent client used by the build worker. // buildAgentClient is the subset of the host agent client used by the build worker.
@ -46,24 +48,15 @@ type buildAgentClient interface {
FlattenRootfs(ctx context.Context, req *connect.Request[pb.FlattenRootfsRequest]) (*connect.Response[pb.FlattenRootfsResponse], error) FlattenRootfs(ctx context.Context, req *connect.Request[pb.FlattenRootfsRequest]) (*connect.Response[pb.FlattenRootfsResponse], error)
} }
// BuildLogEntry represents a single entry in the build log JSONB array.
type BuildLogEntry struct {
Step int `json:"step"`
Phase string `json:"phase"` // "pre-build", "recipe", or "post-build"
Cmd string `json:"cmd"`
Stdout string `json:"stdout"`
Stderr string `json:"stderr"`
Exit int32 `json:"exit"`
Ok bool `json:"ok"`
Elapsed int64 `json:"elapsed_ms"`
}
// BuildService handles template build orchestration. // BuildService handles template build orchestration.
type BuildService struct { type BuildService struct {
DB *db.Queries DB *db.Queries
Redis *redis.Client Redis *redis.Client
Pool *lifecycle.HostClientPool Pool *lifecycle.HostClientPool
Scheduler scheduler.HostScheduler Scheduler scheduler.HostScheduler
mu sync.Mutex
cancelMap map[string]context.CancelFunc // buildID → per-build cancel func
} }
// BuildCreateParams holds the parameters for creating a template build. // BuildCreateParams holds the parameters for creating a template build.
@ -74,6 +67,7 @@ type BuildCreateParams struct {
Healthcheck string Healthcheck string
VCPUs int32 VCPUs int32
MemoryMB int32 MemoryMB int32
SkipPrePost bool
} }
// Create inserts a new build record and enqueues it to Redis. // Create inserts a new build record and enqueues it to Redis.
@ -97,6 +91,11 @@ func (s *BuildService) Create(ctx context.Context, p BuildCreateParams) (db.Temp
buildIDStr := id.FormatBuildID(buildID) buildIDStr := id.FormatBuildID(buildID)
newTemplateID := id.NewTemplateID() newTemplateID := id.NewTemplateID()
defaultSteps := len(preBuildCmds) + len(postBuildCmds)
if p.SkipPrePost {
defaultSteps = 0
}
build, err := s.DB.InsertTemplateBuild(ctx, db.InsertTemplateBuildParams{ build, err := s.DB.InsertTemplateBuild(ctx, db.InsertTemplateBuildParams{
ID: buildID, ID: buildID,
Name: p.Name, Name: p.Name,
@ -105,9 +104,10 @@ func (s *BuildService) Create(ctx context.Context, p BuildCreateParams) (db.Temp
Healthcheck: p.Healthcheck, Healthcheck: p.Healthcheck,
Vcpus: p.VCPUs, Vcpus: p.VCPUs,
MemoryMb: p.MemoryMB, MemoryMb: p.MemoryMB,
TotalSteps: int32(len(p.Recipe) + len(preBuildCmds) + len(postBuildCmds)), TotalSteps: int32(len(p.Recipe) + defaultSteps),
TemplateID: newTemplateID, TemplateID: newTemplateID,
TeamID: id.PlatformTeamID, TeamID: id.PlatformTeamID,
SkipPrePost: p.SkipPrePost,
}) })
if err != nil { if err != nil {
return db.TemplateBuild{}, fmt.Errorf("insert build: %w", err) return db.TemplateBuild{}, fmt.Errorf("insert build: %w", err)
@ -131,6 +131,40 @@ func (s *BuildService) List(ctx context.Context) ([]db.TemplateBuild, error) {
return s.DB.ListTemplateBuilds(ctx) return s.DB.ListTemplateBuilds(ctx)
} }
// Cancel requests cancellation of the build identified by buildID.
//
// A build whose status is already "success", "failed" or "cancelled" cannot be
// cancelled and yields an error. Otherwise the status is first written to the
// DB as "cancelled" — this covers builds still waiting in the queue, which the
// worker skips once it dequeues them — and then, if a per-build cancel func is
// registered in cancelMap (i.e. the build is currently executing), that func
// is invoked so the in-flight step aborts.
//
// Errors from loading the build or updating its status are wrapped and
// returned.
func (s *BuildService) Cancel(ctx context.Context, buildID pgtype.UUID) error {
	build, err := s.DB.GetTemplateBuild(ctx, buildID)
	if err != nil {
		return fmt.Errorf("get build: %w", err)
	}

	// Terminal states are final; refuse to touch them.
	if st := build.Status; st == "success" || st == "failed" || st == "cancelled" {
		return fmt.Errorf("build is already %s", st)
	}

	// Persist the cancelled status before signalling, so a build that has not
	// been picked up yet is still seen as cancelled when it is dequeued.
	params := db.UpdateBuildStatusParams{ID: buildID, Status: "cancelled"}
	if _, err := s.DB.UpdateBuildStatus(ctx, params); err != nil {
		return fmt.Errorf("update build status: %w", err)
	}

	// Look up the running build's cancel func under the lock, but call it
	// after releasing the lock.
	key := id.FormatBuildID(buildID)
	s.mu.Lock()
	stop, active := s.cancelMap[key]
	s.mu.Unlock()

	if active {
		stop()
	}
	return nil
}
// StartWorkers launches n goroutines that consume from the Redis build queue. // StartWorkers launches n goroutines that consume from the Redis build queue.
// The returned cancel function stops all workers. // The returned cancel function stops all workers.
func (s *BuildService) StartWorkers(ctx context.Context, n int) context.CancelFunc { func (s *BuildService) StartWorkers(ctx context.Context, n int) context.CancelFunc {
@ -172,14 +206,38 @@ func (s *BuildService) executeBuild(ctx context.Context, buildIDStr string) {
return return
} }
build, err := s.DB.GetTemplateBuild(ctx, buildID) // Create a per-build context so this build can be cancelled independently of
// the worker. Register in cancelMap before fetching the build so that a
// concurrent Cancel call can always find and signal it.
buildCtx, buildCancel := context.WithCancel(ctx)
defer buildCancel()
s.mu.Lock()
if s.cancelMap == nil {
s.cancelMap = make(map[string]context.CancelFunc)
}
s.cancelMap[buildIDStr] = buildCancel
s.mu.Unlock()
defer func() {
s.mu.Lock()
delete(s.cancelMap, buildIDStr)
s.mu.Unlock()
}()
build, err := s.DB.GetTemplateBuild(buildCtx, buildID)
if err != nil { if err != nil {
log.Error("failed to fetch build", "error", err) log.Error("failed to fetch build", "error", err)
return return
} }
// Skip if already cancelled (Cancel was called before we dequeued).
if build.Status == "cancelled" {
log.Info("build already cancelled, skipping")
return
}
// Mark as running. // Mark as running.
if _, err := s.DB.UpdateBuildStatus(ctx, db.UpdateBuildStatusParams{ if _, err := s.DB.UpdateBuildStatus(buildCtx, db.UpdateBuildStatusParams{
ID: buildID, Status: "running", ID: buildID, Status: "running",
}); err != nil { }); err != nil {
log.Error("failed to update build status", "error", err) log.Error("failed to update build status", "error", err)
@ -187,22 +245,22 @@ func (s *BuildService) executeBuild(ctx context.Context, buildIDStr string) {
} }
// Parse user recipe. // Parse user recipe.
var recipe []string var userRecipe []string
if err := json.Unmarshal(build.Recipe, &recipe); err != nil { if err := json.Unmarshal(build.Recipe, &userRecipe); err != nil {
s.failBuild(ctx, buildID, fmt.Sprintf("invalid recipe JSON: %v", err)) s.failBuild(buildCtx, buildID, fmt.Sprintf("invalid recipe JSON: %v", err))
return return
} }
// Pick a platform host and create a sandbox. // Pick a platform host and create a sandbox.
host, err := s.Scheduler.SelectHost(ctx, id.PlatformTeamID, false) host, err := s.Scheduler.SelectHost(buildCtx, id.PlatformTeamID, false)
if err != nil { if err != nil {
s.failBuild(ctx, buildID, fmt.Sprintf("no host available: %v", err)) s.failBuild(buildCtx, buildID, fmt.Sprintf("no host available: %v", err))
return return
} }
agent, err := s.Pool.GetForHost(host) agent, err := s.Pool.GetForHost(host)
if err != nil { if err != nil {
s.failBuild(ctx, buildID, fmt.Sprintf("agent client error: %v", err)) s.failBuild(buildCtx, buildID, fmt.Sprintf("agent client error: %v", err))
return return
} }
@ -214,16 +272,16 @@ func (s *BuildService) executeBuild(ctx context.Context, buildIDStr string) {
baseTeamID := id.PlatformTeamID baseTeamID := id.PlatformTeamID
baseTemplateID := id.MinimalTemplateID baseTemplateID := id.MinimalTemplateID
if build.BaseTemplate != "minimal" { if build.BaseTemplate != "minimal" {
baseTmpl, err := s.DB.GetPlatformTemplateByName(ctx, build.BaseTemplate) baseTmpl, err := s.DB.GetPlatformTemplateByName(buildCtx, build.BaseTemplate)
if err != nil { if err != nil {
s.failBuild(ctx, buildID, fmt.Sprintf("base template %q not found: %v", build.BaseTemplate, err)) s.failBuild(buildCtx, buildID, fmt.Sprintf("base template %q not found: %v", build.BaseTemplate, err))
return return
} }
baseTeamID = baseTmpl.TeamID baseTeamID = baseTmpl.TeamID
baseTemplateID = baseTmpl.ID baseTemplateID = baseTmpl.ID
} }
resp, err := agent.CreateSandbox(ctx, connect.NewRequest(&pb.CreateSandboxRequest{ resp, err := agent.CreateSandbox(buildCtx, connect.NewRequest(&pb.CreateSandboxRequest{
SandboxId: sandboxIDStr, SandboxId: sandboxIDStr,
Template: build.BaseTemplate, Template: build.BaseTemplate,
TeamId: id.UUIDString(baseTeamID), TeamId: id.UUIDString(baseTeamID),
@ -234,129 +292,121 @@ func (s *BuildService) executeBuild(ctx context.Context, buildIDStr string) {
DiskSizeMb: 5120, // 5 GB for template builds DiskSizeMb: 5120, // 5 GB for template builds
})) }))
if err != nil { if err != nil {
s.failBuild(ctx, buildID, fmt.Sprintf("create sandbox failed: %v", err)) s.failBuild(buildCtx, buildID, fmt.Sprintf("create sandbox failed: %v", err))
return return
} }
_ = resp _ = resp
// Record sandbox/host association. // Record sandbox/host association.
_ = s.DB.UpdateBuildSandbox(ctx, db.UpdateBuildSandboxParams{ _ = s.DB.UpdateBuildSandbox(buildCtx, db.UpdateBuildSandboxParams{
ID: buildID, ID: buildID,
SandboxID: sandboxID, SandboxID: sandboxID,
HostID: host.ID, HostID: host.ID,
}) })
// Execute build phases: pre-build → user recipe → post-build. // Parse recipe steps. preBuildCmds and postBuildCmds are hardcoded and always
var logs []BuildLogEntry // valid; panic on error is appropriate here since it would be a programmer mistake.
step := 0 preBuildSteps, err := recipe.ParseRecipe(preBuildCmds)
// Helper to run a list of commands in a given phase.
// timeout=0 means no timeout (uses parent context).
runPhase := func(phase string, cmds []string, timeout time.Duration) bool {
for _, cmd := range cmds {
step++
log.Info("executing build step", "phase", phase, "step", step, "cmd", cmd)
execCtx := ctx
var cancel context.CancelFunc
// When no timeout is specified, use 10 minutes as a generous upper
// bound. The host agent defaults TimeoutSec=0 to 30s, so we must
// always send an explicit value.
effectiveTimeout := timeout
if effectiveTimeout <= 0 {
effectiveTimeout = 10 * time.Minute
}
execCtx, cancel = context.WithTimeout(ctx, effectiveTimeout)
timeoutSec := int32(effectiveTimeout.Seconds())
start := time.Now()
execResp, err := agent.Exec(execCtx, connect.NewRequest(&pb.ExecRequest{
SandboxId: sandboxIDStr,
Cmd: "/bin/sh",
Args: []string{"-c", cmd},
TimeoutSec: timeoutSec,
}))
cancel()
entry := BuildLogEntry{
Step: step,
Phase: phase,
Cmd: cmd,
Elapsed: time.Since(start).Milliseconds(),
}
if err != nil { if err != nil {
entry.Stderr = err.Error() panic(fmt.Sprintf("invalid pre-build recipe: %v", err))
entry.Ok = false }
logs = append(logs, entry) userRecipeSteps, err := recipe.ParseRecipe(userRecipe)
s.updateLogs(ctx, buildID, step, logs) if err != nil {
s.destroySandbox(ctx, agent, sandboxIDStr) s.destroySandbox(buildCtx, agent, sandboxIDStr)
s.failBuild(ctx, buildID, fmt.Sprintf("%s step %d failed: %v", phase, step, err)) s.failBuild(buildCtx, buildID, fmt.Sprintf("recipe parse error: %v", err))
return
}
postBuildSteps, err := recipe.ParseRecipe(postBuildCmds)
if err != nil {
panic(fmt.Sprintf("invalid post-build recipe: %v", err))
}
// Execute build phases: pre-build → user recipe → post-build.
// bctx carries working directory and env vars across all phases.
var logs []recipe.BuildLogEntry
step := 0
bctx := &recipe.ExecContext{}
runPhase := func(phase string, steps []recipe.Step, defaultTimeout time.Duration) bool {
newEntries, nextStep, ok := recipe.Execute(buildCtx, phase, steps, sandboxIDStr, step, defaultTimeout, bctx, agent.Exec)
logs = append(logs, newEntries...)
step = nextStep
s.updateLogs(buildCtx, buildID, step, logs)
if !ok {
s.destroySandbox(buildCtx, agent, sandboxIDStr)
// If the build was cancelled, status is already set — don't overwrite with "failed".
if buildCtx.Err() != nil {
return false return false
} }
last := newEntries[len(newEntries)-1]
entry.Stdout = string(execResp.Msg.Stdout) reason := last.Stderr
entry.Stderr = string(execResp.Msg.Stderr) if reason == "" {
entry.Exit = execResp.Msg.ExitCode reason = fmt.Sprintf("exit code %d", last.Exit)
entry.Ok = execResp.Msg.ExitCode == 0
logs = append(logs, entry)
s.updateLogs(ctx, buildID, step, logs)
if execResp.Msg.ExitCode != 0 {
s.destroySandbox(ctx, agent, sandboxIDStr)
s.failBuild(ctx, buildID, fmt.Sprintf("%s step %d failed with exit code %d", phase, step, execResp.Msg.ExitCode))
return false
} }
s.failBuild(buildCtx, buildID, fmt.Sprintf("%s step %d failed: %s", phase, step, reason))
} }
return true return ok
} }
if !runPhase("pre-build", preBuildCmds, 0) { if !build.SkipPrePost {
if !runPhase("pre-build", preBuildSteps, 0) {
return return
} }
if !runPhase("recipe", recipe, buildCommandTimeout) { }
if !runPhase("recipe", userRecipeSteps, buildCommandTimeout) {
return return
} }
if !runPhase("post-build", postBuildCmds, 0) { if !build.SkipPrePost {
if !runPhase("post-build", postBuildSteps, 0) {
return return
} }
}
// Healthcheck or direct snapshot. // Healthcheck or direct snapshot.
var sizeBytes int64 var sizeBytes int64
if build.Healthcheck != "" { if build.Healthcheck != "" {
log.Info("running healthcheck", "cmd", build.Healthcheck) log.Info("running healthcheck", "cmd", build.Healthcheck)
if err := s.waitForHealthcheck(ctx, agent, sandboxIDStr, build.Healthcheck); err != nil { if err := s.waitForHealthcheck(buildCtx, agent, sandboxIDStr, build.Healthcheck); err != nil {
s.destroySandbox(ctx, agent, sandboxIDStr) s.destroySandbox(buildCtx, agent, sandboxIDStr)
s.failBuild(ctx, buildID, fmt.Sprintf("healthcheck failed: %v", err)) if buildCtx.Err() != nil {
return
}
s.failBuild(buildCtx, buildID, fmt.Sprintf("healthcheck failed: %v", err))
return return
} }
// Healthcheck passed → full snapshot (with memory/CPU state). // Healthcheck passed → full snapshot (with memory/CPU state).
log.Info("healthcheck passed, creating snapshot") log.Info("healthcheck passed, creating snapshot")
snapResp, err := agent.CreateSnapshot(ctx, connect.NewRequest(&pb.CreateSnapshotRequest{ snapResp, err := agent.CreateSnapshot(buildCtx, connect.NewRequest(&pb.CreateSnapshotRequest{
SandboxId: sandboxIDStr, SandboxId: sandboxIDStr,
Name: build.Name, Name: build.Name,
TeamId: id.UUIDString(build.TeamID), TeamId: id.UUIDString(build.TeamID),
TemplateId: id.UUIDString(build.TemplateID), TemplateId: id.UUIDString(build.TemplateID),
})) }))
if err != nil { if err != nil {
s.destroySandbox(ctx, agent, sandboxIDStr) s.destroySandbox(buildCtx, agent, sandboxIDStr)
s.failBuild(ctx, buildID, fmt.Sprintf("create snapshot failed: %v", err)) if buildCtx.Err() != nil {
return
}
s.failBuild(buildCtx, buildID, fmt.Sprintf("create snapshot failed: %v", err))
return return
} }
sizeBytes = snapResp.Msg.SizeBytes sizeBytes = snapResp.Msg.SizeBytes
} else { } else {
// No healthcheck → image-only template (rootfs only). // No healthcheck → image-only template (rootfs only).
log.Info("no healthcheck, flattening rootfs") log.Info("no healthcheck, flattening rootfs")
flatResp, err := agent.FlattenRootfs(ctx, connect.NewRequest(&pb.FlattenRootfsRequest{ flatResp, err := agent.FlattenRootfs(buildCtx, connect.NewRequest(&pb.FlattenRootfsRequest{
SandboxId: sandboxIDStr, SandboxId: sandboxIDStr,
Name: build.Name, Name: build.Name,
TeamId: id.UUIDString(build.TeamID), TeamId: id.UUIDString(build.TeamID),
TemplateId: id.UUIDString(build.TemplateID), TemplateId: id.UUIDString(build.TemplateID),
})) }))
if err != nil { if err != nil {
s.destroySandbox(ctx, agent, sandboxIDStr) s.destroySandbox(buildCtx, agent, sandboxIDStr)
s.failBuild(ctx, buildID, fmt.Sprintf("flatten rootfs failed: %v", err)) if buildCtx.Err() != nil {
return
}
s.failBuild(buildCtx, buildID, fmt.Sprintf("flatten rootfs failed: %v", err))
return return
} }
sizeBytes = flatResp.Msg.SizeBytes sizeBytes = flatResp.Msg.SizeBytes
@ -368,7 +418,7 @@ func (s *BuildService) executeBuild(ctx context.Context, buildIDStr string) {
templateType = "snapshot" templateType = "snapshot"
} }
if _, err := s.DB.InsertTemplate(ctx, db.InsertTemplateParams{ if _, err := s.DB.InsertTemplate(buildCtx, db.InsertTemplateParams{
ID: build.TemplateID, ID: build.TemplateID,
Name: build.Name, Name: build.Name,
Type: templateType, Type: templateType,
@ -386,7 +436,7 @@ func (s *BuildService) executeBuild(ctx context.Context, buildIDStr string) {
// No additional destroy needed. // No additional destroy needed.
// Mark build as success. // Mark build as success.
if _, err := s.DB.UpdateBuildStatus(ctx, db.UpdateBuildStatusParams{ if _, err := s.DB.UpdateBuildStatus(buildCtx, db.UpdateBuildStatusParams{
ID: buildID, Status: "success", ID: buildID, Status: "success",
}); err != nil { }); err != nil {
log.Error("failed to mark build as success", "error", err) log.Error("failed to mark build as success", "error", err)
@ -429,7 +479,7 @@ func (s *BuildService) waitForHealthcheck(ctx context.Context, agent buildAgentC
} }
} }
func (s *BuildService) updateLogs(ctx context.Context, buildID pgtype.UUID, step int, logs []BuildLogEntry) { func (s *BuildService) updateLogs(ctx context.Context, buildID pgtype.UUID, step int, logs []recipe.BuildLogEntry) {
logsJSON, err := json.Marshal(logs) logsJSON, err := json.Marshal(logs)
if err != nil { if err != nil {
slog.Warn("failed to marshal build logs", "error", err) slog.Warn("failed to marshal build logs", "error", err)