diff --git a/cmd/control-plane/main.go b/cmd/control-plane/main.go index 40c3c48..af57d2b 100644 --- a/cmd/control-plane/main.go +++ b/cmd/control-plane/main.go @@ -90,6 +90,10 @@ func main() { // API server. srv := api.New(queries, hostPool, hostScheduler, pool, rdb, []byte(cfg.JWTSecret), oauthRegistry, cfg.OAuthRedirectURL) + // Start template build workers (2 concurrent). + stopBuildWorkers := srv.BuildSvc.StartWorkers(ctx, 2) + defer stopBuildWorkers() + // Start host monitor (passive + active reconciliation every 30s). monitor := api.NewHostMonitor(queries, hostPool, audit.New(queries), 30*time.Second) monitor.Start(ctx) diff --git a/db/migrations/20260326090649_template_builds.sql b/db/migrations/20260326090649_template_builds.sql new file mode 100644 index 0000000..8e5326d --- /dev/null +++ b/db/migrations/20260326090649_template_builds.sql @@ -0,0 +1,25 @@ +-- +goose Up + +CREATE TABLE template_builds ( + id TEXT PRIMARY KEY, + name TEXT NOT NULL, + base_template TEXT NOT NULL DEFAULT 'minimal', + recipe JSONB NOT NULL DEFAULT '[]', + healthcheck TEXT, + vcpus INTEGER NOT NULL DEFAULT 1, + memory_mb INTEGER NOT NULL DEFAULT 512, + status TEXT NOT NULL DEFAULT 'pending', + current_step INTEGER NOT NULL DEFAULT 0, + total_steps INTEGER NOT NULL DEFAULT 0, + logs JSONB NOT NULL DEFAULT '[]', + error TEXT, + sandbox_id TEXT, + host_id TEXT, + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + started_at TIMESTAMPTZ, + completed_at TIMESTAMPTZ +); + +-- +goose Down + +DROP TABLE template_builds; diff --git a/db/queries/template_builds.sql b/db/queries/template_builds.sql new file mode 100644 index 0000000..ead4d92 --- /dev/null +++ b/db/queries/template_builds.sql @@ -0,0 +1,33 @@ +-- name: InsertTemplateBuild :one +INSERT INTO template_builds (id, name, base_template, recipe, healthcheck, vcpus, memory_mb, status, total_steps) +VALUES ($1, $2, $3, $4, $5, $6, $7, 'pending', $8) +RETURNING *; + +-- name: GetTemplateBuild :one +SELECT * FROM template_builds WHERE id = $1; + +-- name: ListTemplateBuilds :many +SELECT * FROM template_builds ORDER BY created_at DESC; + +-- name: UpdateBuildStatus :one +UPDATE template_builds +SET status = $2, + started_at = CASE WHEN $2 = 'running' AND started_at IS NULL THEN NOW() ELSE started_at END, + completed_at = CASE WHEN $2 IN ('success', 'failed') THEN NOW() ELSE completed_at END +WHERE id = $1 +RETURNING *; + +-- name: UpdateBuildProgress :exec +UPDATE template_builds +SET current_step = $2, logs = $3 +WHERE id = $1; + +-- name: UpdateBuildSandbox :exec +UPDATE template_builds +SET sandbox_id = $2, host_id = $3 +WHERE id = $1; + +-- name: UpdateBuildError :exec +UPDATE template_builds +SET error = $2, status = 'failed', completed_at = NOW() +WHERE id = $1; diff --git a/frontend/src/lib/api/builds.ts b/frontend/src/lib/api/builds.ts new file mode 100644 index 0000000..d826b36 --- /dev/null +++ b/frontend/src/lib/api/builds.ts @@ -0,0 +1,52 @@ +import { apiFetch, type ApiResult } from '$lib/api/client'; + +export type BuildLogEntry = { + step: number; + cmd: string; + stdout: string; + stderr: string; + exit: number; + ok: boolean; + elapsed_ms: number; +}; + +export type Build = { + id: string; + name: string; + base_template: string; + recipe: string[]; + healthcheck?: string; + vcpus: number; + memory_mb: number; + status: string; + current_step: number; + total_steps: number; + logs: BuildLogEntry[]; + error?: string; + sandbox_id?: string; + host_id?: string; + created_at: string; + started_at?: string; + completed_at?: string; +}; + +export type CreateBuildParams = { + name: string; + base_template?: string; + recipe: string[]; + healthcheck?: string; + vcpus?: number; + memory_mb?: number; +}; + +export async function createBuild(params: CreateBuildParams): Promise> { + return apiFetch('POST', '/api/v1/admin/builds', params); +} + +export async function listBuilds(): Promise> { + return apiFetch('GET', '/api/v1/admin/builds'); +} + +export async function getBuild(id: string): Promise> { + return apiFetch('GET', `/api/v1/admin/builds/${id}`); +} diff --git a/frontend/src/lib/components/AdminSidebar.svelte b/frontend/src/lib/components/AdminSidebar.svelte index 4bed5cc..ebf4b64 100644 --- a/frontend/src/lib/components/AdminSidebar.svelte +++ b/frontend/src/lib/components/AdminSidebar.svelte @@ -3,6 +3,7 @@ import { auth } from '$lib/auth.svelte'; import { IconServer, + IconTemplate, IconSettings, IconLogout, IconSidebar, @@ -21,7 +22,8 @@ }; const managementItems: NavItem[] = [ - { label: 'Hosts', icon: IconServer, href: '/admin/hosts' } + { label: 'Hosts', icon: IconServer, href: '/admin/hosts' }, + { label: 'Templates', icon: IconTemplate, href: '/admin/templates' } ]; function isActive(href: string): boolean { diff --git a/frontend/src/routes/admin/templates/+page.svelte b/frontend/src/routes/admin/templates/+page.svelte new file mode 100644 index 0000000..904bbad --- /dev/null +++ b/frontend/src/routes/admin/templates/+page.svelte @@ -0,0 +1,837 @@ + + +
+ + +
+ +
+
+
+

+ Templates +

+

+ Build and manage global templates available to all teams. +

+
+ +
+ + + {#if !templatesLoading && !templatesError} +
+
+ {templateCount} + templates +
+
+ {baseCount} + base +
+
+ {snapshotCount} + snapshots +
+ {#if runningBuilds > 0} +
+ + + + + {runningBuilds} + building +
+ {/if} +
+ {/if} +
+ + +
+ {#each [['templates', 'Templates', templateCount], ['builds', 'Builds', builds.length]] as [id, label, count] (id)} + + {/each} +
+ + +
+ {#if activeTab === 'templates'} + {#if templatesLoading} + {@render skeletonRows(5, ['Name', 'Type', 'Specs', 'Size', 'Created', ''])} + {:else if templatesError} +
+ {templatesError} +
+ {:else if templates.length === 0} + {@render emptyState('templates')} + {:else} + {@render templatesTable()} + {/if} + {:else} + {#if buildsLoading} + {@render skeletonRows(4, ['Build', 'Name', 'Status', 'Progress', 'Started', 'Duration'])} + {:else if buildsError} +
+ {buildsError} +
+ {:else if builds.length === 0} + {@render emptyState('builds')} + {:else} + {@render buildsTable()} + {/if} + {/if} +
+
+
+ + + +{#snippet skeletonRows(count: number, headers: string[])} +
+ + + + {#each headers as h} + + {/each} + + + + {#each Array(count) as _, i} + + {#each headers as _h, j} + + {/each} + + {/each} + +
{h}
+
+
+
+{/snippet} + +{#snippet emptyState(type: 'templates' | 'builds')} +
+
+ {#if type === 'templates'} + + {:else} + + {/if} +
+

+ {type === 'templates' ? 'No templates yet.' : 'No builds yet.'} +

+

+ {type === 'templates' + ? 'Create a template to provide pre-configured environments for all teams.' + : 'Start a template build to see progress and logs here.'} +

+
+{/snippet} + +{#snippet templatesTable()} +
+ + + + + + + + + + + + + {#each templates as tmpl (tmpl.name)} + + + + + + + + + {/each} + +
NameType
+ {tmpl.name} + + {#if tmpl.type === 'snapshot'} + + snapshot + + {:else} + + base + + {/if} + + {#if tmpl.type === 'snapshot'} + + {/if} +
+
+{/snippet} + +{#snippet buildsTable()} +
+ + + + + + + + + + + + + + {#each builds as build (build.id)} + toggleBuildExpand(build.id)} + > + + + + + + + + + + {#if expandedBuildId === build.id} + + + + {/if} + {/each} + +
BuildNameStatus
+
+ + + + {build.id} +
+
+ {build.name} + + + {#if build.status === 'running'} + + + + + {:else if build.status === 'success'} + + {:else if build.status === 'failed'} + + {:else} + + {/if} + {build.status} + +
+
+ {#if build.error} +
+ {build.error} +
+ {/if} + + {#if build.logs && build.logs.length > 0} +
+ {#each build.logs as log, i (i)} +
+ + + + + {#if expandedSteps.has(log.step)} +
+ {#if log.stdout} +
+ stdout +
{log.stdout}
+
+ {/if} + {#if log.stderr} +
+ stderr +
{log.stderr}
+
+ {/if} + {#if !log.stdout && !log.stderr} + No output + {/if} +
+ {/if} +
+ {/each} +
+ {:else} +
+ {#if build.status === 'pending' || build.status === 'running'} + + {build.status === 'pending' ? 'Waiting for worker…' : 'Running…'} + {:else} + No build logs recorded. + {/if} +
+ {/if} + + + {#if build.recipe && build.recipe.length > 0} +
+ Recipe +
+ {#each build.recipe as cmd, i} +
+ {i + 1}. + {cmd} +
+ {/each} +
+
+ {/if} + + {#if build.healthcheck} +
+ Healthcheck + {build.healthcheck} +
+ {/if} +
+
+
+{/snippet} + + +{#if showCreate} +
+
{ if (!creating) showCreate = false; }} + onkeydown={(e) => { if (e.key === 'Escape' && !creating) showCreate = false; }} + >
+
+

+ Create Template +

+

+ Build a new global template by running commands on a base image. +

+ + {#if createError} +
+ {createError} +
+ {/if} + +
+
+ + +
+ +
+
+ + +
+
+ + +
+
+ + +
+
+ +
+ + +

+ Each command runs with a 30s timeout. Non-zero exit codes abort the build. +

+
+ +
+ + +

+ If set, the build will poll this command every 1s (up to 60s) after the recipe completes. On success, a full snapshot (with memory state) is created. Without a healthcheck, only the rootfs is saved. +

+
+
+ +
+ + +
+
+
+{/if} + + +{#if deleteTarget} +
+
{ if (!deleting) deleteTarget = null; }} + onkeydown={(e) => { if (e.key === 'Escape' && !deleting) deleteTarget = null; }} + >
+
+

+ Delete Template +

+

+ Permanently remove {deleteTarget.name} from all hosts. +

+ + {#if deleteError} +
+ {deleteError} +
+ {/if} + +
+ + +
+
+
+{/if} + + diff --git a/internal/api/handlers_builds.go b/internal/api/handlers_builds.go new file mode 100644 index 0000000..ae9a48e --- /dev/null +++ b/internal/api/handlers_builds.go @@ -0,0 +1,156 @@ +package api + +import ( + "encoding/json" + "fmt" + "net/http" + "time" + + "github.com/go-chi/chi/v5" + + "git.omukk.dev/wrenn/sandbox/internal/db" + "git.omukk.dev/wrenn/sandbox/internal/service" + "git.omukk.dev/wrenn/sandbox/internal/validate" +) + +type buildHandler struct { + svc *service.BuildService +} + +func newBuildHandler(svc *service.BuildService) *buildHandler { + return &buildHandler{svc: svc} +} + +type createBuildRequest struct { + Name string `json:"name"` + BaseTemplate string `json:"base_template"` + Recipe []string `json:"recipe"` + Healthcheck string `json:"healthcheck"` + VCPUs int32 `json:"vcpus"` + MemoryMB int32 `json:"memory_mb"` +} + +type buildResponse struct { + ID string `json:"id"` + Name string `json:"name"` + BaseTemplate string `json:"base_template"` + Recipe json.RawMessage `json:"recipe"` + Healthcheck *string `json:"healthcheck,omitempty"` + VCPUs int32 `json:"vcpus"` + MemoryMB int32 `json:"memory_mb"` + Status string `json:"status"` + CurrentStep int32 `json:"current_step"` + TotalSteps int32 `json:"total_steps"` + Logs json.RawMessage `json:"logs"` + Error *string `json:"error,omitempty"` + SandboxID *string `json:"sandbox_id,omitempty"` + HostID *string `json:"host_id,omitempty"` + CreatedAt string `json:"created_at"` + StartedAt *string `json:"started_at,omitempty"` + CompletedAt *string `json:"completed_at,omitempty"` +} + +func buildToResponse(b db.TemplateBuild) buildResponse { + resp := buildResponse{ + ID: b.ID, + Name: b.Name, + BaseTemplate: b.BaseTemplate, + Recipe: b.Recipe, + VCPUs: b.Vcpus, + MemoryMB: b.MemoryMb, + Status: b.Status, + CurrentStep: b.CurrentStep, + TotalSteps: b.TotalSteps, + Logs: b.Logs, + } + if b.Healthcheck.Valid { + resp.Healthcheck = &b.Healthcheck.String + } + if b.Error.Valid { + resp.Error = &b.Error.String + } + if b.SandboxID.Valid { + resp.SandboxID = &b.SandboxID.String + } + if b.HostID.Valid { + resp.HostID = &b.HostID.String + } + if b.CreatedAt.Valid { + resp.CreatedAt = b.CreatedAt.Time.Format(time.RFC3339) + } + if b.StartedAt.Valid { + s := b.StartedAt.Time.Format(time.RFC3339) + resp.StartedAt = &s + } + if b.CompletedAt.Valid { + s := b.CompletedAt.Time.Format(time.RFC3339) + resp.CompletedAt = &s + } + return resp +} + +// Create handles POST /v1/admin/builds. +func (h *buildHandler) Create(w http.ResponseWriter, r *http.Request) { + var req createBuildRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + writeError(w, http.StatusBadRequest, "invalid_request", "invalid JSON body") + return + } + + if req.Name == "" { + writeError(w, http.StatusBadRequest, "invalid_request", "name is required") + return + } + if err := validate.SafeName(req.Name); err != nil { + writeError(w, http.StatusBadRequest, "invalid_request", fmt.Sprintf("invalid template name: %s", err)) + return + } + if len(req.Recipe) == 0 { + writeError(w, http.StatusBadRequest, "invalid_request", "recipe must contain at least one command") + return + } + + build, err := h.svc.Create(r.Context(), service.BuildCreateParams{ + Name: req.Name, + BaseTemplate: req.BaseTemplate, + Recipe: req.Recipe, + Healthcheck: req.Healthcheck, + VCPUs: req.VCPUs, + MemoryMB: req.MemoryMB, + }) + if err != nil { + writeError(w, http.StatusInternalServerError, "build_error", err.Error()) + return + } + + writeJSON(w, http.StatusCreated, buildToResponse(build)) +} + +// List handles GET /v1/admin/builds. +func (h *buildHandler) List(w http.ResponseWriter, r *http.Request) { + builds, err := h.svc.List(r.Context()) + if err != nil { + writeError(w, http.StatusInternalServerError, "db_error", "failed to list builds") + return + } + + resp := make([]buildResponse, len(builds)) + for i, b := range builds { + resp[i] = buildToResponse(b) + } + + writeJSON(w, http.StatusOK, resp) +} + +// Get handles GET /v1/admin/builds/{id}. +func (h *buildHandler) Get(w http.ResponseWriter, r *http.Request) { + buildID := chi.URLParam(r, "id") + + build, err := h.svc.Get(r.Context(), buildID) + if err != nil { + writeError(w, http.StatusNotFound, "not_found", "build not found") + return + } + + writeJSON(w, http.StatusOK, buildToResponse(build)) +} diff --git a/internal/api/server.go b/internal/api/server.go index 918476b..6999b8a 100644 --- a/internal/api/server.go +++ b/internal/api/server.go @@ -22,7 +22,8 @@ var openapiYAML []byte // Server is the control plane HTTP server. type Server struct { - router chi.Router + router chi.Router + BuildSvc *service.BuildService } // New constructs the chi router and registers all routes. @@ -47,6 +48,7 @@ func New( teamSvc := &service.TeamService{DB: queries, Pool: pgPool, HostPool: pool} auditSvc := &service.AuditService{DB: queries} statsSvc := &service.StatsService{DB: queries, Pool: pgPool} + buildSvc := &service.BuildService{DB: queries, Redis: rdb, Pool: pool, Scheduler: sched} al := audit.New(queries) @@ -65,6 +67,7 @@ func New( auditH := newAuditHandler(auditSvc) statsH := newStatsHandler(statsSvc) metricsH := newSandboxMetricsHandler(queries, pool) + buildH := newBuildHandler(buildSvc) // OpenAPI spec and docs. r.Get("/openapi.yaml", serveOpenAPI) @@ -174,9 +177,12 @@ func New( r.Use(requireJWT(jwtSecret)) r.Use(requireAdmin(queries)) r.Put("/teams/{id}/byoc", teamH.SetBYOC) + r.Post("/builds", buildH.Create) + r.Get("/builds", buildH.List) + r.Get("/builds/{id}", buildH.Get) }) - return &Server{router: r} + return &Server{router: r, BuildSvc: buildSvc} } // Handler returns the HTTP handler. diff --git a/internal/db/models.go b/internal/db/models.go index 0128f4a..74596c6 100644 --- a/internal/db/models.go +++ b/internal/db/models.go @@ -147,6 +147,26 @@ type Template struct { TeamID string `json:"team_id"` } +type TemplateBuild struct { + ID string `json:"id"` + Name string `json:"name"` + BaseTemplate string `json:"base_template"` + Recipe []byte `json:"recipe"` + Healthcheck pgtype.Text `json:"healthcheck"` + Vcpus int32 `json:"vcpus"` + MemoryMb int32 `json:"memory_mb"` + Status string `json:"status"` + CurrentStep int32 `json:"current_step"` + TotalSteps int32 `json:"total_steps"` + Logs []byte `json:"logs"` + Error pgtype.Text `json:"error"` + SandboxID pgtype.Text `json:"sandbox_id"` + HostID pgtype.Text `json:"host_id"` + CreatedAt pgtype.Timestamptz `json:"created_at"` + StartedAt pgtype.Timestamptz `json:"started_at"` + CompletedAt pgtype.Timestamptz `json:"completed_at"` +} + type User struct { ID string `json:"id"` Email string `json:"email"` diff --git a/internal/db/template_builds.sql.go b/internal/db/template_builds.sql.go new file mode 100644 index 0000000..8142d29 --- /dev/null +++ b/internal/db/template_builds.sql.go @@ -0,0 +1,223 @@ +// Code generated by sqlc. DO NOT EDIT. +// versions: +// sqlc v1.30.0 +// source: template_builds.sql + +package db + +import ( + "context" + + "github.com/jackc/pgx/v5/pgtype" +) + +const getTemplateBuild = `-- name: GetTemplateBuild :one +SELECT id, name, base_template, recipe, healthcheck, vcpus, memory_mb, status, current_step, total_steps, logs, error, sandbox_id, host_id, created_at, started_at, completed_at FROM template_builds WHERE id = $1 +` + +func (q *Queries) GetTemplateBuild(ctx context.Context, id string) (TemplateBuild, error) { + row := q.db.QueryRow(ctx, getTemplateBuild, id) + var i TemplateBuild + err := row.Scan( + &i.ID, + &i.Name, + &i.BaseTemplate, + &i.Recipe, + &i.Healthcheck, + &i.Vcpus, + &i.MemoryMb, + &i.Status, + &i.CurrentStep, + &i.TotalSteps, + &i.Logs, + &i.Error, + &i.SandboxID, + &i.HostID, + &i.CreatedAt, + &i.StartedAt, + &i.CompletedAt, + ) + return i, err +} + +const insertTemplateBuild = `-- name: InsertTemplateBuild :one +INSERT INTO template_builds (id, name, base_template, recipe, healthcheck, vcpus, memory_mb, status, total_steps) +VALUES ($1, $2, $3, $4, $5, $6, $7, 'pending', $8) +RETURNING id, name, base_template, recipe, healthcheck, vcpus, memory_mb, status, current_step, total_steps, logs, error, sandbox_id, host_id, created_at, started_at, completed_at +` + +type InsertTemplateBuildParams struct { + ID string `json:"id"` + Name string `json:"name"` + BaseTemplate string `json:"base_template"` + Recipe []byte `json:"recipe"` + Healthcheck pgtype.Text `json:"healthcheck"` + Vcpus int32 `json:"vcpus"` + MemoryMb int32 `json:"memory_mb"` + TotalSteps int32 `json:"total_steps"` +} + +func (q *Queries) InsertTemplateBuild(ctx context.Context, arg InsertTemplateBuildParams) (TemplateBuild, error) { + row := q.db.QueryRow(ctx, insertTemplateBuild, + arg.ID, + arg.Name, + arg.BaseTemplate, + arg.Recipe, + arg.Healthcheck, + arg.Vcpus, + arg.MemoryMb, + arg.TotalSteps, + ) + var i TemplateBuild + err := row.Scan( + &i.ID, + &i.Name, + &i.BaseTemplate, + &i.Recipe, + &i.Healthcheck, + &i.Vcpus, + &i.MemoryMb, + &i.Status, + &i.CurrentStep, + &i.TotalSteps, + &i.Logs, + &i.Error, + &i.SandboxID, + &i.HostID, + &i.CreatedAt, + &i.StartedAt, + &i.CompletedAt, + ) + return i, err +} + +const listTemplateBuilds = `-- name: ListTemplateBuilds :many +SELECT id, name, base_template, recipe, healthcheck, vcpus, memory_mb, status, current_step, total_steps, logs, error, sandbox_id, host_id, created_at, started_at, completed_at FROM template_builds ORDER BY created_at DESC +` + +func (q *Queries) ListTemplateBuilds(ctx context.Context) ([]TemplateBuild, error) { + rows, err := q.db.Query(ctx, listTemplateBuilds) + if err != nil { + return nil, err + } + defer rows.Close() + var items []TemplateBuild + for rows.Next() { + var i TemplateBuild + if err := rows.Scan( + &i.ID, + &i.Name, + &i.BaseTemplate, + &i.Recipe, + &i.Healthcheck, + &i.Vcpus, + &i.MemoryMb, + &i.Status, + &i.CurrentStep, + &i.TotalSteps, + &i.Logs, + &i.Error, + &i.SandboxID, + &i.HostID, + &i.CreatedAt, + &i.StartedAt, + &i.CompletedAt, + ); err != nil { + return nil, err + } + items = append(items, i) + } + if err := rows.Err(); err != nil { + return nil, err + } + return items, nil +} + +const updateBuildError = `-- name: UpdateBuildError :exec +UPDATE template_builds +SET error = $2, status = 'failed', completed_at = NOW() +WHERE id = $1 +` + +type UpdateBuildErrorParams struct { + ID string `json:"id"` + Error pgtype.Text `json:"error"` +} + +func (q *Queries) UpdateBuildError(ctx context.Context, arg UpdateBuildErrorParams) error { + _, err := q.db.Exec(ctx, updateBuildError, arg.ID, arg.Error) + return err +} + +const updateBuildProgress = `-- name: UpdateBuildProgress :exec +UPDATE template_builds +SET current_step = $2, logs = $3 +WHERE id = $1 +` + +type UpdateBuildProgressParams struct { + ID string `json:"id"` + CurrentStep int32 `json:"current_step"` + Logs []byte `json:"logs"` +} + +func (q *Queries) UpdateBuildProgress(ctx context.Context, arg UpdateBuildProgressParams) error { + _, err := q.db.Exec(ctx, updateBuildProgress, arg.ID, arg.CurrentStep, arg.Logs) + return err +} + +const updateBuildSandbox = `-- name: UpdateBuildSandbox :exec +UPDATE template_builds +SET sandbox_id = $2, host_id = $3 +WHERE id = $1 +` + +type UpdateBuildSandboxParams struct { + ID string `json:"id"` + SandboxID pgtype.Text `json:"sandbox_id"` + HostID pgtype.Text `json:"host_id"` +} + +func (q *Queries) UpdateBuildSandbox(ctx context.Context, arg UpdateBuildSandboxParams) error { + _, err := q.db.Exec(ctx, updateBuildSandbox, arg.ID, arg.SandboxID, arg.HostID) + return err +} + +const updateBuildStatus = `-- name: UpdateBuildStatus :one +UPDATE template_builds +SET status = $2, + started_at = CASE WHEN $2 = 'running' AND started_at IS NULL THEN NOW() ELSE started_at END, + completed_at = CASE WHEN $2 IN ('success', 'failed') THEN NOW() ELSE completed_at END +WHERE id = $1 +RETURNING id, name, base_template, recipe, healthcheck, vcpus, memory_mb, status, current_step, total_steps, logs, error, sandbox_id, host_id, created_at, started_at, completed_at +` + +type UpdateBuildStatusParams struct { + ID string `json:"id"` + Status string `json:"status"` +} + +func (q *Queries) UpdateBuildStatus(ctx context.Context, arg UpdateBuildStatusParams) (TemplateBuild, error) { + row := q.db.QueryRow(ctx, updateBuildStatus, arg.ID, arg.Status) + var i TemplateBuild + err := row.Scan( + &i.ID, + &i.Name, + &i.BaseTemplate, + &i.Recipe, + &i.Healthcheck, + &i.Vcpus, + &i.MemoryMb, + &i.Status, + &i.CurrentStep, + &i.TotalSteps, + &i.Logs, + &i.Error, + &i.SandboxID, + &i.HostID, + &i.CreatedAt, + &i.StartedAt, + &i.CompletedAt, + ) + return i, err +} diff --git a/internal/hostagent/server.go b/internal/hostagent/server.go index fb7fb66..86fdda0 100644 --- a/internal/hostagent/server.go +++ b/internal/hostagent/server.go @@ -110,6 +110,19 @@ func (s *Server) DeleteSnapshot( return connect.NewResponse(&pb.DeleteSnapshotResponse{}), nil } +func (s *Server) FlattenRootfs( + ctx context.Context, + req *connect.Request[pb.FlattenRootfsRequest], +) (*connect.Response[pb.FlattenRootfsResponse], error) { + sizeBytes, err := s.mgr.FlattenRootfs(ctx, req.Msg.SandboxId, req.Msg.Name) + if err != nil { + return nil, connect.NewError(connect.CodeInternal, fmt.Errorf("flatten rootfs: %w", err)) + } + return connect.NewResponse(&pb.FlattenRootfsResponse{ + SizeBytes: sizeBytes, + }), nil +} + func (s *Server) PingSandbox( ctx context.Context, req *connect.Request[pb.PingSandboxRequest], diff --git a/internal/id/id.go b/internal/id/id.go index bbda47c..836af6d 100644 --- a/internal/id/id.go +++ b/internal/id/id.go @@ -78,6 +78,11 @@ func NewAuditLogID() string { return "log-" + hex8() } +// NewBuildID generates a new template build ID in the format "bld-" + 8 hex chars. +func NewBuildID() string { + return "bld-" + hex8() +} + // NewRefreshToken generates a 64-char hex token (32 bytes of entropy) for use as a host refresh token. func NewRefreshToken() string { b := make([]byte, 32) diff --git a/internal/sandbox/manager.go b/internal/sandbox/manager.go index 9464263..1d103bc 100644 --- a/internal/sandbox/manager.go +++ b/internal/sandbox/manager.go @@ -795,6 +795,88 @@ func (m *Manager) CreateSnapshot(ctx context.Context, sandboxID, name string) (i return sizeBytes, nil } +// FlattenRootfs stops a running sandbox, flattens its device-mapper CoW +// rootfs into a standalone rootfs.ext4, and cleans up all resources. +// The result is an image-only template (no VM memory/CPU state) stored in +// ImagesDir/{name}/rootfs.ext4. +func (m *Manager) FlattenRootfs(ctx context.Context, sandboxID, name string) (int64, error) { + if err := validate.SafeName(name); err != nil { + return 0, fmt.Errorf("invalid template name: %w", err) + } + + m.mu.Lock() + sb, ok := m.boxes[sandboxID] + if ok { + delete(m.boxes, sandboxID) + } + m.mu.Unlock() + + if !ok { + return 0, fmt.Errorf("sandbox %s not found", sandboxID) + } + + // Stop the VM but keep the dm device alive for flattening. + m.stopSampler(sb) + if err := m.vm.Destroy(ctx, sb.ID); err != nil { + slog.Warn("vm destroy error during flatten", "id", sb.ID, "error", err) + } + + // Release network resources — not needed after VM is stopped. + if err := network.RemoveNetwork(sb.slot); err != nil { + slog.Warn("network cleanup error during flatten", "id", sb.ID, "error", err) + } + m.slots.Release(sb.SlotIndex) + + if sb.uffdSocketPath != "" { + os.Remove(sb.uffdSocketPath) + } + + // Create template directory and flatten the dm-snapshot. + if err := snapshot.EnsureDir(m.cfg.ImagesDir, name); err != nil { + m.cleanupDM(sb) + return 0, fmt.Errorf("create template dir: %w", err) + } + + outputPath := snapshot.RootfsPath(m.cfg.ImagesDir, name) + if sb.dmDevice == nil { + return 0, fmt.Errorf("sandbox %s has no dm device", sandboxID) + } + + if err := devicemapper.FlattenSnapshot(sb.dmDevice.DevicePath, outputPath); err != nil { + m.cleanupDM(sb) + warnErr("template dir cleanup error", name, snapshot.Remove(m.cfg.ImagesDir, name)) + return 0, fmt.Errorf("flatten rootfs: %w", err) + } + + // Clean up dm device and loop device now that flatten is complete. + m.cleanupDM(sb) + + sizeBytes, err := snapshot.DirSize(m.cfg.ImagesDir, name) + if err != nil { + slog.Warn("failed to calculate template size", "error", err) + } + + slog.Info("rootfs flattened to image-only template", + "sandbox", sandboxID, + "name", name, + "size_bytes", sizeBytes, + ) + return sizeBytes, nil +} + +// cleanupDM tears down the dm-snapshot device and releases the base image loop device. +func (m *Manager) cleanupDM(sb *sandboxState) { + if sb.dmDevice != nil { + if err := devicemapper.RemoveSnapshot(context.Background(), sb.dmDevice); err != nil { + slog.Warn("dm-snapshot remove error", "id", sb.ID, "error", err) + } + os.Remove(sb.dmDevice.CowPath) + } + if sb.baseImagePath != "" { + m.loops.Release(sb.baseImagePath) + } +} + // DeleteSnapshot removes a snapshot template from disk. func (m *Manager) DeleteSnapshot(name string) error { if err := validate.SafeName(name); err != nil { diff --git a/internal/service/build.go b/internal/service/build.go new file mode 100644 index 0000000..054f8a2 --- /dev/null +++ b/internal/service/build.go @@ -0,0 +1,385 @@ +package service + +import ( + "context" + "encoding/json" + "fmt" + "log/slog" + "time" + + "connectrpc.com/connect" + "github.com/jackc/pgx/v5/pgtype" + "github.com/redis/go-redis/v9" + + "git.omukk.dev/wrenn/sandbox/internal/db" + "git.omukk.dev/wrenn/sandbox/internal/id" + "git.omukk.dev/wrenn/sandbox/internal/lifecycle" + "git.omukk.dev/wrenn/sandbox/internal/scheduler" + pb "git.omukk.dev/wrenn/sandbox/proto/hostagent/gen" +) + +const ( + buildQueueKey = "wrenn:build_queue" + buildCommandTimeout = 30 * time.Second + healthcheckInterval = 1 * time.Second + healthcheckTimeout = 60 * time.Second + platformTeamID = "platform" +) + +// buildAgentClient is the subset of the host agent client used by the build worker. +type buildAgentClient interface { + CreateSandbox(ctx context.Context, req *connect.Request[pb.CreateSandboxRequest]) (*connect.Response[pb.CreateSandboxResponse], error) + DestroySandbox(ctx context.Context, req *connect.Request[pb.DestroySandboxRequest]) (*connect.Response[pb.DestroySandboxResponse], error) + Exec(ctx context.Context, req *connect.Request[pb.ExecRequest]) (*connect.Response[pb.ExecResponse], error) + CreateSnapshot(ctx context.Context, req *connect.Request[pb.CreateSnapshotRequest]) (*connect.Response[pb.CreateSnapshotResponse], error) + FlattenRootfs(ctx context.Context, req *connect.Request[pb.FlattenRootfsRequest]) (*connect.Response[pb.FlattenRootfsResponse], error) +} + +// BuildLogEntry represents a single entry in the build log JSONB array. +type BuildLogEntry struct { + Step int `json:"step"` + Cmd string `json:"cmd"` + Stdout string `json:"stdout"` + Stderr string `json:"stderr"` + Exit int32 `json:"exit"` + Ok bool `json:"ok"` + Elapsed int64 `json:"elapsed_ms"` +} + +// BuildService handles template build orchestration. +type BuildService struct { + DB *db.Queries + Redis *redis.Client + Pool *lifecycle.HostClientPool + Scheduler scheduler.HostScheduler +} + +// BuildCreateParams holds the parameters for creating a template build. +type BuildCreateParams struct { + Name string + BaseTemplate string + Recipe []string + Healthcheck string + VCPUs int32 + MemoryMB int32 +} + +// Create inserts a new build record and enqueues it to Redis. +func (s *BuildService) Create(ctx context.Context, p BuildCreateParams) (db.TemplateBuild, error) { + if p.BaseTemplate == "" { + p.BaseTemplate = "minimal" + } + if p.VCPUs <= 0 { + p.VCPUs = 1 + } + if p.MemoryMB <= 0 { + p.MemoryMB = 512 + } + + recipeJSON, err := json.Marshal(p.Recipe) + if err != nil { + return db.TemplateBuild{}, fmt.Errorf("marshal recipe: %w", err) + } + + buildID := id.NewBuildID() + + build, err := s.DB.InsertTemplateBuild(ctx, db.InsertTemplateBuildParams{ + ID: buildID, + Name: p.Name, + BaseTemplate: p.BaseTemplate, + Recipe: recipeJSON, + Healthcheck: pgtype.Text{String: p.Healthcheck, Valid: p.Healthcheck != ""}, + Vcpus: p.VCPUs, + MemoryMb: p.MemoryMB, + TotalSteps: int32(len(p.Recipe)), + }) + if err != nil { + return db.TemplateBuild{}, fmt.Errorf("insert build: %w", err) + } + + // Enqueue build ID to Redis for workers to pick up. + if err := s.Redis.RPush(ctx, buildQueueKey, buildID).Err(); err != nil { + return db.TemplateBuild{}, fmt.Errorf("enqueue build: %w", err) + } + + return build, nil +} + +// Get returns a single build by ID. +func (s *BuildService) Get(ctx context.Context, buildID string) (db.TemplateBuild, error) { + return s.DB.GetTemplateBuild(ctx, buildID) +} + +// List returns all builds ordered by creation time. +func (s *BuildService) List(ctx context.Context) ([]db.TemplateBuild, error) { + return s.DB.ListTemplateBuilds(ctx) +} + +// StartWorkers launches n goroutines that consume from the Redis build queue. +// The returned cancel function stops all workers. +func (s *BuildService) StartWorkers(ctx context.Context, n int) context.CancelFunc { + ctx, cancel := context.WithCancel(ctx) + for i := range n { + go s.worker(ctx, i) + } + slog.Info("build workers started", "count", n) + return cancel +} + +func (s *BuildService) worker(ctx context.Context, workerID int) { + log := slog.With("worker", workerID) + for { + // BLPOP blocks until a build ID is available or context is cancelled. + result, err := s.Redis.BLPop(ctx, 0, buildQueueKey).Result() + if err != nil { + if ctx.Err() != nil { + log.Info("build worker shutting down") + return + } + log.Error("redis BLPOP error", "error", err) + time.Sleep(time.Second) + continue + } + // result[0] is the key, result[1] is the build ID. + buildID := result[1] + log.Info("picked up build", "build_id", buildID) + s.executeBuild(ctx, buildID) + } +} + +func (s *BuildService) executeBuild(ctx context.Context, buildID string) { + log := slog.With("build_id", buildID) + + build, err := s.DB.GetTemplateBuild(ctx, buildID) + if err != nil { + log.Error("failed to fetch build", "error", err) + return + } + + // Mark as running. + if _, err := s.DB.UpdateBuildStatus(ctx, db.UpdateBuildStatusParams{ + ID: buildID, Status: "running", + }); err != nil { + log.Error("failed to update build status", "error", err) + return + } + + // Parse recipe. + var recipe []string + if err := json.Unmarshal(build.Recipe, &recipe); err != nil { + s.failBuild(ctx, buildID, fmt.Sprintf("invalid recipe JSON: %v", err)) + return + } + + // Pick a platform host and create a sandbox. + host, err := s.Scheduler.SelectHost(ctx, platformTeamID, false) + if err != nil { + s.failBuild(ctx, buildID, fmt.Sprintf("no host available: %v", err)) + return + } + + agent, err := s.Pool.GetForHost(host) + if err != nil { + s.failBuild(ctx, buildID, fmt.Sprintf("agent client error: %v", err)) + return + } + + sandboxID := id.NewSandboxID() + log = log.With("sandbox_id", sandboxID, "host_id", host.ID) + + resp, err := agent.CreateSandbox(ctx, connect.NewRequest(&pb.CreateSandboxRequest{ + SandboxId: sandboxID, + Template: build.BaseTemplate, + Vcpus: build.Vcpus, + MemoryMb: build.MemoryMb, + TimeoutSec: 0, // no auto-pause for builds + })) + if err != nil { + s.failBuild(ctx, buildID, fmt.Sprintf("create sandbox failed: %v", err)) + return + } + _ = resp + + // Record sandbox/host association. + _ = s.DB.UpdateBuildSandbox(ctx, db.UpdateBuildSandboxParams{ + ID: buildID, + SandboxID: pgtype.Text{String: sandboxID, Valid: true}, + HostID: pgtype.Text{String: host.ID, Valid: true}, + }) + + // Execute recipe commands. + var logs []BuildLogEntry + for i, cmd := range recipe { + log.Info("executing build step", "step", i+1, "cmd", cmd) + + execCtx, cancel := context.WithTimeout(ctx, buildCommandTimeout) + start := time.Now() + + execResp, err := agent.Exec(execCtx, connect.NewRequest(&pb.ExecRequest{ + SandboxId: sandboxID, + Cmd: "/bin/sh", + Args: []string{"-c", cmd}, + TimeoutSec: int32(buildCommandTimeout.Seconds()), + })) + cancel() + + entry := BuildLogEntry{ + Step: i + 1, + Cmd: cmd, + Elapsed: time.Since(start).Milliseconds(), + } + + if err != nil { + entry.Stderr = err.Error() + entry.Ok = false + logs = append(logs, entry) + s.updateLogs(ctx, buildID, i+1, logs) + s.destroySandbox(ctx, agent, sandboxID) + s.failBuild(ctx, buildID, fmt.Sprintf("step %d exec error: %v", i+1, err)) + return + } + + entry.Stdout = string(execResp.Msg.Stdout) + entry.Stderr = string(execResp.Msg.Stderr) + entry.Exit = execResp.Msg.ExitCode + entry.Ok = execResp.Msg.ExitCode == 0 + logs = append(logs, entry) + + s.updateLogs(ctx, buildID, i+1, logs) + + if execResp.Msg.ExitCode != 0 { + s.destroySandbox(ctx, agent, sandboxID) + s.failBuild(ctx, buildID, fmt.Sprintf("step %d failed with exit code %d", i+1, execResp.Msg.ExitCode)) + return + } + } + + // Healthcheck or direct snapshot. + if build.Healthcheck.Valid && build.Healthcheck.String != "" { + log.Info("running healthcheck", "cmd", build.Healthcheck.String) + if err := s.waitForHealthcheck(ctx, agent, sandboxID, build.Healthcheck.String); err != nil { + s.destroySandbox(ctx, agent, sandboxID) + s.failBuild(ctx, buildID, fmt.Sprintf("healthcheck failed: %v", err)) + return + } + + // Healthcheck passed → full snapshot (with memory/CPU state). + log.Info("healthcheck passed, creating snapshot") + if _, err := agent.CreateSnapshot(ctx, connect.NewRequest(&pb.CreateSnapshotRequest{ + SandboxId: sandboxID, + Name: build.Name, + })); err != nil { + s.destroySandbox(ctx, agent, sandboxID) + s.failBuild(ctx, buildID, fmt.Sprintf("create snapshot failed: %v", err)) + return + } + } else { + // No healthcheck → image-only template (rootfs only). + log.Info("no healthcheck, flattening rootfs") + if _, err := agent.FlattenRootfs(ctx, connect.NewRequest(&pb.FlattenRootfsRequest{ + SandboxId: sandboxID, + Name: build.Name, + })); err != nil { + s.destroySandbox(ctx, agent, sandboxID) + s.failBuild(ctx, buildID, fmt.Sprintf("flatten rootfs failed: %v", err)) + return + } + } + + // Insert into templates table as a global (platform) template. + templateType := "base" + if build.Healthcheck.Valid && build.Healthcheck.String != "" { + templateType = "snapshot" + } + + if _, err := s.DB.InsertTemplate(ctx, db.InsertTemplateParams{ + Name: build.Name, + Type: templateType, + Vcpus: pgtype.Int4{Int32: build.Vcpus, Valid: true}, + MemoryMb: pgtype.Int4{Int32: build.MemoryMb, Valid: true}, + SizeBytes: 0, // Could query the host, but the template is created. + TeamID: platformTeamID, + }); err != nil { + log.Error("failed to insert template record", "error", err) + // Build succeeded on disk, just DB record failed — don't mark as failed. + } + + // For CreateSnapshot, the sandbox is already destroyed by the snapshot process. + // For FlattenRootfs, the sandbox is already destroyed by the flatten process. + // No additional destroy needed. + + // Mark build as success. + if _, err := s.DB.UpdateBuildStatus(ctx, db.UpdateBuildStatusParams{ + ID: buildID, Status: "success", + }); err != nil { + log.Error("failed to mark build as success", "error", err) + } + + log.Info("template build completed successfully", "name", build.Name) +} + +func (s *BuildService) waitForHealthcheck(ctx context.Context, agent buildAgentClient, sandboxID, cmd string) error { + deadline := time.After(healthcheckTimeout) + ticker := time.NewTicker(healthcheckInterval) + defer ticker.Stop() + + for { + select { + case <-ctx.Done(): + return ctx.Err() + case <-deadline: + return fmt.Errorf("healthcheck timed out after %s", healthcheckTimeout) + case <-ticker.C: + execCtx, cancel := context.WithTimeout(ctx, 10*time.Second) + resp, err := agent.Exec(execCtx, connect.NewRequest(&pb.ExecRequest{ + SandboxId: sandboxID, + Cmd: "/bin/sh", + Args: []string{"-c", cmd}, + TimeoutSec: 10, + })) + cancel() + + if err != nil { + slog.Debug("healthcheck exec error (retrying)", "error", err) + continue + } + if resp.Msg.ExitCode == 0 { + return nil + } + slog.Debug("healthcheck failed (retrying)", "exit_code", resp.Msg.ExitCode) + } + } +} + +func (s *BuildService) updateLogs(ctx context.Context, buildID string, step int, logs []BuildLogEntry) { + logsJSON, err := json.Marshal(logs) + if err != nil { + slog.Warn("failed to marshal build logs", "error", err) + return + } + if err := s.DB.UpdateBuildProgress(ctx, db.UpdateBuildProgressParams{ + ID: buildID, + CurrentStep: int32(step), + Logs: logsJSON, + }); err != nil { + slog.Warn("failed to update build progress", "error", err) + } +} + +func (s *BuildService) failBuild(ctx context.Context, buildID, errMsg string) { + slog.Error("build failed", "build_id", buildID, "error", errMsg) + if err := s.DB.UpdateBuildError(ctx, db.UpdateBuildErrorParams{ + ID: buildID, + Error: pgtype.Text{String: errMsg, Valid: true}, + }); err != nil { + slog.Error("failed to update build error", "build_id", buildID, "error", err) + } +} + +func (s *BuildService) destroySandbox(ctx context.Context, agent buildAgentClient, sandboxID string) { + if _, err := agent.DestroySandbox(ctx, connect.NewRequest(&pb.DestroySandboxRequest{ + SandboxId: sandboxID, + })); err != nil { + slog.Warn("failed to destroy build sandbox", "sandbox_id", sandboxID, "error", err) + } +} diff --git a/proto/hostagent/gen/hostagent.pb.go b/proto/hostagent/gen/hostagent.pb.go index f496b2c..c7436b7 100644 --- a/proto/hostagent/gen/hostagent.pb.go +++ b/proto/hostagent/gen/hostagent.pb.go @@ -2171,6 +2171,102 @@ func (x *FlushSandboxMetricsResponse) GetPoints_24H() []*MetricPoint { return nil } +type FlattenRootfsRequest struct { + state protoimpl.MessageState `protogen:"open.v1"` + SandboxId string `protobuf:"bytes,1,opt,name=sandbox_id,json=sandboxId,proto3" json:"sandbox_id,omitempty"` + Name string `protobuf:"bytes,2,opt,name=name,proto3" json:"name,omitempty"` // template name — output written to images/{name}/rootfs.ext4 + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *FlattenRootfsRequest) Reset() { + *x = FlattenRootfsRequest{} + mi := &file_hostagent_proto_msgTypes[40] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *FlattenRootfsRequest) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*FlattenRootfsRequest) ProtoMessage() {} + +func (x *FlattenRootfsRequest) ProtoReflect() protoreflect.Message { + mi := &file_hostagent_proto_msgTypes[40] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use FlattenRootfsRequest.ProtoReflect.Descriptor instead. +func (*FlattenRootfsRequest) Descriptor() ([]byte, []int) { + return file_hostagent_proto_rawDescGZIP(), []int{40} +} + +func (x *FlattenRootfsRequest) GetSandboxId() string { + if x != nil { + return x.SandboxId + } + return "" +} + +func (x *FlattenRootfsRequest) GetName() string { + if x != nil { + return x.Name + } + return "" +} + +type FlattenRootfsResponse struct { + state protoimpl.MessageState `protogen:"open.v1"` + SizeBytes int64 `protobuf:"varint,1,opt,name=size_bytes,json=sizeBytes,proto3" json:"size_bytes,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *FlattenRootfsResponse) Reset() { + *x = FlattenRootfsResponse{} + mi := &file_hostagent_proto_msgTypes[41] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *FlattenRootfsResponse) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*FlattenRootfsResponse) ProtoMessage() {} + +func (x *FlattenRootfsResponse) ProtoReflect() protoreflect.Message { + mi := &file_hostagent_proto_msgTypes[41] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use FlattenRootfsResponse.ProtoReflect.Descriptor instead. +func (*FlattenRootfsResponse) Descriptor() ([]byte, []int) { + return file_hostagent_proto_rawDescGZIP(), []int{41} +} + +func (x *FlattenRootfsResponse) GetSizeBytes() int64 { + if x != nil { + return x.SizeBytes + } + return 0 +} + var File_hostagent_proto protoreflect.FileDescriptor const file_hostagent_proto_rawDesc = "" + @@ -2319,7 +2415,14 @@ const file_hostagent_proto_rawDesc = "" + "points_10m\x18\x01 \x03(\v2\x19.hostagent.v1.MetricPointR\tpoints10m\x126\n" + "\tpoints_2h\x18\x02 \x03(\v2\x19.hostagent.v1.MetricPointR\bpoints2h\x128\n" + "\n" + - "points_24h\x18\x03 \x03(\v2\x19.hostagent.v1.MetricPointR\tpoints24h2\xee\v\n" + + "points_24h\x18\x03 \x03(\v2\x19.hostagent.v1.MetricPointR\tpoints24h\"I\n" + + "\x14FlattenRootfsRequest\x12\x1d\n" + + "\n" + + "sandbox_id\x18\x01 \x01(\tR\tsandboxId\x12\x12\n" + + "\x04name\x18\x02 \x01(\tR\x04name\"6\n" + + "\x15FlattenRootfsResponse\x12\x1d\n" + + "\n" + + "size_bytes\x18\x01 \x01(\x03R\tsizeBytes2\xc8\f\n" + "\x10HostAgentService\x12X\n" + "\rCreateSandbox\x12\".hostagent.v1.CreateSandboxRequest\x1a#.hostagent.v1.CreateSandboxResponse\x12[\n" + "\x0eDestroySandbox\x12#.hostagent.v1.DestroySandboxRequest\x1a$.hostagent.v1.DestroySandboxResponse\x12U\n" + @@ -2338,7 +2441,8 @@ const file_hostagent_proto_rawDesc = "" + "\vPingSandbox\x12 .hostagent.v1.PingSandboxRequest\x1a!.hostagent.v1.PingSandboxResponse\x12L\n" + "\tTerminate\x12\x1e.hostagent.v1.TerminateRequest\x1a\x1f.hostagent.v1.TerminateResponse\x12d\n" + "\x11GetSandboxMetrics\x12&.hostagent.v1.GetSandboxMetricsRequest\x1a'.hostagent.v1.GetSandboxMetricsResponse\x12j\n" + - "\x13FlushSandboxMetrics\x12(.hostagent.v1.FlushSandboxMetricsRequest\x1a).hostagent.v1.FlushSandboxMetricsResponseB\xb0\x01\n" + + "\x13FlushSandboxMetrics\x12(.hostagent.v1.FlushSandboxMetricsRequest\x1a).hostagent.v1.FlushSandboxMetricsResponse\x12X\n" + + "\rFlattenRootfs\x12\".hostagent.v1.FlattenRootfsRequest\x1a#.hostagent.v1.FlattenRootfsResponseB\xb0\x01\n" + "\x10com.hostagent.v1B\x0eHostagentProtoP\x01Z;git.omukk.dev/wrenn/sandbox/proto/hostagent/gen;hostagentv1\xa2\x02\x03HXX\xaa\x02\fHostagent.V1\xca\x02\fHostagent\\V1\xe2\x02\x18Hostagent\\V1\\GPBMetadata\xea\x02\rHostagent::V1b\x06proto3" var ( @@ -2353,7 +2457,7 @@ func file_hostagent_proto_rawDescGZIP() []byte { return file_hostagent_proto_rawDescData } -var file_hostagent_proto_msgTypes = make([]protoimpl.MessageInfo, 40) +var file_hostagent_proto_msgTypes = make([]protoimpl.MessageInfo, 42) var file_hostagent_proto_goTypes = []any{ (*CreateSandboxRequest)(nil), // 0: hostagent.v1.CreateSandboxRequest (*CreateSandboxResponse)(nil), // 1: hostagent.v1.CreateSandboxResponse @@ -2395,6 +2499,8 @@ var file_hostagent_proto_goTypes = []any{ (*GetSandboxMetricsResponse)(nil), // 37: hostagent.v1.GetSandboxMetricsResponse (*FlushSandboxMetricsRequest)(nil), // 38: hostagent.v1.FlushSandboxMetricsRequest (*FlushSandboxMetricsResponse)(nil), // 39: hostagent.v1.FlushSandboxMetricsResponse + (*FlattenRootfsRequest)(nil), // 40: hostagent.v1.FlattenRootfsRequest + (*FlattenRootfsResponse)(nil), // 41: hostagent.v1.FlattenRootfsResponse } var file_hostagent_proto_depIdxs = []int32{ 16, // 0: hostagent.v1.ListSandboxesResponse.sandboxes:type_name -> hostagent.v1.SandboxInfo @@ -2423,25 +2529,27 @@ var file_hostagent_proto_depIdxs = []int32{ 33, // 23: hostagent.v1.HostAgentService.Terminate:input_type -> hostagent.v1.TerminateRequest 36, // 24: hostagent.v1.HostAgentService.GetSandboxMetrics:input_type -> hostagent.v1.GetSandboxMetricsRequest 38, // 25: hostagent.v1.HostAgentService.FlushSandboxMetrics:input_type -> hostagent.v1.FlushSandboxMetricsRequest - 1, // 26: hostagent.v1.HostAgentService.CreateSandbox:output_type -> hostagent.v1.CreateSandboxResponse - 3, // 27: hostagent.v1.HostAgentService.DestroySandbox:output_type -> hostagent.v1.DestroySandboxResponse - 5, // 28: hostagent.v1.HostAgentService.PauseSandbox:output_type -> hostagent.v1.PauseSandboxResponse - 7, // 29: hostagent.v1.HostAgentService.ResumeSandbox:output_type -> hostagent.v1.ResumeSandboxResponse - 13, // 30: hostagent.v1.HostAgentService.Exec:output_type -> hostagent.v1.ExecResponse - 15, // 31: hostagent.v1.HostAgentService.ListSandboxes:output_type -> hostagent.v1.ListSandboxesResponse - 18, // 32: hostagent.v1.HostAgentService.WriteFile:output_type -> hostagent.v1.WriteFileResponse - 20, // 33: hostagent.v1.HostAgentService.ReadFile:output_type -> hostagent.v1.ReadFileResponse - 9, // 34: hostagent.v1.HostAgentService.CreateSnapshot:output_type -> hostagent.v1.CreateSnapshotResponse - 11, // 35: hostagent.v1.HostAgentService.DeleteSnapshot:output_type -> hostagent.v1.DeleteSnapshotResponse - 22, // 36: hostagent.v1.HostAgentService.ExecStream:output_type -> hostagent.v1.ExecStreamResponse - 28, // 37: hostagent.v1.HostAgentService.WriteFileStream:output_type -> hostagent.v1.WriteFileStreamResponse - 30, // 38: hostagent.v1.HostAgentService.ReadFileStream:output_type -> hostagent.v1.ReadFileStreamResponse - 32, // 39: hostagent.v1.HostAgentService.PingSandbox:output_type -> hostagent.v1.PingSandboxResponse - 34, // 40: hostagent.v1.HostAgentService.Terminate:output_type -> hostagent.v1.TerminateResponse - 37, // 41: hostagent.v1.HostAgentService.GetSandboxMetrics:output_type -> hostagent.v1.GetSandboxMetricsResponse - 39, // 42: hostagent.v1.HostAgentService.FlushSandboxMetrics:output_type -> hostagent.v1.FlushSandboxMetricsResponse - 26, // [26:43] is the sub-list for method output_type - 9, // [9:26] is the sub-list for method input_type + 40, // 26: hostagent.v1.HostAgentService.FlattenRootfs:input_type -> hostagent.v1.FlattenRootfsRequest + 1, // 27: hostagent.v1.HostAgentService.CreateSandbox:output_type -> hostagent.v1.CreateSandboxResponse + 3, // 28: hostagent.v1.HostAgentService.DestroySandbox:output_type -> hostagent.v1.DestroySandboxResponse + 5, // 29: hostagent.v1.HostAgentService.PauseSandbox:output_type -> hostagent.v1.PauseSandboxResponse + 7, // 30: hostagent.v1.HostAgentService.ResumeSandbox:output_type -> hostagent.v1.ResumeSandboxResponse + 13, // 31: hostagent.v1.HostAgentService.Exec:output_type -> hostagent.v1.ExecResponse + 15, // 32: hostagent.v1.HostAgentService.ListSandboxes:output_type -> hostagent.v1.ListSandboxesResponse + 18, // 33: hostagent.v1.HostAgentService.WriteFile:output_type -> hostagent.v1.WriteFileResponse + 20, // 34: hostagent.v1.HostAgentService.ReadFile:output_type -> hostagent.v1.ReadFileResponse + 9, // 35: hostagent.v1.HostAgentService.CreateSnapshot:output_type -> hostagent.v1.CreateSnapshotResponse + 11, // 36: hostagent.v1.HostAgentService.DeleteSnapshot:output_type -> hostagent.v1.DeleteSnapshotResponse + 22, // 37: hostagent.v1.HostAgentService.ExecStream:output_type -> hostagent.v1.ExecStreamResponse + 28, // 38: hostagent.v1.HostAgentService.WriteFileStream:output_type -> hostagent.v1.WriteFileStreamResponse + 30, // 39: hostagent.v1.HostAgentService.ReadFileStream:output_type -> hostagent.v1.ReadFileStreamResponse + 32, // 40: hostagent.v1.HostAgentService.PingSandbox:output_type -> hostagent.v1.PingSandboxResponse + 34, // 41: hostagent.v1.HostAgentService.Terminate:output_type -> hostagent.v1.TerminateResponse + 37, // 42: hostagent.v1.HostAgentService.GetSandboxMetrics:output_type -> hostagent.v1.GetSandboxMetricsResponse + 39, // 43: hostagent.v1.HostAgentService.FlushSandboxMetrics:output_type -> hostagent.v1.FlushSandboxMetricsResponse + 41, // 44: hostagent.v1.HostAgentService.FlattenRootfs:output_type -> hostagent.v1.FlattenRootfsResponse + 27, // [27:45] is the sub-list for method output_type + 9, // [9:27] is the sub-list for method input_type 9, // [9:9] is the sub-list for extension type_name 9, // [9:9] is the sub-list for extension extendee 0, // [0:9] is the sub-list for field type_name @@ -2471,7 +2579,7 @@ func file_hostagent_proto_init() { GoPackagePath: reflect.TypeOf(x{}).PkgPath(), RawDescriptor: unsafe.Slice(unsafe.StringData(file_hostagent_proto_rawDesc), len(file_hostagent_proto_rawDesc)), NumEnums: 0, - NumMessages: 40, + NumMessages: 42, NumExtensions: 0, NumServices: 1, }, diff --git a/proto/hostagent/gen/hostagentv1connect/hostagent.connect.go b/proto/hostagent/gen/hostagentv1connect/hostagent.connect.go index 7f0fa70..02f4ecc 100644 --- a/proto/hostagent/gen/hostagentv1connect/hostagent.connect.go +++ b/proto/hostagent/gen/hostagentv1connect/hostagent.connect.go @@ -83,6 +83,9 @@ const ( // HostAgentServiceFlushSandboxMetricsProcedure is the fully-qualified name of the // HostAgentService's FlushSandboxMetrics RPC. HostAgentServiceFlushSandboxMetricsProcedure = "/hostagent.v1.HostAgentService/FlushSandboxMetrics" + // HostAgentServiceFlattenRootfsProcedure is the fully-qualified name of the HostAgentService's + // FlattenRootfs RPC. + HostAgentServiceFlattenRootfsProcedure = "/hostagent.v1.HostAgentService/FlattenRootfs" ) // HostAgentServiceClient is a client for the hostagent.v1.HostAgentService service. @@ -126,6 +129,11 @@ type HostAgentServiceClient interface { // FlushSandboxMetrics returns all ring buffer tiers and clears them. // Called by the control plane before pause/destroy to persist metrics to DB. FlushSandboxMetrics(context.Context, *connect.Request[gen.FlushSandboxMetricsRequest]) (*connect.Response[gen.FlushSandboxMetricsResponse], error) + // FlattenRootfs stops the sandbox VM, flattens the device-mapper CoW + // snapshot into a standalone rootfs.ext4 in the images directory, then + // cleans up all sandbox resources. Used by the template build system to + // produce image-only templates (no memory/CPU state). + FlattenRootfs(context.Context, *connect.Request[gen.FlattenRootfsRequest]) (*connect.Response[gen.FlattenRootfsResponse], error) } // NewHostAgentServiceClient constructs a client for the hostagent.v1.HostAgentService service. By @@ -241,6 +249,12 @@ func NewHostAgentServiceClient(httpClient connect.HTTPClient, baseURL string, op connect.WithSchema(hostAgentServiceMethods.ByName("FlushSandboxMetrics")), connect.WithClientOptions(opts...), ), + flattenRootfs: connect.NewClient[gen.FlattenRootfsRequest, gen.FlattenRootfsResponse]( + httpClient, + baseURL+HostAgentServiceFlattenRootfsProcedure, + connect.WithSchema(hostAgentServiceMethods.ByName("FlattenRootfs")), + connect.WithClientOptions(opts...), + ), } } @@ -263,6 +277,7 @@ type hostAgentServiceClient struct { terminate *connect.Client[gen.TerminateRequest, gen.TerminateResponse] getSandboxMetrics *connect.Client[gen.GetSandboxMetricsRequest, gen.GetSandboxMetricsResponse] flushSandboxMetrics *connect.Client[gen.FlushSandboxMetricsRequest, gen.FlushSandboxMetricsResponse] + flattenRootfs *connect.Client[gen.FlattenRootfsRequest, gen.FlattenRootfsResponse] } // CreateSandbox calls hostagent.v1.HostAgentService.CreateSandbox. @@ -350,6 +365,11 @@ func (c *hostAgentServiceClient) FlushSandboxMetrics(ctx context.Context, req *c return c.flushSandboxMetrics.CallUnary(ctx, req) } +// FlattenRootfs calls hostagent.v1.HostAgentService.FlattenRootfs. +func (c *hostAgentServiceClient) FlattenRootfs(ctx context.Context, req *connect.Request[gen.FlattenRootfsRequest]) (*connect.Response[gen.FlattenRootfsResponse], error) { + return c.flattenRootfs.CallUnary(ctx, req) +} + // HostAgentServiceHandler is an implementation of the hostagent.v1.HostAgentService service. type HostAgentServiceHandler interface { // CreateSandbox boots a new microVM with the given configuration. @@ -391,6 +411,11 @@ type HostAgentServiceHandler interface { // FlushSandboxMetrics returns all ring buffer tiers and clears them. // Called by the control plane before pause/destroy to persist metrics to DB. FlushSandboxMetrics(context.Context, *connect.Request[gen.FlushSandboxMetricsRequest]) (*connect.Response[gen.FlushSandboxMetricsResponse], error) + // FlattenRootfs stops the sandbox VM, flattens the device-mapper CoW + // snapshot into a standalone rootfs.ext4 in the images directory, then + // cleans up all sandbox resources. Used by the template build system to + // produce image-only templates (no memory/CPU state). + FlattenRootfs(context.Context, *connect.Request[gen.FlattenRootfsRequest]) (*connect.Response[gen.FlattenRootfsResponse], error) } // NewHostAgentServiceHandler builds an HTTP handler from the service implementation. It returns the @@ -502,6 +527,12 @@ func NewHostAgentServiceHandler(svc HostAgentServiceHandler, opts ...connect.Han connect.WithSchema(hostAgentServiceMethods.ByName("FlushSandboxMetrics")), connect.WithHandlerOptions(opts...), ) + hostAgentServiceFlattenRootfsHandler := connect.NewUnaryHandler( + HostAgentServiceFlattenRootfsProcedure, + svc.FlattenRootfs, + connect.WithSchema(hostAgentServiceMethods.ByName("FlattenRootfs")), + connect.WithHandlerOptions(opts...), + ) return "/hostagent.v1.HostAgentService/", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { switch r.URL.Path { case HostAgentServiceCreateSandboxProcedure: @@ -538,6 +569,8 @@ func NewHostAgentServiceHandler(svc HostAgentServiceHandler, opts ...connect.Han hostAgentServiceGetSandboxMetricsHandler.ServeHTTP(w, r) case HostAgentServiceFlushSandboxMetricsProcedure: hostAgentServiceFlushSandboxMetricsHandler.ServeHTTP(w, r) + case HostAgentServiceFlattenRootfsProcedure: + hostAgentServiceFlattenRootfsHandler.ServeHTTP(w, r) default: http.NotFound(w, r) } @@ -614,3 +647,7 @@ func (UnimplementedHostAgentServiceHandler) GetSandboxMetrics(context.Context, * func (UnimplementedHostAgentServiceHandler) FlushSandboxMetrics(context.Context, *connect.Request[gen.FlushSandboxMetricsRequest]) (*connect.Response[gen.FlushSandboxMetricsResponse], error) { return nil, connect.NewError(connect.CodeUnimplemented, errors.New("hostagent.v1.HostAgentService.FlushSandboxMetrics is not implemented")) } + +func (UnimplementedHostAgentServiceHandler) FlattenRootfs(context.Context, *connect.Request[gen.FlattenRootfsRequest]) (*connect.Response[gen.FlattenRootfsResponse], error) { + return nil, connect.NewError(connect.CodeUnimplemented, errors.New("hostagent.v1.HostAgentService.FlattenRootfs is not implemented")) +} diff --git a/proto/hostagent/hostagent.proto b/proto/hostagent/hostagent.proto index 214a84e..cd93a2d 100644 --- a/proto/hostagent/hostagent.proto +++ b/proto/hostagent/hostagent.proto @@ -61,6 +61,12 @@ service HostAgentService { // Called by the control plane before pause/destroy to persist metrics to DB. rpc FlushSandboxMetrics(FlushSandboxMetricsRequest) returns (FlushSandboxMetricsResponse); + // FlattenRootfs stops the sandbox VM, flattens the device-mapper CoW + // snapshot into a standalone rootfs.ext4 in the images directory, then + // cleans up all sandbox resources. Used by the template build system to + // produce image-only templates (no memory/CPU state). + rpc FlattenRootfs(FlattenRootfsRequest) returns (FlattenRootfsResponse); + } message CreateSandboxRequest { @@ -284,3 +290,14 @@ message FlushSandboxMetricsResponse { repeated MetricPoint points_2h = 2; repeated MetricPoint points_24h = 3; } + +// ── FlattenRootfs ──────────────────────────────────────────────────── + +message FlattenRootfsRequest { + string sandbox_id = 1; + string name = 2; // template name — output written to images/{name}/rootfs.ext4 +} + +message FlattenRootfsResponse { + int64 size_bytes = 1; +}