From c0d6381bbe6e53740923cde3d7c52add0f151d48 Mon Sep 17 00:00:00 2001 From: pptx704 Date: Thu, 26 Mar 2026 23:45:41 +0600 Subject: [PATCH] Add disk_size_mb, auto-expand base images, admin templates endpoint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Disk sizing: - Add disk_size_mb column to sandboxes table (default 20480 = 20GB) - Add disk_size_mb to CreateSandboxRequest proto, passed through the full chain: service → RPC → host agent → sandbox manager → devicemapper - devicemapper.CreateSnapshot takes separate cowSizeBytes param so the sparse CoW file can be sized independently from the origin - EnsureImageSizes() runs at host agent startup: expands any base image smaller than 20GB via truncate + resize2fs (sparse, no extra physical disk). Sandboxes then get the full 20GB via fast dm-snapshot path - FlattenRootfs shrinks output images with resize2fs -M so stored templates are compact; EnsureImageSizes re-expands on next startup Admin templates visibility: - Add GET /v1/admin/templates endpoint listing all templates across teams - Frontend admin templates page uses listAdminTemplates() instead of team-scoped listSnapshots() - Platform templates (team_id = all-zeros UUID) now visible to all teams: GetTemplateByTeam, ListTemplatesByTeam, ListTemplatesByTeamAndType queries include platform team_id in WHERE clause --- cmd/host-agent/main.go | 8 ++ db/migrations/20260310094104_initial.sql | 1 + db/queries/sandboxes.sql | 4 +- db/queries/templates.sql | 9 ++- frontend/src/lib/api/builds.ts | 14 ++++ .../src/routes/admin/templates/+page.svelte | 12 +-- internal/api/handlers_builds.go | 41 +++++++++- internal/api/server.go | 3 +- internal/db/models.go | 1 + internal/db/sandboxes.sql.go | 33 ++++++--- internal/db/templates.sql.go | 9 ++- internal/devicemapper/devicemapper.go | 10 ++- internal/hostagent/server.go | 2 +- internal/sandbox/images.go | 74 +++++++++++++++++++ internal/sandbox/manager.go | 31 ++++++-- internal/service/build.go | 3 +- internal/service/sandbox.go | 6 ++ proto/hostagent/gen/hostagent.pb.go | 18 ++++- proto/hostagent/hostagent.proto | 4 + 19 files changed, 241 insertions(+), 42 deletions(-) create mode 100644 internal/sandbox/images.go diff --git a/cmd/host-agent/main.go b/cmd/host-agent/main.go index 130faaf..76dc239 100644 --- a/cmd/host-agent/main.go +++ b/cmd/host-agent/main.go @@ -59,6 +59,14 @@ func main() { os.Exit(1) } + // Expand base images to the standard disk size (sparse, no extra physical + // disk). This ensures dm-snapshot sandboxes see the full size from boot. + imagesDir := filepath.Join(rootDir, "images") + if err := sandbox.EnsureImageSizes(imagesDir, sandbox.DefaultDiskSizeMB); err != nil { + slog.Error("failed to expand base images", "error", err) + os.Exit(1) + } + cfg := sandbox.Config{ KernelPath: filepath.Join(rootDir, "kernels", "vmlinux"), ImagesDir: filepath.Join(rootDir, "images"), diff --git a/db/migrations/20260310094104_initial.sql b/db/migrations/20260310094104_initial.sql index be5d29f..da6607f 100644 --- a/db/migrations/20260310094104_initial.sql +++ b/db/migrations/20260310094104_initial.sql @@ -144,6 +144,7 @@ CREATE TABLE sandboxes ( vcpus INTEGER NOT NULL DEFAULT 1, memory_mb INTEGER NOT NULL DEFAULT 512, timeout_sec INTEGER NOT NULL DEFAULT 300, + disk_size_mb INTEGER NOT NULL DEFAULT 20480, guest_ip TEXT NOT NULL DEFAULT '', host_ip TEXT NOT NULL DEFAULT '', created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), diff --git a/db/queries/sandboxes.sql b/db/queries/sandboxes.sql index 71e61dc..b8ae8de 100644 --- a/db/queries/sandboxes.sql +++ b/db/queries/sandboxes.sql @@ -1,6 +1,6 @@ -- name: InsertSandbox :one -INSERT INTO sandboxes (id, team_id, host_id, template, status, vcpus, memory_mb, timeout_sec) -VALUES ($1, $2, $3, $4, $5, $6, $7, $8) +INSERT INTO sandboxes (id, team_id, host_id, template, status, vcpus, memory_mb, timeout_sec, disk_size_mb) +VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9) RETURNING *; -- name: GetSandbox :one diff --git a/db/queries/templates.sql b/db/queries/templates.sql index b17abc3..c7b7085 100644 --- a/db/queries/templates.sql +++ b/db/queries/templates.sql @@ -7,7 +7,8 @@ RETURNING *; SELECT * FROM templates WHERE name = $1; -- name: GetTemplateByTeam :one -SELECT * FROM templates WHERE name = $1 AND team_id = $2; +-- Platform templates (team_id = 00000000-...) are visible to all teams. +SELECT * FROM templates WHERE name = $1 AND (team_id = $2 OR team_id = '00000000-0000-0000-0000-000000000000'); -- name: ListTemplates :many SELECT * FROM templates ORDER BY created_at DESC; @@ -16,10 +17,12 @@ SELECT * FROM templates ORDER BY created_at DESC; SELECT * FROM templates WHERE type = $1 ORDER BY created_at DESC; -- name: ListTemplatesByTeam :many -SELECT * FROM templates WHERE team_id = $1 ORDER BY created_at DESC; +-- Platform templates are visible to all teams. +SELECT * FROM templates WHERE (team_id = $1 OR team_id = '00000000-0000-0000-0000-000000000000') ORDER BY created_at DESC; -- name: ListTemplatesByTeamAndType :many -SELECT * FROM templates WHERE team_id = $1 AND type = $2 ORDER BY created_at DESC; +-- Platform templates are visible to all teams. +SELECT * FROM templates WHERE (team_id = $1 OR team_id = '00000000-0000-0000-0000-000000000000') AND type = $2 ORDER BY created_at DESC; -- name: DeleteTemplate :exec DELETE FROM templates WHERE name = $1; diff --git a/frontend/src/lib/api/builds.ts b/frontend/src/lib/api/builds.ts index d826b36..bfa69fa 100644 --- a/frontend/src/lib/api/builds.ts +++ b/frontend/src/lib/api/builds.ts @@ -50,3 +50,17 @@ export async function listBuilds(): Promise> { export async function getBuild(id: string): Promise> { return apiFetch('GET', `/api/v1/admin/builds/${id}`); } + +export type AdminTemplate = { + name: string; + type: string; + vcpus: number; + memory_mb: number; + size_bytes: number; + team_id: string; + created_at: string; +}; + +export async function listAdminTemplates(): Promise> { + return apiFetch('GET', '/api/v1/admin/templates'); +} diff --git a/frontend/src/routes/admin/templates/+page.svelte b/frontend/src/routes/admin/templates/+page.svelte index 904bbad..c320ea8 100644 --- a/frontend/src/routes/admin/templates/+page.svelte +++ b/frontend/src/routes/admin/templates/+page.svelte @@ -3,12 +3,14 @@ import { onMount, onDestroy } from 'svelte'; import { toast } from '$lib/toast.svelte'; import { formatDate, timeAgo } from '$lib/utils/format'; - import { listSnapshots, deleteSnapshot, type Snapshot } from '$lib/api/capsules'; + import { deleteSnapshot } from '$lib/api/capsules'; import { listBuilds, createBuild, + listAdminTemplates, type Build, - type BuildLogEntry + type BuildLogEntry, + type AdminTemplate } from '$lib/api/builds'; let collapsed = $state( @@ -20,7 +22,7 @@ let activeTab = $state<'templates' | 'builds'>('templates'); // Templates state - let templates = $state([]); + let templates = $state([]); let templatesLoading = $state(true); let templatesError = $state(null); @@ -38,7 +40,7 @@ let expandedSteps = $state>(new Set()); // Delete template state - let deleteTarget = $state(null); + let deleteTarget = $state(null); let deleting = $state(false); let deleteError = $state(null); @@ -64,7 +66,7 @@ async function fetchTemplates() { templatesLoading = true; templatesError = null; - const result = await listSnapshots(); + const result = await listAdminTemplates(); if (result.ok) { templates = result.data; } else { diff --git a/internal/api/handlers_builds.go b/internal/api/handlers_builds.go index f1b3973..61eebe7 100644 --- a/internal/api/handlers_builds.go +++ b/internal/api/handlers_builds.go @@ -17,10 +17,11 @@ import ( type buildHandler struct { svc *service.BuildService + db *db.Queries } -func newBuildHandler(svc *service.BuildService) *buildHandler { - return &buildHandler{svc: svc} +func newBuildHandler(svc *service.BuildService, db *db.Queries) *buildHandler { + return &buildHandler{svc: svc, db: db} } type createBuildRequest struct { @@ -165,3 +166,39 @@ func (h *buildHandler) Get(w http.ResponseWriter, r *http.Request) { writeJSON(w, http.StatusOK, buildToResponse(build)) } + +// ListTemplates handles GET /v1/admin/templates — returns all templates across all teams. +func (h *buildHandler) ListTemplates(w http.ResponseWriter, r *http.Request) { + templates, err := h.db.ListTemplates(r.Context()) + if err != nil { + writeError(w, http.StatusInternalServerError, "db_error", "failed to list templates") + return + } + + type templateResponse struct { + Name string `json:"name"` + Type string `json:"type"` + VCPUs int32 `json:"vcpus"` + MemoryMB int32 `json:"memory_mb"` + SizeBytes int64 `json:"size_bytes"` + TeamID string `json:"team_id"` + CreatedAt string `json:"created_at"` + } + + resp := make([]templateResponse, len(templates)) + for i, t := range templates { + resp[i] = templateResponse{ + Name: t.Name, + Type: t.Type, + VCPUs: t.Vcpus, + MemoryMB: t.MemoryMb, + SizeBytes: t.SizeBytes, + TeamID: id.FormatTeamID(t.TeamID), + } + if t.CreatedAt.Valid { + resp[i].CreatedAt = t.CreatedAt.Time.Format(time.RFC3339) + } + } + + writeJSON(w, http.StatusOK, resp) +} diff --git a/internal/api/server.go b/internal/api/server.go index 6999b8a..1be4473 100644 --- a/internal/api/server.go +++ b/internal/api/server.go @@ -67,7 +67,7 @@ func New( auditH := newAuditHandler(auditSvc) statsH := newStatsHandler(statsSvc) metricsH := newSandboxMetricsHandler(queries, pool) - buildH := newBuildHandler(buildSvc) + buildH := newBuildHandler(buildSvc, queries) // OpenAPI spec and docs. r.Get("/openapi.yaml", serveOpenAPI) @@ -177,6 +177,7 @@ func New( r.Use(requireJWT(jwtSecret)) r.Use(requireAdmin(queries)) r.Put("/teams/{id}/byoc", teamH.SetBYOC) + r.Get("/templates", buildH.ListTemplates) r.Post("/builds", buildH.Create) r.Get("/builds", buildH.List) r.Get("/builds/{id}", buildH.Get) diff --git a/internal/db/models.go b/internal/db/models.go index 3aa765c..f35bfe7 100644 --- a/internal/db/models.go +++ b/internal/db/models.go @@ -91,6 +91,7 @@ type Sandbox struct { Vcpus int32 `json:"vcpus"` MemoryMb int32 `json:"memory_mb"` TimeoutSec int32 `json:"timeout_sec"` + DiskSizeMb int32 `json:"disk_size_mb"` GuestIp string `json:"guest_ip"` HostIp string `json:"host_ip"` CreatedAt pgtype.Timestamptz `json:"created_at"` diff --git a/internal/db/sandboxes.sql.go b/internal/db/sandboxes.sql.go index 07effdf..ace4370 100644 --- a/internal/db/sandboxes.sql.go +++ b/internal/db/sandboxes.sql.go @@ -43,7 +43,7 @@ func (q *Queries) BulkUpdateStatusByIDs(ctx context.Context, arg BulkUpdateStatu } const getSandbox = `-- name: GetSandbox :one -SELECT id, team_id, host_id, template, status, vcpus, memory_mb, timeout_sec, guest_ip, host_ip, created_at, started_at, last_active_at, last_updated FROM sandboxes WHERE id = $1 +SELECT id, team_id, host_id, template, status, vcpus, memory_mb, timeout_sec, disk_size_mb, guest_ip, host_ip, created_at, started_at, last_active_at, last_updated FROM sandboxes WHERE id = $1 ` func (q *Queries) GetSandbox(ctx context.Context, id pgtype.UUID) (Sandbox, error) { @@ -58,6 +58,7 @@ func (q *Queries) GetSandbox(ctx context.Context, id pgtype.UUID) (Sandbox, erro &i.Vcpus, &i.MemoryMb, &i.TimeoutSec, + &i.DiskSizeMb, &i.GuestIp, &i.HostIp, &i.CreatedAt, @@ -69,7 +70,7 @@ func (q *Queries) GetSandbox(ctx context.Context, id pgtype.UUID) (Sandbox, erro } const getSandboxByTeam = `-- name: GetSandboxByTeam :one -SELECT id, team_id, host_id, template, status, vcpus, memory_mb, timeout_sec, guest_ip, host_ip, created_at, started_at, last_active_at, last_updated FROM sandboxes WHERE id = $1 AND team_id = $2 +SELECT id, team_id, host_id, template, status, vcpus, memory_mb, timeout_sec, disk_size_mb, guest_ip, host_ip, created_at, started_at, last_active_at, last_updated FROM sandboxes WHERE id = $1 AND team_id = $2 ` type GetSandboxByTeamParams struct { @@ -89,6 +90,7 @@ func (q *Queries) GetSandboxByTeam(ctx context.Context, arg GetSandboxByTeamPara &i.Vcpus, &i.MemoryMb, &i.TimeoutSec, + &i.DiskSizeMb, &i.GuestIp, &i.HostIp, &i.CreatedAt, @@ -100,9 +102,9 @@ func (q *Queries) GetSandboxByTeam(ctx context.Context, arg GetSandboxByTeamPara } const insertSandbox = `-- name: InsertSandbox :one -INSERT INTO sandboxes (id, team_id, host_id, template, status, vcpus, memory_mb, timeout_sec) -VALUES ($1, $2, $3, $4, $5, $6, $7, $8) -RETURNING id, team_id, host_id, template, status, vcpus, memory_mb, timeout_sec, guest_ip, host_ip, created_at, started_at, last_active_at, last_updated +INSERT INTO sandboxes (id, team_id, host_id, template, status, vcpus, memory_mb, timeout_sec, disk_size_mb) +VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9) +RETURNING id, team_id, host_id, template, status, vcpus, memory_mb, timeout_sec, disk_size_mb, guest_ip, host_ip, created_at, started_at, last_active_at, last_updated ` type InsertSandboxParams struct { @@ -114,6 +116,7 @@ type InsertSandboxParams struct { Vcpus int32 `json:"vcpus"` MemoryMb int32 `json:"memory_mb"` TimeoutSec int32 `json:"timeout_sec"` + DiskSizeMb int32 `json:"disk_size_mb"` } func (q *Queries) InsertSandbox(ctx context.Context, arg InsertSandboxParams) (Sandbox, error) { @@ -126,6 +129,7 @@ func (q *Queries) InsertSandbox(ctx context.Context, arg InsertSandboxParams) (S arg.Vcpus, arg.MemoryMb, arg.TimeoutSec, + arg.DiskSizeMb, ) var i Sandbox err := row.Scan( @@ -137,6 +141,7 @@ func (q *Queries) InsertSandbox(ctx context.Context, arg InsertSandboxParams) (S &i.Vcpus, &i.MemoryMb, &i.TimeoutSec, + &i.DiskSizeMb, &i.GuestIp, &i.HostIp, &i.CreatedAt, @@ -148,7 +153,7 @@ func (q *Queries) InsertSandbox(ctx context.Context, arg InsertSandboxParams) (S } const listActiveSandboxesByTeam = `-- name: ListActiveSandboxesByTeam :many -SELECT id, team_id, host_id, template, status, vcpus, memory_mb, timeout_sec, guest_ip, host_ip, created_at, started_at, last_active_at, last_updated FROM sandboxes +SELECT id, team_id, host_id, template, status, vcpus, memory_mb, timeout_sec, disk_size_mb, guest_ip, host_ip, created_at, started_at, last_active_at, last_updated FROM sandboxes WHERE team_id = $1 AND status IN ('running', 'paused', 'starting') ORDER BY created_at DESC ` @@ -171,6 +176,7 @@ func (q *Queries) ListActiveSandboxesByTeam(ctx context.Context, teamID pgtype.U &i.Vcpus, &i.MemoryMb, &i.TimeoutSec, + &i.DiskSizeMb, &i.GuestIp, &i.HostIp, &i.CreatedAt, @@ -189,7 +195,7 @@ func (q *Queries) ListActiveSandboxesByTeam(ctx context.Context, teamID pgtype.U } const listSandboxes = `-- name: ListSandboxes :many -SELECT id, team_id, host_id, template, status, vcpus, memory_mb, timeout_sec, guest_ip, host_ip, created_at, started_at, last_active_at, last_updated FROM sandboxes ORDER BY created_at DESC +SELECT id, team_id, host_id, template, status, vcpus, memory_mb, timeout_sec, disk_size_mb, guest_ip, host_ip, created_at, started_at, last_active_at, last_updated FROM sandboxes ORDER BY created_at DESC ` func (q *Queries) ListSandboxes(ctx context.Context) ([]Sandbox, error) { @@ -210,6 +216,7 @@ func (q *Queries) ListSandboxes(ctx context.Context) ([]Sandbox, error) { &i.Vcpus, &i.MemoryMb, &i.TimeoutSec, + &i.DiskSizeMb, &i.GuestIp, &i.HostIp, &i.CreatedAt, @@ -228,7 +235,7 @@ func (q *Queries) ListSandboxes(ctx context.Context) ([]Sandbox, error) { } const listSandboxesByHostAndStatus = `-- name: ListSandboxesByHostAndStatus :many -SELECT id, team_id, host_id, template, status, vcpus, memory_mb, timeout_sec, guest_ip, host_ip, created_at, started_at, last_active_at, last_updated FROM sandboxes +SELECT id, team_id, host_id, template, status, vcpus, memory_mb, timeout_sec, disk_size_mb, guest_ip, host_ip, created_at, started_at, last_active_at, last_updated FROM sandboxes WHERE host_id = $1 AND status = ANY($2::text[]) ORDER BY created_at DESC ` @@ -256,6 +263,7 @@ func (q *Queries) ListSandboxesByHostAndStatus(ctx context.Context, arg ListSand &i.Vcpus, &i.MemoryMb, &i.TimeoutSec, + &i.DiskSizeMb, &i.GuestIp, &i.HostIp, &i.CreatedAt, @@ -274,7 +282,7 @@ func (q *Queries) ListSandboxesByHostAndStatus(ctx context.Context, arg ListSand } const listSandboxesByTeam = `-- name: ListSandboxesByTeam :many -SELECT id, team_id, host_id, template, status, vcpus, memory_mb, timeout_sec, guest_ip, host_ip, created_at, started_at, last_active_at, last_updated FROM sandboxes +SELECT id, team_id, host_id, template, status, vcpus, memory_mb, timeout_sec, disk_size_mb, guest_ip, host_ip, created_at, started_at, last_active_at, last_updated FROM sandboxes WHERE team_id = $1 AND status NOT IN ('stopped', 'error') ORDER BY created_at DESC ` @@ -297,6 +305,7 @@ func (q *Queries) ListSandboxesByTeam(ctx context.Context, teamID pgtype.UUID) ( &i.Vcpus, &i.MemoryMb, &i.TimeoutSec, + &i.DiskSizeMb, &i.GuestIp, &i.HostIp, &i.CreatedAt, @@ -355,7 +364,7 @@ SET status = 'running', last_active_at = $4, last_updated = NOW() WHERE id = $1 -RETURNING id, team_id, host_id, template, status, vcpus, memory_mb, timeout_sec, guest_ip, host_ip, created_at, started_at, last_active_at, last_updated +RETURNING id, team_id, host_id, template, status, vcpus, memory_mb, timeout_sec, disk_size_mb, guest_ip, host_ip, created_at, started_at, last_active_at, last_updated ` type UpdateSandboxRunningParams struct { @@ -382,6 +391,7 @@ func (q *Queries) UpdateSandboxRunning(ctx context.Context, arg UpdateSandboxRun &i.Vcpus, &i.MemoryMb, &i.TimeoutSec, + &i.DiskSizeMb, &i.GuestIp, &i.HostIp, &i.CreatedAt, @@ -397,7 +407,7 @@ UPDATE sandboxes SET status = $2, last_updated = NOW() WHERE id = $1 -RETURNING id, team_id, host_id, template, status, vcpus, memory_mb, timeout_sec, guest_ip, host_ip, created_at, started_at, last_active_at, last_updated +RETURNING id, team_id, host_id, template, status, vcpus, memory_mb, timeout_sec, disk_size_mb, guest_ip, host_ip, created_at, started_at, last_active_at, last_updated ` type UpdateSandboxStatusParams struct { @@ -417,6 +427,7 @@ func (q *Queries) UpdateSandboxStatus(ctx context.Context, arg UpdateSandboxStat &i.Vcpus, &i.MemoryMb, &i.TimeoutSec, + &i.DiskSizeMb, &i.GuestIp, &i.HostIp, &i.CreatedAt, diff --git a/internal/db/templates.sql.go b/internal/db/templates.sql.go index 8703bc9..45a673c 100644 --- a/internal/db/templates.sql.go +++ b/internal/db/templates.sql.go @@ -54,7 +54,7 @@ func (q *Queries) GetTemplate(ctx context.Context, name string) (Template, error } const getTemplateByTeam = `-- name: GetTemplateByTeam :one -SELECT name, type, vcpus, memory_mb, size_bytes, created_at, team_id FROM templates WHERE name = $1 AND team_id = $2 +SELECT name, type, vcpus, memory_mb, size_bytes, created_at, team_id FROM templates WHERE name = $1 AND (team_id = $2 OR team_id = '00000000-0000-0000-0000-000000000000') ` type GetTemplateByTeamParams struct { @@ -62,6 +62,7 @@ type GetTemplateByTeamParams struct { TeamID pgtype.UUID `json:"team_id"` } +// Platform templates (team_id = 00000000-...) are visible to all teams. func (q *Queries) GetTemplateByTeam(ctx context.Context, arg GetTemplateByTeamParams) (Template, error) { row := q.db.QueryRow(ctx, getTemplateByTeam, arg.Name, arg.TeamID) var i Template @@ -147,9 +148,10 @@ func (q *Queries) ListTemplates(ctx context.Context) ([]Template, error) { } const listTemplatesByTeam = `-- name: ListTemplatesByTeam :many -SELECT name, type, vcpus, memory_mb, size_bytes, created_at, team_id FROM templates WHERE team_id = $1 ORDER BY created_at DESC +SELECT name, type, vcpus, memory_mb, size_bytes, created_at, team_id FROM templates WHERE (team_id = $1 OR team_id = '00000000-0000-0000-0000-000000000000') ORDER BY created_at DESC ` +// Platform templates are visible to all teams. func (q *Queries) ListTemplatesByTeam(ctx context.Context, teamID pgtype.UUID) ([]Template, error) { rows, err := q.db.Query(ctx, listTemplatesByTeam, teamID) if err != nil { @@ -179,7 +181,7 @@ func (q *Queries) ListTemplatesByTeam(ctx context.Context, teamID pgtype.UUID) ( } const listTemplatesByTeamAndType = `-- name: ListTemplatesByTeamAndType :many -SELECT name, type, vcpus, memory_mb, size_bytes, created_at, team_id FROM templates WHERE team_id = $1 AND type = $2 ORDER BY created_at DESC +SELECT name, type, vcpus, memory_mb, size_bytes, created_at, team_id FROM templates WHERE (team_id = $1 OR team_id = '00000000-0000-0000-0000-000000000000') AND type = $2 ORDER BY created_at DESC ` type ListTemplatesByTeamAndTypeParams struct { @@ -187,6 +189,7 @@ type ListTemplatesByTeamAndTypeParams struct { Type string `json:"type"` } +// Platform templates are visible to all teams. func (q *Queries) ListTemplatesByTeamAndType(ctx context.Context, arg ListTemplatesByTeamAndTypeParams) ([]Template, error) { rows, err := q.db.Query(ctx, listTemplatesByTeamAndType, arg.TeamID, arg.Type) if err != nil { diff --git a/internal/devicemapper/devicemapper.go b/internal/devicemapper/devicemapper.go index ea14fcd..ba801f1 100644 --- a/internal/devicemapper/devicemapper.go +++ b/internal/devicemapper/devicemapper.go @@ -116,9 +116,10 @@ type SnapshotDevice struct { // writable CoW layer. // // The origin loop device must already exist (from LoopRegistry.Acquire). -func CreateSnapshot(name, originLoopDev, cowPath string, originSizeBytes int64) (*SnapshotDevice, error) { - // Create sparse CoW file sized to match the origin. - if err := createSparseFile(cowPath, originSizeBytes); err != nil { +func CreateSnapshot(name, originLoopDev, cowPath string, originSizeBytes, cowSizeBytes int64) (*SnapshotDevice, error) { + // Create sparse CoW file. The logical size limits how many blocks can be + // modified; because the file is sparse, only written blocks use real disk. + if err := createSparseFile(cowPath, cowSizeBytes); err != nil { return nil, fmt.Errorf("create cow file: %w", err) } @@ -128,6 +129,9 @@ func CreateSnapshot(name, originLoopDev, cowPath string, originSizeBytes int64) return nil, fmt.Errorf("losetup cow: %w", err) } + // The dm-snapshot virtual device size must match the origin — the snapshot + // target maps 1:1 onto origin sectors. The CoW file just needs enough + // space to store all modified blocks (it's sparse, so 20GB costs nothing). sectors := originSizeBytes / 512 if err := dmsetupCreate(name, originLoopDev, cowLoopDev, sectors); err != nil { if detachErr := losetupDetach(cowLoopDev); detachErr != nil { diff --git a/internal/hostagent/server.go b/internal/hostagent/server.go index 86fdda0..549158f 100644 --- a/internal/hostagent/server.go +++ b/internal/hostagent/server.go @@ -39,7 +39,7 @@ func (s *Server) CreateSandbox( ) (*connect.Response[pb.CreateSandboxResponse], error) { msg := req.Msg - sb, err := s.mgr.Create(ctx, msg.SandboxId, msg.Template, int(msg.Vcpus), int(msg.MemoryMb), int(msg.TimeoutSec)) + sb, err := s.mgr.Create(ctx, msg.SandboxId, msg.Template, int(msg.Vcpus), int(msg.MemoryMb), int(msg.TimeoutSec), int(msg.DiskSizeMb)) if err != nil { return nil, connect.NewError(connect.CodeInternal, fmt.Errorf("create sandbox: %w", err)) } diff --git a/internal/sandbox/images.go b/internal/sandbox/images.go new file mode 100644 index 0000000..0f3e24a --- /dev/null +++ b/internal/sandbox/images.go @@ -0,0 +1,74 @@ +package sandbox + +import ( + "fmt" + "log/slog" + "os" + "os/exec" + "path/filepath" +) + +// DefaultDiskSizeMB is the standard disk size for base images. Images smaller +// than this are expanded at startup so that dm-snapshot sandboxes see the full +// size without per-sandbox copies. The expansion is sparse — only metadata +// changes; no physical disk is consumed beyond the original content. +const DefaultDiskSizeMB = 20480 // 20 GB + +// EnsureImageSizes walks the images directory and expands any rootfs.ext4 that +// is smaller than the target size. This is idempotent: images already at or +// above the target size are left untouched. Should be called once at host agent +// startup before any sandboxes are created. +func EnsureImageSizes(imagesDir string, targetMB int) error { + if targetMB <= 0 { + targetMB = DefaultDiskSizeMB + } + targetBytes := int64(targetMB) * 1024 * 1024 + + entries, err := os.ReadDir(imagesDir) + if err != nil { + return fmt.Errorf("read images dir: %w", err) + } + + for _, entry := range entries { + if !entry.IsDir() { + continue + } + rootfs := filepath.Join(imagesDir, entry.Name(), "rootfs.ext4") + info, err := os.Stat(rootfs) + if err != nil { + continue // not every template dir has a rootfs.ext4 + } + + if info.Size() >= targetBytes { + continue // already large enough + } + + slog.Info("expanding base image", + "template", entry.Name(), + "from_mb", info.Size()/(1024*1024), + "to_mb", targetMB, + ) + + // Expand the file (sparse — instant, no physical disk used). + if err := os.Truncate(rootfs, targetBytes); err != nil { + return fmt.Errorf("truncate %s: %w", rootfs, err) + } + + // Check filesystem before resize. + if out, err := exec.Command("e2fsck", "-fy", rootfs).CombinedOutput(); err != nil { + // e2fsck returns 1 if it fixed errors, which is fine. + if exitErr, ok := err.(*exec.ExitError); ok && exitErr.ExitCode() > 1 { + return fmt.Errorf("e2fsck %s: %s: %w", rootfs, string(out), err) + } + } + + // Grow the ext4 filesystem to fill the new file size. + if out, err := exec.Command("resize2fs", rootfs).CombinedOutput(); err != nil { + return fmt.Errorf("resize2fs %s: %s: %w", rootfs, string(out), err) + } + + slog.Info("base image expanded", "template", entry.Name(), "size_mb", targetMB) + } + + return nil +} diff --git a/internal/sandbox/manager.go b/internal/sandbox/manager.go index 15453eb..b91fed1 100644 --- a/internal/sandbox/manager.go +++ b/internal/sandbox/manager.go @@ -5,6 +5,7 @@ import ( "fmt" "log/slog" "os" + "os/exec" "path/filepath" "sync" "time" @@ -51,8 +52,8 @@ type sandboxState struct { slot *network.Slot client *envdclient.Client uffdSocketPath string // non-empty for sandboxes restored from snapshot - dmDevice *devicemapper.SnapshotDevice - baseImagePath string // path to the base template rootfs (for loop registry release) + dmDevice *devicemapper.SnapshotDevice + baseImagePath string // path to the base template rootfs (for loop registry release) // parent holds the snapshot header and diff file paths from which this // sandbox was restored. Non-nil means re-pause should use "Diff" snapshot @@ -94,7 +95,7 @@ func New(cfg Config) *Manager { // Create boots a new sandbox: clone rootfs, set up network, start VM, wait for envd. // If sandboxID is empty, a new ID is generated. -func (m *Manager) Create(ctx context.Context, sandboxID, template string, vcpus, memoryMB, timeoutSec int) (*models.Sandbox, error) { +func (m *Manager) Create(ctx context.Context, sandboxID, template string, vcpus, memoryMB, timeoutSec, diskSizeMB int) (*models.Sandbox, error) { if sandboxID == "" { sandboxID = id.FormatSandboxID(id.NewSandboxID()) } @@ -105,6 +106,9 @@ func (m *Manager) Create(ctx context.Context, sandboxID, template string, vcpus, if memoryMB <= 0 { memoryMB = 512 } + if diskSizeMB <= 0 { + diskSizeMB = 20480 // 20 GB default + } if template == "" { template = "minimal" @@ -115,7 +119,7 @@ func (m *Manager) Create(ctx context.Context, sandboxID, template string, vcpus, // Check if template refers to a snapshot (has snapfile + memfile + header + rootfs). if snapshot.IsSnapshot(m.cfg.ImagesDir, template) { - return m.createFromSnapshot(ctx, sandboxID, template, vcpus, memoryMB, timeoutSec) + return m.createFromSnapshot(ctx, sandboxID, template, vcpus, memoryMB, timeoutSec, diskSizeMB) } // Resolve base rootfs image: /var/lib/wrenn/images/{template}/rootfs.ext4 @@ -139,7 +143,8 @@ func (m *Manager) Create(ctx context.Context, sandboxID, template string, vcpus, // Create dm-snapshot with per-sandbox CoW file. dmName := "wrenn-" + sandboxID cowPath := filepath.Join(m.cfg.SandboxesDir, fmt.Sprintf("%s.cow", sandboxID)) - dmDev, err := devicemapper.CreateSnapshot(dmName, originLoop, cowPath, originSize) + cowSize := int64(diskSizeMB) * 1024 * 1024 + dmDev, err := devicemapper.CreateSnapshot(dmName, originLoop, cowPath, originSize, cowSize) if err != nil { m.loops.Release(baseRootfs) return nil, fmt.Errorf("create dm-snapshot: %w", err) @@ -853,6 +858,17 @@ func (m *Manager) FlattenRootfs(ctx context.Context, sandboxID, name string) (in // Clean up dm device and loop device now that flatten is complete. m.cleanupDM(sb) + // Shrink the flattened image to its minimum size so stored templates are + // compact. EnsureImageSizes will re-expand them on the next agent startup. + if out, err := exec.Command("e2fsck", "-fy", outputPath).CombinedOutput(); err != nil { + if exitErr, ok := err.(*exec.ExitError); ok && exitErr.ExitCode() > 1 { + slog.Warn("e2fsck before shrink failed (non-fatal)", "output", string(out), "error", err) + } + } + if out, err := exec.Command("resize2fs", "-M", outputPath).CombinedOutput(); err != nil { + slog.Warn("resize2fs -M failed (non-fatal)", "output", string(out), "error", err) + } + sizeBytes, err := snapshot.DirSize(m.cfg.ImagesDir, name) if err != nil { slog.Warn("failed to calculate template size", "error", err) @@ -891,7 +907,7 @@ func (m *Manager) DeleteSnapshot(name string) error { // in ImagesDir/{snapshotName}/. Uses UFFD for lazy memory loading. // The template's rootfs.ext4 is a flattened standalone image — we create a // dm-snapshot on top of it just like a normal Create. -func (m *Manager) createFromSnapshot(ctx context.Context, sandboxID, snapshotName string, vcpus, _, timeoutSec int) (*models.Sandbox, error) { +func (m *Manager) createFromSnapshot(ctx context.Context, sandboxID, snapshotName string, vcpus, _, timeoutSec, diskSizeMB int) (*models.Sandbox, error) { imagesDir := m.cfg.ImagesDir // Read the header. @@ -936,7 +952,8 @@ func (m *Manager) createFromSnapshot(ctx context.Context, sandboxID, snapshotNam dmName := "wrenn-" + sandboxID cowPath := filepath.Join(m.cfg.SandboxesDir, fmt.Sprintf("%s.cow", sandboxID)) - dmDev, err := devicemapper.CreateSnapshot(dmName, originLoop, cowPath, originSize) + cowSize := int64(diskSizeMB) * 1024 * 1024 + dmDev, err := devicemapper.CreateSnapshot(dmName, originLoop, cowPath, originSize, cowSize) if err != nil { source.Close() m.loops.Release(baseRootfs) diff --git a/internal/service/build.go b/internal/service/build.go index 1bd82a8..19fb5d2 100644 --- a/internal/service/build.go +++ b/internal/service/build.go @@ -199,7 +199,8 @@ func (s *BuildService) executeBuild(ctx context.Context, buildIDStr string) { Template: build.BaseTemplate, Vcpus: build.Vcpus, MemoryMb: build.MemoryMb, - TimeoutSec: 0, // no auto-pause for builds + TimeoutSec: 0, // no auto-pause for builds + DiskSizeMb: 20480, // 20 GB for template builds })) if err != nil { s.failBuild(ctx, buildID, fmt.Sprintf("create sandbox failed: %v", err)) diff --git a/internal/service/sandbox.go b/internal/service/sandbox.go index 89e40c5..96d1282 100644 --- a/internal/service/sandbox.go +++ b/internal/service/sandbox.go @@ -32,6 +32,7 @@ type SandboxCreateParams struct { VCPUs int32 MemoryMB int32 TimeoutSec int32 + DiskSizeMB int32 } // agentForSandbox looks up the host for the given sandbox and returns a client. @@ -77,6 +78,9 @@ func (s *SandboxService) Create(ctx context.Context, p SandboxCreateParams) (db. if p.MemoryMB <= 0 { p.MemoryMB = 512 } + if p.DiskSizeMB <= 0 { + p.DiskSizeMB = 20480 // 20 GB default + } // If the template is a snapshot, use its baked-in vcpus/memory. if tmpl, err := s.DB.GetTemplateByTeam(ctx, db.GetTemplateByTeamParams{Name: p.Template, TeamID: p.TeamID}); err == nil && tmpl.Type == "snapshot" { @@ -117,6 +121,7 @@ func (s *SandboxService) Create(ctx context.Context, p SandboxCreateParams) (db. Vcpus: p.VCPUs, MemoryMb: p.MemoryMB, TimeoutSec: p.TimeoutSec, + DiskSizeMb: p.DiskSizeMB, }); err != nil { return db.Sandbox{}, fmt.Errorf("insert sandbox: %w", err) } @@ -127,6 +132,7 @@ func (s *SandboxService) Create(ctx context.Context, p SandboxCreateParams) (db. Vcpus: p.VCPUs, MemoryMb: p.MemoryMB, TimeoutSec: p.TimeoutSec, + DiskSizeMb: p.DiskSizeMB, })) if err != nil { if _, dbErr := s.DB.UpdateSandboxStatus(ctx, db.UpdateSandboxStatusParams{ diff --git a/proto/hostagent/gen/hostagent.pb.go b/proto/hostagent/gen/hostagent.pb.go index c7436b7..aa29db9 100644 --- a/proto/hostagent/gen/hostagent.pb.go +++ b/proto/hostagent/gen/hostagent.pb.go @@ -33,7 +33,10 @@ type CreateSandboxRequest struct { MemoryMb int32 `protobuf:"varint,3,opt,name=memory_mb,json=memoryMb,proto3" json:"memory_mb,omitempty"` // TTL in seconds. Sandbox is auto-paused after this duration of // inactivity. 0 means no auto-pause. - TimeoutSec int32 `protobuf:"varint,4,opt,name=timeout_sec,json=timeoutSec,proto3" json:"timeout_sec,omitempty"` + TimeoutSec int32 `protobuf:"varint,4,opt,name=timeout_sec,json=timeoutSec,proto3" json:"timeout_sec,omitempty"` + // Disk size in MB for the sparse CoW file. Limits how much data the + // sandbox can write beyond the base image. Default: 20480 (20 GB). + DiskSizeMb int32 `protobuf:"varint,6,opt,name=disk_size_mb,json=diskSizeMb,proto3" json:"disk_size_mb,omitempty"` unknownFields protoimpl.UnknownFields sizeCache protoimpl.SizeCache } @@ -103,6 +106,13 @@ func (x *CreateSandboxRequest) GetTimeoutSec() int32 { return 0 } +func (x *CreateSandboxRequest) GetDiskSizeMb() int32 { + if x != nil { + return x.DiskSizeMb + } + return 0 +} + type CreateSandboxResponse struct { state protoimpl.MessageState `protogen:"open.v1"` SandboxId string `protobuf:"bytes,1,opt,name=sandbox_id,json=sandboxId,proto3" json:"sandbox_id,omitempty"` @@ -2271,7 +2281,7 @@ var File_hostagent_proto protoreflect.FileDescriptor const file_hostagent_proto_rawDesc = "" + "\n" + - "\x0fhostagent.proto\x12\fhostagent.v1\"\xa5\x01\n" + + "\x0fhostagent.proto\x12\fhostagent.v1\"\xc7\x01\n" + "\x14CreateSandboxRequest\x12\x1d\n" + "\n" + "sandbox_id\x18\x05 \x01(\tR\tsandboxId\x12\x1a\n" + @@ -2279,7 +2289,9 @@ const file_hostagent_proto_rawDesc = "" + "\x05vcpus\x18\x02 \x01(\x05R\x05vcpus\x12\x1b\n" + "\tmemory_mb\x18\x03 \x01(\x05R\bmemoryMb\x12\x1f\n" + "\vtimeout_sec\x18\x04 \x01(\x05R\n" + - "timeoutSec\"g\n" + + "timeoutSec\x12 \n" + + "\fdisk_size_mb\x18\x06 \x01(\x05R\n" + + "diskSizeMb\"g\n" + "\x15CreateSandboxResponse\x12\x1d\n" + "\n" + "sandbox_id\x18\x01 \x01(\tR\tsandboxId\x12\x16\n" + diff --git a/proto/hostagent/hostagent.proto b/proto/hostagent/hostagent.proto index cd93a2d..9af40eb 100644 --- a/proto/hostagent/hostagent.proto +++ b/proto/hostagent/hostagent.proto @@ -85,6 +85,10 @@ message CreateSandboxRequest { // TTL in seconds. Sandbox is auto-paused after this duration of // inactivity. 0 means no auto-pause. int32 timeout_sec = 4; + + // Disk size in MB for the sparse CoW file. Limits how much data the + // sandbox can write beyond the base image. Default: 20480 (20 GB). + int32 disk_size_mb = 6; } message CreateSandboxResponse {