1
0
forked from wrenn/wrenn
Files
wrenn-releases/internal/sandbox/launch_snapshot.go
Rafeed M. Bhuiyan 05ddf62399 v0.2.0 (#50)
Co-authored-by: Tasnim Kabir Sadik <tksadik@omukk.dev>

Reviewed-on: wrenn/wrenn#50
2026-05-24 21:10:37 +00:00

188 lines
6.1 KiB
Go

// Package sandbox: launching a fresh sandbox from a snapshot template.
//
// Mirrors the pause/resume restore path but produces a brand-new sandbox each
// call: fresh ID, fresh network slot, fresh CoW on top of the template's
// flattened rootfs. The CH process is launched with --restore + lazy memory
// (UFFD), and the post-restore memory loader is started so any subsequent
// CreateSnapshot taken from this descendant is self-contained (the
// pause-resume-pause chain guarantee, applied to template lineages).
package sandbox
import (
"context"
"fmt"
"log/slog"
"os"
"time"
"github.com/jackc/pgx/v5/pgtype"
"git.omukk.dev/wrenn/wrenn/internal/devicemapper"
"git.omukk.dev/wrenn/wrenn/internal/layout"
"git.omukk.dev/wrenn/wrenn/internal/models"
"git.omukk.dev/wrenn/wrenn/internal/network"
"git.omukk.dev/wrenn/wrenn/pkg/id"
)
// createFromSnapshotTemplate launches a new sandbox from a snapshot-template
// directory (state.json + config.json + memory-ranges + rootfs.ext4).
//
// The caller has already verified IsSnapshotTemplate(templateDir). Resources
// acquired here are rolled back on any failure; on success the sandbox is
// registered in m.boxes and runs in StatusRunning.
//
// It returns the new sandbox record, the CoW size in bytes chosen for the
// sandbox's dm-snapshot (the larger of diskSizeMB expressed in bytes and the
// origin size), and an error. On error the other two results are nil and 0.
func (m *Manager) createFromSnapshotTemplate(
	ctx context.Context,
	sandboxID string,
	teamID, templateID pgtype.UUID,
	vcpus, memoryMB, timeoutSec, diskSizeMB int,
	defaultUser string,
	defaultEnv map[string]string,
) (*models.Sandbox, int64, error) {
	templateDir := layout.TemplateDir(m.cfg.WrennDir, teamID, templateID)
	baseRootfs := layout.TemplateRootfs(m.cfg.WrennDir, teamID, templateID)

	meta, err := readSnapshotMeta(templateDir)
	if err != nil {
		return nil, 0, fmt.Errorf("read snapshot meta: %w", err)
	}
	if meta.SandboxDir == "" {
		// CH's saved config.json hardcodes a tmpfs disk path; meta.SandboxDir
		// is that exact path. A snapshot template without it cannot be launched.
		return nil, 0, fmt.Errorf("snapshot template %s missing sandbox_dir in meta", templateDir)
	}

	// Acquire shared read-only loop on the flattened rootfs. Many sandboxes
	// can share this loop concurrently — refcounted in LoopRegistry.
	originLoop, err := m.loops.Acquire(baseRootfs)
	if err != nil {
		return nil, 0, fmt.Errorf("acquire loop: %w", err)
	}
	originSize, err := devicemapper.OriginSizeBytes(originLoop)
	if err != nil {
		m.loops.Release(baseRootfs)
		return nil, 0, fmt.Errorf("origin size: %w", err)
	}

	// Per-sandbox CoW on top of the shared origin.
	dmName := "wrenn-" + sandboxID
	if err := os.MkdirAll(layout.SandboxDir(m.cfg.WrennDir, sandboxID), 0o755); err != nil {
		m.loops.Release(baseRootfs)
		return nil, 0, fmt.Errorf("create sandbox dir: %w", err)
	}
	cowPath := layout.SandboxCowPath(m.cfg.WrennDir, sandboxID)
	// The CoW is never smaller than the origin, even when diskSizeMB asks for less.
	cowSize := max(int64(diskSizeMB)*1024*1024, originSize)
	dmDev, err := devicemapper.CreateSnapshot(dmName, originLoop, cowPath, originSize, cowSize)
	if err != nil {
		// NOTE(review): only the loop reference is released here; the sandbox
		// directory created above is left in place — confirm that is intended.
		m.loops.Release(baseRootfs)
		return nil, 0, fmt.Errorf("create dm-snapshot: %w", err)
	}

	// From here on, everything acquired so far is tracked in res so that a
	// single res.rollback() call handles each later failure path.
	res := &createResources{
		sandboxID: sandboxID,
		loops:     m.loops,
		loopImage: baseRootfs,
		dmDevice:  dmDev,
		cowPath:   cowPath,
		slots:     m.slots,
	}

	slotIdx, err := m.slots.Allocate()
	if err != nil {
		res.rollback()
		return nil, 0, fmt.Errorf("allocate network slot: %w", err)
	}
	res.slotIdx = slotIdx

	slot := network.NewSlot(slotIdx)
	if err := network.CreateNetwork(slot); err != nil {
		res.rollback()
		return nil, 0, fmt.Errorf("create network: %w", err)
	}
	res.slot = slot

	// CH's saved config.json hardcodes a tmpfs disk path; meta.SandboxDir is
	// that exact path (carried forward verbatim across template chains, so a
	// snapshot-of-a-snapshot resolves to the root ancestor's path). The
	// launcher mounts a fresh tmpfs there inside its private mount namespace
	// and symlinks rootfs.ext4 → our new dm device.
	vmCfg := m.buildRestoreVMConfig(restoreInputs{
		sandboxID:  sandboxID,
		templateID: id.UUIDString(templateID),
		snapDir:    templateDir,
		rootfsPath: dmDev.DevicePath,
		vcpus:      vcpus,
		memoryMB:   memoryMB,
		slot:       slot,
		sandboxDir: meta.SandboxDir,
	})
	client, err := m.launchRestoredVM(ctx, vmCfg, slot.HostIP.String())
	if err != nil {
		res.rollback()
		// Returned as-is; presumably launchRestoredVM already adds context —
		// TODO confirm.
		return nil, 0, err
	}
	res.vm = m.vm

	// The version lookup is best-effort: a failure must not abort an otherwise
	// healthy launch, but it is logged so that missing version metadata can be
	// traced back to its cause instead of being dropped silently.
	envdVersion, verErr := client.FetchVersion(ctx)
	if verErr != nil {
		slog.Warn("fetch version failed after snapshot launch",
			"id", sandboxID,
			"error", verErr,
		)
	}

	now := time.Now()
	sb := &sandboxState{
		Sandbox: models.Sandbox{
			ID:             sandboxID,
			Status:         models.StatusRunning,
			TemplateTeamID: teamID.Bytes,
			TemplateID:     templateID.Bytes,
			VCPUs:          vcpus,
			MemoryMB:       memoryMB,
			TimeoutSec:     timeoutSec,
			SlotIndex:      slotIdx,
			HostIP:         slot.HostIP,
			RootfsPath:     dmDev.DevicePath,
			CreatedAt:      now,
			LastActiveAt:   now,
			Metadata:       m.buildMetadata(envdVersion),
		},
		slot:               slot,
		connTracker:        &ConnTracker{},
		dmDevice:           dmDev,
		baseImagePath:      baseRootfs,
		sandboxDirOverride: meta.SandboxDir,
	}
	sb.client.Store(client)

	m.mu.Lock()
	m.boxes[sandboxID] = sb
	m.mu.Unlock()

	// /init lifecycle bump then start the memory loader. Loader is required
	// so any future CreateSnapshot taken from this descendant captures all
	// guest pages (otherwise SEEK_DATA/SEEK_HOLE would emit holes for the
	// still-lazy UFFD pages — silent corruption across template chains).
	m.initAndStartMemoryLoader(ctx, sb, defaultUser, id.UUIDString(templateID), defaultEnv)
	m.startSampler(sb)
	m.startCrashWatcher(sb)

	slog.Info("sandbox launched from snapshot template",
		"id", sandboxID,
		"team_id", teamID,
		"template_id", templateID,
		"sandbox_dir", meta.SandboxDir,
		"host_ip", slot.HostIP.String(),
		"dm_device", dmDev.DevicePath,
	)
	return &sb.Sandbox, cowSize, nil
}
// templateExists reports whether a snapshot template is already present at
// TemplateDir(team, templateID). CreateSnapshot relies on it to refuse silent
// overwrites — every snapshot must land in a fresh templateID.
//
// Any os.Stat failure on the directory (not only "does not exist") is treated
// as "no template"; otherwise the answer is layout.IsSnapshotTemplate's.
func (m *Manager) templateExists(teamID, templateID pgtype.UUID) bool {
	templatePath := layout.TemplateDir(m.cfg.WrennDir, teamID, templateID)
	_, statErr := os.Stat(templatePath)
	// Short-circuit keeps IsSnapshotTemplate from running when Stat failed.
	return statErr == nil && layout.IsSnapshotTemplate(templatePath)
}