1
0
forked from wrenn/wrenn
Files
wrenn-releases/internal/sandbox/restore.go
Rafeed M. Bhuiyan 05ddf62399 v0.2.0 (#50)
Co-authored-by: Tasnim Kabir Sadik <tksadik@omukk.dev>

Reviewed-on: wrenn/wrenn#50
2026-05-24 21:10:37 +00:00

119 lines
4.7 KiB
Go

// Package sandbox: shared CH-restore helpers used by both Resume (paused →
// running) and the snapshot-template launch path (template → fresh sandbox).
//
// The two callers diverge in how they acquire resources (slot, dm-snapshot,
// sandbox identity) but converge on:
//
// build VMConfig → CreateFromSnapshot → vm.Resume → wait envd → balloon deflate
//
// These steps are extracted here so the sequence — and its quirks (paused
// post-restore state, balloon best-effort, restored disk path baked into
// CH's config.json) — has a single source of truth.
package sandbox
import (
"context"
"fmt"
"log/slog"
"path/filepath"
"git.omukk.dev/wrenn/wrenn/internal/envdclient"
"git.omukk.dev/wrenn/wrenn/internal/network"
"git.omukk.dev/wrenn/wrenn/internal/vm"
)
// restoreInputs bundles everything buildRestoreVMConfig needs to describe a
// CH process that is started in restore mode.
type restoreInputs struct {
	// sandboxID is the VM identity for the new CH process (sock path, log file).
	sandboxID string
	// templateID is forwarded to envd via PostInit (informational).
	templateID string
	// snapDir is the directory containing the CH snapshot artefacts.
	snapDir string
	// rootfsPath is the per-sandbox dm-snapshot device,
	// /dev/mapper/wrenn-{newID}.
	rootfsPath string
	// vcpus and memoryMB size the restored VM.
	vcpus, memoryMB int
	// slot supplies the network identity (namespace, tap device, addresses).
	slot *network.Slot
	// sandboxDir overrides VMConfig.SandboxDir; "" selects the default.
	sandboxDir string
}
// buildRestoreVMConfig translates restoreInputs plus the manager's static
// configuration into the vm.VMConfig used to launch a CH process in restore
// mode.
//
// in.sandboxDir is copied through verbatim. When non-empty it overrides the
// default "/tmp/ch-vm-{SandboxID}", which is required when the snapshot's
// saved config.json points at a different sandbox's tmpfs path (i.e. the
// snapshot-template launch path).
//
// in.slot must be non-nil; its fields are read unconditionally.
func (m *Manager) buildRestoreVMConfig(in restoreInputs) vm.VMConfig {
	slot := in.slot

	// Identity, sizing and restore-specific settings.
	cfg := vm.VMConfig{
		SandboxID:         in.sandboxID,
		TemplateID:        in.templateID,
		RootfsPath:        in.rootfsPath,
		VCPUs:             in.vcpus,
		MemoryMB:          in.memoryMB,
		RestoreFromDir:    in.snapDir,
		RestoreLazyMemory: true,
		SandboxDir:        in.sandboxDir,
	}

	// Host-wide settings taken from the manager configuration.
	cfg.KernelPath = m.cfg.KernelPath
	cfg.VMMBin = m.cfg.VMMBin
	cfg.LogDir = filepath.Join(m.cfg.WrennDir, "logs")

	// Networking comes entirely from the allocated slot; the gateway is the
	// slot's tap IP.
	cfg.NetworkNamespace = slot.NamespaceID
	cfg.TapDevice = slot.TapName
	cfg.TapMAC = slot.TapMAC
	cfg.GuestIP = slot.GuestIP
	cfg.GatewayIP = slot.TapIP
	cfg.NetMask = slot.GuestNetMask

	return cfg
}
// launchRestoredVM starts CH in restore mode, resumes the vCPUs, waits for
// envd to be reachable, then best-effort deflates the balloon.
//
// Failure handling:
//   - If CreateFromSnapshot fails, the wrapped error is returned and no
//     Destroy is issued from here.
//   - If Resume or the envd readiness wait fails, the partially started VM
//     is destroyed before returning. A failure of that cleanup is logged (it
//     would otherwise go unnoticed) and the original error is still the one
//     returned.
//
// In every failure case the caller remains responsible for tearing down
// dm/network/slot.
//
// Returns the connected envd client on success.
func (m *Manager) launchRestoredVM(ctx context.Context, vmCfg vm.VMConfig, hostIP string) (*envdclient.Client, error) {
	if _, err := m.vm.CreateFromSnapshot(ctx, vmCfg); err != nil {
		return nil, fmt.Errorf("create from snapshot: %w", err)
	}

	// destroyPartial tears down the VM created above. It deliberately uses
	// context.Background() rather than ctx so the cleanup call is not tied to
	// the caller's context. Cleanup errors are logged, not returned, so the
	// caller still sees the error that triggered the teardown.
	destroyPartial := func(stage string) {
		if err := m.vm.Destroy(context.Background(), vmCfg.SandboxID); err != nil {
			slog.Warn("destroy partial VM after restore failure failed",
				"id", vmCfg.SandboxID, "stage", stage, "error", err)
		}
	}

	if err := m.vm.Resume(ctx, vmCfg.SandboxID); err != nil {
		destroyPartial("vm resume")
		return nil, fmt.Errorf("vm resume: %w", err)
	}

	client := envdclient.New(hostIP)

	// The readiness timeout is derived from the VM's memory size.
	waitCtx, waitCancel := context.WithTimeout(ctx, envdReadyTimeout(vmCfg.MemoryMB))
	defer waitCancel()
	if err := client.WaitUntilReady(waitCtx); err != nil {
		destroyPartial("wait envd")
		return nil, fmt.Errorf("wait envd: %w", err)
	}

	// Best-effort balloon deflate. Free-page reporting drains pages while the
	// sandbox runs; the resumed guest needs its full memory budget back. A
	// failure leaves the guest memory-starved but doesn't break correctness.
	if err := m.vm.UpdateBalloon(ctx, vmCfg.SandboxID, 0); err != nil {
		slog.Warn("balloon deflate after restore failed", "id", vmCfg.SandboxID, "error", err)
	}

	return client, nil
}
// initAndStartMemoryLoader performs envd's /init lifecycle bump and then
// starts the background memory loader.
//
// The order is significant: /init resets envd's mem_preload_* atomics, so the
// loader's POST /memory/preload has to arrive afterwards. Otherwise the next
// CreateSnapshot/Pause would observe a stale "idle" state and snapshot a
// memfile full of holes.
//
// Behaviour by case:
//   - envd client already cleared: a warning is logged and nothing else
//     happens (the loader is not started).
//   - PostInit fails: a warning is logged and the loader is started anyway.
//
// The PostInit call is bounded by m.cfg.EnvdTimeout.
//
// Precondition: sb is already registered in m.boxes with StatusRunning and
// sb.client is populated.
func (m *Manager) initAndStartMemoryLoader(ctx context.Context, sb *sandboxState, defaultUser, templateIDStr string, envVars map[string]string) {
	envd := sb.client.Load()
	if envd == nil {
		slog.Warn("post-restore PostInit skipped: envd client cleared", "id", sb.ID)
		return
	}

	postInitCtx, cancel := context.WithTimeout(ctx, m.cfg.EnvdTimeout)
	defer cancel()

	err := envd.PostInitWithDefaults(postInitCtx, defaultUser, envVars, sb.ID, templateIDStr)
	if err != nil {
		slog.Warn("post-restore PostInit failed", "id", sb.ID, "error", err)
	}

	// Runs whether or not PostInit succeeded.
	m.startMemoryLoader(sb)
}