forked from wrenn/wrenn
v0.2.0 (#50)
Co-authored-by: Tasnim Kabir Sadik <tksadik@omukk.dev> Reviewed-on: wrenn/wrenn#50
This commit is contained in:
@ -5,18 +5,19 @@ import (
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"os"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
// VM represents a running Firecracker microVM.
|
||||
// VM represents a running Cloud Hypervisor microVM.
|
||||
type VM struct {
|
||||
Config VMConfig
|
||||
process *process
|
||||
client *fcClient
|
||||
client *chClient
|
||||
}
|
||||
|
||||
// Manager handles the lifecycle of Firecracker microVMs.
|
||||
// Manager handles the lifecycle of Cloud Hypervisor microVMs.
|
||||
type Manager struct {
|
||||
mu sync.RWMutex
|
||||
// vms tracks running VMs by sandbox ID.
|
||||
@ -30,7 +31,7 @@ func NewManager() *Manager {
|
||||
}
|
||||
}
|
||||
|
||||
// Create boots a new Firecracker microVM with the given configuration.
|
||||
// Create boots a new Cloud Hypervisor microVM with the given configuration.
|
||||
// The network namespace and TAP device must already be set up.
|
||||
func (m *Manager) Create(ctx context.Context, cfg VMConfig) (*VM, error) {
|
||||
cfg.applyDefaults()
|
||||
@ -38,7 +39,6 @@ func (m *Manager) Create(ctx context.Context, cfg VMConfig) (*VM, error) {
|
||||
return nil, fmt.Errorf("invalid config: %w", err)
|
||||
}
|
||||
|
||||
// Clean up any leftover socket from a previous run.
|
||||
os.Remove(cfg.SocketPath)
|
||||
|
||||
slog.Info("creating VM",
|
||||
@ -47,8 +47,8 @@ func (m *Manager) Create(ctx context.Context, cfg VMConfig) (*VM, error) {
|
||||
"memory_mb", cfg.MemoryMB,
|
||||
)
|
||||
|
||||
// Step 1: Launch the Firecracker process.
|
||||
proc, err := startProcess(ctx, &cfg)
|
||||
// Step 1: Launch the Cloud Hypervisor process.
|
||||
proc, err := startProcess(&cfg)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("start process: %w", err)
|
||||
}
|
||||
@ -59,25 +59,18 @@ func (m *Manager) Create(ctx context.Context, cfg VMConfig) (*VM, error) {
|
||||
return nil, fmt.Errorf("wait for socket: %w", err)
|
||||
}
|
||||
|
||||
// Step 3: Configure the VM via the Firecracker API.
|
||||
client := newFCClient(cfg.SocketPath)
|
||||
// Step 3: Configure and boot the VM via a single API call.
|
||||
client := newCHClient(cfg.SocketPath)
|
||||
|
||||
if err := configureVM(ctx, client, &cfg); err != nil {
|
||||
if err := client.createVM(ctx, &cfg); err != nil {
|
||||
_ = proc.stop()
|
||||
return nil, fmt.Errorf("configure VM: %w", err)
|
||||
return nil, fmt.Errorf("create VM config: %w", err)
|
||||
}
|
||||
|
||||
// Step 4: Start the VM.
|
||||
if err := client.startVM(ctx); err != nil {
|
||||
// Step 4: Boot the VM.
|
||||
if err := client.bootVM(ctx); err != nil {
|
||||
_ = proc.stop()
|
||||
return nil, fmt.Errorf("start VM: %w", err)
|
||||
}
|
||||
|
||||
// Step 5: Push sandbox metadata into MMDS so envd can read
|
||||
// WRENN_SANDBOX_ID and WRENN_TEMPLATE_ID from inside the guest.
|
||||
if err := client.setMMDS(ctx, cfg.SandboxID, cfg.TemplateID); err != nil {
|
||||
_ = proc.stop()
|
||||
return nil, fmt.Errorf("set MMDS metadata: %w", err)
|
||||
return nil, fmt.Errorf("boot VM: %w", err)
|
||||
}
|
||||
|
||||
vm := &VM{
|
||||
@ -95,78 +88,34 @@ func (m *Manager) Create(ctx context.Context, cfg VMConfig) (*VM, error) {
|
||||
return vm, nil
|
||||
}
|
||||
|
||||
// configureVM sends the configuration to Firecracker via its HTTP API.
|
||||
func configureVM(ctx context.Context, client *fcClient, cfg *VMConfig) error {
|
||||
// Boot source (kernel + args)
|
||||
if err := client.setBootSource(ctx, cfg.KernelPath, cfg.kernelArgs()); err != nil {
|
||||
return fmt.Errorf("set boot source: %w", err)
|
||||
}
|
||||
|
||||
// Root drive — use the symlink path inside the mount namespace so that
|
||||
// snapshots record a stable path that works on restore.
|
||||
rootfsSymlink := cfg.SandboxDir + "/rootfs.ext4"
|
||||
if err := client.setRootfsDrive(ctx, "rootfs", rootfsSymlink, false); err != nil {
|
||||
return fmt.Errorf("set rootfs drive: %w", err)
|
||||
}
|
||||
|
||||
// Network interface
|
||||
if err := client.setNetworkInterface(ctx, "eth0", cfg.TapDevice, cfg.TapMAC); err != nil {
|
||||
return fmt.Errorf("set network interface: %w", err)
|
||||
}
|
||||
|
||||
// Machine config (vCPUs + memory)
|
||||
if err := client.setMachineConfig(ctx, cfg.VCPUs, cfg.MemoryMB); err != nil {
|
||||
return fmt.Errorf("set machine config: %w", err)
|
||||
}
|
||||
|
||||
// Balloon device — allows the host to reclaim unused guest memory.
|
||||
// Start with 0 (no inflation). deflate_on_oom lets the guest reclaim
|
||||
// balloon pages under memory pressure. Stats interval enables monitoring.
|
||||
if err := client.setBalloon(ctx, 0, true, 5); err != nil {
|
||||
slog.Warn("set balloon failed (non-fatal, VM will run without memory reclaim)", "error", err)
|
||||
}
|
||||
|
||||
// MMDS config — enable V2 token access on eth0 so that envd can read
|
||||
// WRENN_SANDBOX_ID and WRENN_TEMPLATE_ID from inside the guest.
|
||||
if err := client.setMMDSConfig(ctx, "eth0"); err != nil {
|
||||
return fmt.Errorf("set MMDS config: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Pause pauses a running VM.
|
||||
// Pause freezes a running VM's vCPUs via the CH API.
|
||||
func (m *Manager) Pause(ctx context.Context, sandboxID string) error {
|
||||
m.mu.RLock()
|
||||
vm, ok := m.vms[sandboxID]
|
||||
m.mu.RUnlock()
|
||||
vm, ok := m.Get(sandboxID)
|
||||
if !ok {
|
||||
return fmt.Errorf("VM not found: %s", sandboxID)
|
||||
}
|
||||
|
||||
if err := vm.client.pauseVM(ctx); err != nil {
|
||||
return fmt.Errorf("pause VM: %w", err)
|
||||
}
|
||||
|
||||
slog.Info("VM paused", "sandbox", sandboxID)
|
||||
return nil
|
||||
return vm.client.pauseVM(ctx)
|
||||
}
|
||||
|
||||
// Resume resumes a paused VM.
|
||||
// Resume unfreezes a paused VM via the CH API.
|
||||
func (m *Manager) Resume(ctx context.Context, sandboxID string) error {
|
||||
m.mu.RLock()
|
||||
vm, ok := m.vms[sandboxID]
|
||||
m.mu.RUnlock()
|
||||
vm, ok := m.Get(sandboxID)
|
||||
if !ok {
|
||||
return fmt.Errorf("VM not found: %s", sandboxID)
|
||||
}
|
||||
return vm.client.resumeVM(ctx)
|
||||
}
|
||||
|
||||
if err := vm.client.resumeVM(ctx); err != nil {
|
||||
return fmt.Errorf("resume VM: %w", err)
|
||||
// Info returns the CH VM state (e.g. "Running", "Paused", "Shutdown") via
|
||||
// the CH unix-socket API. Returns an error if the socket is dead or the VM
|
||||
// is not registered. Use to probe liveness before issuing destructive ops
|
||||
// like pause or snapshot.
|
||||
func (m *Manager) Info(ctx context.Context, sandboxID string) (string, error) {
|
||||
vm, ok := m.Get(sandboxID)
|
||||
if !ok {
|
||||
return "", fmt.Errorf("VM not found: %s", sandboxID)
|
||||
}
|
||||
|
||||
slog.Info("VM resumed", "sandbox", sandboxID)
|
||||
return nil
|
||||
return vm.client.vmInfo(ctx)
|
||||
}
|
||||
|
||||
// UpdateBalloon adjusts the balloon target for a running VM.
|
||||
@ -179,7 +128,8 @@ func (m *Manager) UpdateBalloon(ctx context.Context, sandboxID string, amountMiB
|
||||
return fmt.Errorf("VM not found: %s", sandboxID)
|
||||
}
|
||||
|
||||
return vm.client.updateBalloon(ctx, amountMiB)
|
||||
sizeBytes := int64(amountMiB) * 1024 * 1024
|
||||
return vm.client.resizeBalloon(ctx, sizeBytes)
|
||||
}
|
||||
|
||||
// Destroy stops and cleans up a VM.
|
||||
@ -190,103 +140,98 @@ func (m *Manager) Destroy(ctx context.Context, sandboxID string) error {
|
||||
m.mu.Unlock()
|
||||
return fmt.Errorf("VM not found: %s", sandboxID)
|
||||
}
|
||||
delete(m.vms, sandboxID)
|
||||
m.mu.Unlock()
|
||||
|
||||
slog.Info("destroying VM", "sandbox", sandboxID)
|
||||
|
||||
// Stop the Firecracker process.
|
||||
// Try clean shutdown first, fall back to process kill.
|
||||
shutdownCtx, shutdownCancel := context.WithTimeout(ctx, 5*time.Second)
|
||||
if err := vm.client.shutdownVMM(shutdownCtx); err != nil {
|
||||
slog.Debug("clean VMM shutdown failed, killing process", "sandbox", sandboxID, "error", err)
|
||||
}
|
||||
shutdownCancel()
|
||||
|
||||
if err := vm.process.stop(); err != nil {
|
||||
slog.Warn("error stopping process", "sandbox", sandboxID, "error", err)
|
||||
}
|
||||
|
||||
// Clean up the API socket.
|
||||
os.Remove(vm.Config.SocketPath)
|
||||
|
||||
m.mu.Lock()
|
||||
delete(m.vms, sandboxID)
|
||||
m.mu.Unlock()
|
||||
|
||||
slog.Info("VM destroyed", "sandbox", sandboxID)
|
||||
return nil
|
||||
}
|
||||
|
||||
// Snapshot creates a VM snapshot. The VM must already be paused.
|
||||
// snapshotType is "Full" (all memory) or "Diff" (only dirty pages since last resume).
|
||||
func (m *Manager) Snapshot(ctx context.Context, sandboxID, snapPath, memPath, snapshotType string) error {
|
||||
m.mu.RLock()
|
||||
vm, ok := m.vms[sandboxID]
|
||||
m.mu.RUnlock()
|
||||
// Snapshot writes the VM's config/state/memory to snapshotDir via CH's
|
||||
// vm.snapshot API. The VM must already be paused. snapshotDir must be an
|
||||
// absolute path; it is passed to CH as `file://{dir}/`.
|
||||
func (m *Manager) Snapshot(ctx context.Context, sandboxID, snapshotDir string) error {
|
||||
vm, ok := m.Get(sandboxID)
|
||||
if !ok {
|
||||
return fmt.Errorf("VM not found: %s", sandboxID)
|
||||
}
|
||||
|
||||
if err := vm.client.createSnapshot(ctx, snapPath, memPath, snapshotType); err != nil {
|
||||
return fmt.Errorf("create snapshot: %w", err)
|
||||
if err := os.MkdirAll(snapshotDir, 0o755); err != nil {
|
||||
return fmt.Errorf("mkdir snapshot dir: %w", err)
|
||||
}
|
||||
|
||||
slog.Info("VM snapshot created", "sandbox", sandboxID, "snap_path", snapPath, "type", snapshotType)
|
||||
url := "file://" + strings.TrimRight(snapshotDir, "/") + "/"
|
||||
if err := vm.client.snapshotVM(ctx, url); err != nil {
|
||||
return fmt.Errorf("vm.snapshot: %w", err)
|
||||
}
|
||||
slog.Info("VM snapshot written", "sandbox", sandboxID, "dir", snapshotDir)
|
||||
return nil
|
||||
}
|
||||
|
||||
// CreateFromSnapshot boots a new Firecracker VM by loading a snapshot
|
||||
// using UFFD for lazy memory loading. The network namespace and TAP
|
||||
// device must already be set up.
|
||||
// CreateFromSnapshot launches a Cloud Hypervisor process in restore mode,
|
||||
// connecting it to an existing snapshot directory. The VM is left in the
|
||||
// paused state — the caller is expected to call Resume after any post-restore
|
||||
// setup (e.g. re-acquiring envd connectivity is implicit via TCP).
|
||||
//
|
||||
// No boot resources (kernel, drives, machine config) are configured —
|
||||
// the snapshot carries all that state. The rootfs path recorded in the
|
||||
// snapshot is resolved via a stable symlink at SandboxDir/rootfs.ext4
|
||||
// inside the mount namespace (created by the start script in jailer.go).
|
||||
//
|
||||
// The sequence is:
|
||||
// 1. Start FC process in mount+network namespace (creates tmpfs + rootfs symlink)
|
||||
// 2. Wait for API socket
|
||||
// 3. Load snapshot with UFFD backend
|
||||
// 4. Resume VM execution
|
||||
func (m *Manager) CreateFromSnapshot(ctx context.Context, cfg VMConfig, snapPath, uffdSocketPath string) (*VM, error) {
|
||||
// cfg.RestoreFromDir must point to an absolute path containing the CH
|
||||
// snapshot artefacts. The disk path inside config.json must already resolve
|
||||
// (CH receives the same SandboxDir/rootfs.ext4 symlink as for fresh boot).
|
||||
func (m *Manager) CreateFromSnapshot(ctx context.Context, cfg VMConfig) (*VM, error) {
|
||||
cfg.applyDefaults()
|
||||
if err := cfg.validate(); err != nil {
|
||||
return nil, fmt.Errorf("invalid config: %w", err)
|
||||
}
|
||||
if cfg.RestoreFromDir == "" {
|
||||
return nil, fmt.Errorf("RestoreFromDir is required for restore")
|
||||
}
|
||||
|
||||
os.Remove(cfg.SocketPath)
|
||||
|
||||
slog.Info("restoring VM from snapshot",
|
||||
"sandbox", cfg.SandboxID,
|
||||
"snap_path", snapPath,
|
||||
"restore_dir", cfg.RestoreFromDir,
|
||||
"lazy_memory", cfg.RestoreLazyMemory,
|
||||
)
|
||||
|
||||
// Step 1: Launch the Firecracker process.
|
||||
// The start script creates a tmpfs at SandboxDir and symlinks
|
||||
// rootfs.ext4 → cfg.RootfsPath, so the snapshot's recorded rootfs
|
||||
// path (/fc-vm/rootfs.ext4) resolves to the new clone.
|
||||
proc, err := startProcess(ctx, &cfg)
|
||||
proc, err := startRestoreProcess(&cfg)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("start process: %w", err)
|
||||
return nil, fmt.Errorf("start restore process: %w", err)
|
||||
}
|
||||
|
||||
// Step 2: Wait for the API socket.
|
||||
if err := waitForSocket(ctx, cfg.SocketPath, proc); err != nil {
|
||||
_ = proc.stop()
|
||||
return nil, fmt.Errorf("wait for socket: %w", err)
|
||||
}
|
||||
|
||||
client := newFCClient(cfg.SocketPath)
|
||||
client := newCHClient(cfg.SocketPath)
|
||||
|
||||
// Step 3: Load the snapshot with UFFD backend.
|
||||
// No boot resources are configured — the snapshot carries kernel,
|
||||
// drive, network, and machine config state.
|
||||
if err := client.loadSnapshotWithUffd(ctx, snapPath, uffdSocketPath); err != nil {
|
||||
// Confirm CH actually hydrated the snapshot before registering. Without
|
||||
// this check, a broken snapshot would leave a zombie *VM in the map that
|
||||
// blocks future restores for the same sandbox ID.
|
||||
state, err := client.vmInfo(ctx)
|
||||
if err != nil {
|
||||
_ = proc.stop()
|
||||
return nil, fmt.Errorf("load snapshot: %w", err)
|
||||
return nil, fmt.Errorf("vm.info after restore: %w", err)
|
||||
}
|
||||
|
||||
// Step 4: Resume the VM.
|
||||
if err := client.resumeVM(ctx); err != nil {
|
||||
if state != "Paused" {
|
||||
_ = proc.stop()
|
||||
return nil, fmt.Errorf("resume VM: %w", err)
|
||||
}
|
||||
|
||||
// Step 5: Push sandbox metadata into MMDS.
|
||||
if err := client.setMMDS(ctx, cfg.SandboxID, cfg.TemplateID); err != nil {
|
||||
_ = proc.stop()
|
||||
return nil, fmt.Errorf("set MMDS metadata: %w", err)
|
||||
return nil, fmt.Errorf("unexpected post-restore VM state %q (want Paused)", state)
|
||||
}
|
||||
|
||||
vm := &VM{
|
||||
@ -299,16 +244,20 @@ func (m *Manager) CreateFromSnapshot(ctx context.Context, cfg VMConfig, snapPath
|
||||
m.vms[cfg.SandboxID] = vm
|
||||
m.mu.Unlock()
|
||||
|
||||
slog.Info("VM restored from snapshot", "sandbox", cfg.SandboxID)
|
||||
slog.Info("VM restored from snapshot (paused)", "sandbox", cfg.SandboxID)
|
||||
return vm, nil
|
||||
}
|
||||
|
||||
// PID returns the process ID of the unshare wrapper process.
|
||||
// The actual Firecracker process is a direct child of this PID.
|
||||
func (v *VM) PID() int {
|
||||
return v.process.cmd.Process.Pid
|
||||
}
|
||||
|
||||
// Exited returns a channel that is closed when the VM process exits.
|
||||
func (v *VM) Exited() <-chan struct{} {
|
||||
return v.process.exited()
|
||||
}
|
||||
|
||||
// Get returns a running VM by sandbox ID.
|
||||
func (m *Manager) Get(sandboxID string) (*VM, bool) {
|
||||
m.mu.RLock()
|
||||
@ -317,7 +266,7 @@ func (m *Manager) Get(sandboxID string) (*VM, bool) {
|
||||
return vm, ok
|
||||
}
|
||||
|
||||
// waitForSocket polls for the Firecracker API socket to appear on disk.
|
||||
// waitForSocket polls for the Cloud Hypervisor API socket to appear on disk.
|
||||
func waitForSocket(ctx context.Context, socketPath string, proc *process) error {
|
||||
ticker := time.NewTicker(10 * time.Millisecond)
|
||||
defer ticker.Stop()
|
||||
@ -329,7 +278,7 @@ func waitForSocket(ctx context.Context, socketPath string, proc *process) error
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
case <-proc.exited():
|
||||
return fmt.Errorf("firecracker process exited before socket was ready")
|
||||
return fmt.Errorf("cloud-hypervisor process exited before socket was ready")
|
||||
case <-timeout:
|
||||
return fmt.Errorf("timed out waiting for API socket at %s", socketPath)
|
||||
case <-ticker.C:
|
||||
|
||||
Reference in New Issue
Block a user