1
0
forked from wrenn/wrenn

feat(vm): replace Firecracker with Cloud Hypervisor

Migrate the entire VM layer from Firecracker to Cloud Hypervisor (CH).
CH provides native snapshot/restore via its HTTP API, eliminating the
need for custom UFFD handling, memfile processing, and snapshot header
management that Firecracker required.

Key changes:
- Remove fc.go, jailer.go (FC process management)
- Remove internal/uffd/ package (userfaultfd lazy page loading)
- Remove snapshot/header.go, mapping.go, memfile.go (FC snapshot format)
- Add ch.go (CH HTTP API client over Unix socket)
- Add process.go (CH process lifecycle with unshare+netns)
- Add chversion.go (CH version detection)
- Refactor sandbox manager: remove UFFD socket tracking, snapshot
  parent/diff chaining, FC-specific balloon logic; add crash watcher
- Simplify snapshot/local.go to CH's native snapshot format
- Update VM config: FirecrackerBin → VMMBin, new CH-specific fields
- Update envdclient, devicemapper, network for CH compatibility
This commit is contained in:
2026-05-17 01:33:12 +06:00
parent c2dc382787
commit eaa6b8576d
25 changed files with 754 additions and 2267 deletions

View File

@ -9,14 +9,14 @@ import (
"time"
)
// VM represents a running Firecracker microVM.
// VM represents a running Cloud Hypervisor microVM.
type VM struct {
Config VMConfig
process *process
client *fcClient
client *chClient
}
// Manager handles the lifecycle of Firecracker microVMs.
// Manager handles the lifecycle of Cloud Hypervisor microVMs.
type Manager struct {
mu sync.RWMutex
// vms tracks running VMs by sandbox ID.
@ -30,7 +30,7 @@ func NewManager() *Manager {
}
}
// Create boots a new Firecracker microVM with the given configuration.
// Create boots a new Cloud Hypervisor microVM with the given configuration.
// The network namespace and TAP device must already be set up.
func (m *Manager) Create(ctx context.Context, cfg VMConfig) (*VM, error) {
cfg.applyDefaults()
@ -38,7 +38,6 @@ func (m *Manager) Create(ctx context.Context, cfg VMConfig) (*VM, error) {
return nil, fmt.Errorf("invalid config: %w", err)
}
// Clean up any leftover socket from a previous run.
os.Remove(cfg.SocketPath)
slog.Info("creating VM",
@ -47,7 +46,7 @@ func (m *Manager) Create(ctx context.Context, cfg VMConfig) (*VM, error) {
"memory_mb", cfg.MemoryMB,
)
// Step 1: Launch the Firecracker process.
// Step 1: Launch the Cloud Hypervisor process.
proc, err := startProcess(ctx, &cfg)
if err != nil {
return nil, fmt.Errorf("start process: %w", err)
@ -59,25 +58,18 @@ func (m *Manager) Create(ctx context.Context, cfg VMConfig) (*VM, error) {
return nil, fmt.Errorf("wait for socket: %w", err)
}
// Step 3: Configure the VM via the Firecracker API.
client := newFCClient(cfg.SocketPath)
// Step 3: Send the complete VM configuration in a single API call.
client := newCHClient(cfg.SocketPath)
if err := configureVM(ctx, client, &cfg); err != nil {
if err := client.createVM(ctx, &cfg); err != nil {
_ = proc.stop()
return nil, fmt.Errorf("configure VM: %w", err)
return nil, fmt.Errorf("create VM config: %w", err)
}
// Step 4: Start the VM.
if err := client.startVM(ctx); err != nil {
// Step 4: Boot the VM.
if err := client.bootVM(ctx); err != nil {
_ = proc.stop()
return nil, fmt.Errorf("start VM: %w", err)
}
// Step 5: Push sandbox metadata into MMDS so envd can read
// WRENN_SANDBOX_ID and WRENN_TEMPLATE_ID from inside the guest.
if err := client.setMMDS(ctx, cfg.SandboxID, cfg.TemplateID); err != nil {
_ = proc.stop()
return nil, fmt.Errorf("set MMDS metadata: %w", err)
return nil, fmt.Errorf("boot VM: %w", err)
}
vm := &VM{
@ -95,46 +87,6 @@ func (m *Manager) Create(ctx context.Context, cfg VMConfig) (*VM, error) {
return vm, nil
}
// configureVM sends the configuration to Firecracker via its HTTP API.
func configureVM(ctx context.Context, client *fcClient, cfg *VMConfig) error {
// Boot source (kernel + args)
if err := client.setBootSource(ctx, cfg.KernelPath, cfg.kernelArgs()); err != nil {
return fmt.Errorf("set boot source: %w", err)
}
// Root drive — use the symlink path inside the mount namespace so that
// snapshots record a stable path that works on restore.
rootfsSymlink := cfg.SandboxDir + "/rootfs.ext4"
if err := client.setRootfsDrive(ctx, "rootfs", rootfsSymlink, false); err != nil {
return fmt.Errorf("set rootfs drive: %w", err)
}
// Network interface
if err := client.setNetworkInterface(ctx, "eth0", cfg.TapDevice, cfg.TapMAC); err != nil {
return fmt.Errorf("set network interface: %w", err)
}
// Machine config (vCPUs + memory)
if err := client.setMachineConfig(ctx, cfg.VCPUs, cfg.MemoryMB); err != nil {
return fmt.Errorf("set machine config: %w", err)
}
// Balloon device — allows the host to reclaim unused guest memory.
// Start with 0 (no inflation). deflate_on_oom lets the guest reclaim
// balloon pages under memory pressure. Stats interval enables monitoring.
if err := client.setBalloon(ctx, 0, true, 5); err != nil {
slog.Warn("set balloon failed (non-fatal, VM will run without memory reclaim)", "error", err)
}
// MMDS config — enable V2 token access on eth0 so that envd can read
// WRENN_SANDBOX_ID and WRENN_TEMPLATE_ID from inside the guest.
if err := client.setMMDSConfig(ctx, "eth0"); err != nil {
return fmt.Errorf("set MMDS config: %w", err)
}
return nil
}
// Pause pauses a running VM.
func (m *Manager) Pause(ctx context.Context, sandboxID string) error {
m.mu.RLock()
@ -179,7 +131,8 @@ func (m *Manager) UpdateBalloon(ctx context.Context, sandboxID string, amountMiB
return fmt.Errorf("VM not found: %s", sandboxID)
}
return vm.client.updateBalloon(ctx, amountMiB)
sizeBytes := int64(amountMiB) * 1024 * 1024
return vm.client.resizeBalloon(ctx, sizeBytes)
}
// Destroy stops and cleans up a VM.
@ -195,12 +148,17 @@ func (m *Manager) Destroy(ctx context.Context, sandboxID string) error {
slog.Info("destroying VM", "sandbox", sandboxID)
// Stop the Firecracker process.
// Try a clean VMM shutdown first, then stop the process regardless of the outcome.
shutdownCtx, shutdownCancel := context.WithTimeout(ctx, 5*time.Second)
if err := vm.client.shutdownVMM(shutdownCtx); err != nil {
slog.Debug("clean VMM shutdown failed, killing process", "sandbox", sandboxID, "error", err)
}
shutdownCancel()
if err := vm.process.stop(); err != nil {
slog.Warn("error stopping process", "sandbox", sandboxID, "error", err)
}
// Clean up the API socket.
os.Remove(vm.Config.SocketPath)
slog.Info("VM destroyed", "sandbox", sandboxID)
@ -208,8 +166,8 @@ func (m *Manager) Destroy(ctx context.Context, sandboxID string) error {
}
// Snapshot creates a VM snapshot. The VM must already be paused.
// snapshotType is "Full" (all memory) or "Diff" (only dirty pages since last resume).
func (m *Manager) Snapshot(ctx context.Context, sandboxID, snapPath, memPath, snapshotType string) error {
// snapshotDir is the snapshot directory; it is passed to the API as a file:// URL.
func (m *Manager) Snapshot(ctx context.Context, sandboxID, snapshotDir string) error {
m.mu.RLock()
vm, ok := m.vms[sandboxID]
m.mu.RUnlock()
@ -217,29 +175,35 @@ func (m *Manager) Snapshot(ctx context.Context, sandboxID, snapPath, memPath, sn
return fmt.Errorf("VM not found: %s", sandboxID)
}
if err := vm.client.createSnapshot(ctx, snapPath, memPath, snapshotType); err != nil {
destURL := "file://" + snapshotDir
if err := vm.client.snapshotVM(ctx, destURL); err != nil {
return fmt.Errorf("create snapshot: %w", err)
}
slog.Info("VM snapshot created", "sandbox", sandboxID, "snap_path", snapPath, "type", snapshotType)
slog.Info("VM snapshot created", "sandbox", sandboxID, "snapshot_dir", snapshotDir)
return nil
}
// CreateFromSnapshot boots a new Firecracker VM by loading a snapshot
// using UFFD for lazy memory loading. The network namespace and TAP
// device must already be set up.
// CreateFromSnapshot boots a new Cloud Hypervisor VM by restoring from a
// snapshot directory. The network namespace and TAP device must already be set up.
//
// No boot resources (kernel, drives, machine config) are configured —
// the snapshot carries all that state. The rootfs path recorded in the
// snapshot is resolved via a stable symlink at SandboxDir/rootfs.ext4
// inside the mount namespace (created by the start script in jailer.go).
// A bare CH process is started first, then the restore is performed via the API
// with memory_restore_mode=OnDemand for UFFD-based lazy page loading. This means
// only pages the guest actually touches are faulted in from disk — a 16GB template
// with 2GB active working set only loads ~2GB into RAM at restore time.
//
// The restore API also sets resume=true, so the VM starts running immediately
// without a separate resume call.
//
// The rootfs path recorded in the snapshot is resolved via a stable symlink at
// SandboxDir/rootfs.ext4 inside the mount namespace.
//
// The sequence is:
// 1. Start FC process in mount+network namespace (creates tmpfs + rootfs symlink)
// 1. Start bare CH process in mount+network namespace
// 2. Wait for API socket
// 3. Load snapshot with UFFD backend
// 4. Resume VM execution
func (m *Manager) CreateFromSnapshot(ctx context.Context, cfg VMConfig, snapPath, uffdSocketPath string) (*VM, error) {
// 3. Restore VM via API (OnDemand memory + auto-resume)
func (m *Manager) CreateFromSnapshot(ctx context.Context, cfg VMConfig, snapshotDir string) (*VM, error) {
cfg.SnapshotDir = snapshotDir
cfg.applyDefaults()
if err := cfg.validate(); err != nil {
return nil, fmt.Errorf("invalid config: %w", err)
@ -249,14 +213,11 @@ func (m *Manager) CreateFromSnapshot(ctx context.Context, cfg VMConfig, snapPath
slog.Info("restoring VM from snapshot",
"sandbox", cfg.SandboxID,
"snap_path", snapPath,
"snapshot_dir", snapshotDir,
)
// Step 1: Launch the Firecracker process.
// The start script creates a tmpfs at SandboxDir and symlinks
// rootfs.ext4 → cfg.RootfsPath, so the snapshot's recorded rootfs
// path (/fc-vm/rootfs.ext4) resolves to the new clone.
proc, err := startProcess(ctx, &cfg)
// Step 1: Launch bare CH process (no --restore).
proc, err := startProcessForRestore(ctx, &cfg)
if err != nil {
return nil, fmt.Errorf("start process: %w", err)
}
@ -267,26 +228,13 @@ func (m *Manager) CreateFromSnapshot(ctx context.Context, cfg VMConfig, snapPath
return nil, fmt.Errorf("wait for socket: %w", err)
}
client := newFCClient(cfg.SocketPath)
client := newCHClient(cfg.SocketPath)
// Step 3: Load the snapshot with UFFD backend.
// No boot resources are configured — the snapshot carries kernel,
// drive, network, and machine config state.
if err := client.loadSnapshotWithUffd(ctx, snapPath, uffdSocketPath); err != nil {
// Step 3: Restore via API with OnDemand memory + auto-resume.
sourceURL := "file://" + snapshotDir
if err := client.restoreVM(ctx, sourceURL); err != nil {
_ = proc.stop()
return nil, fmt.Errorf("load snapshot: %w", err)
}
// Step 4: Resume the VM.
if err := client.resumeVM(ctx); err != nil {
_ = proc.stop()
return nil, fmt.Errorf("resume VM: %w", err)
}
// Step 5: Push sandbox metadata into MMDS.
if err := client.setMMDS(ctx, cfg.SandboxID, cfg.TemplateID); err != nil {
_ = proc.stop()
return nil, fmt.Errorf("set MMDS metadata: %w", err)
return nil, fmt.Errorf("restore VM: %w", err)
}
vm := &VM{
@ -304,11 +252,15 @@ func (m *Manager) CreateFromSnapshot(ctx context.Context, cfg VMConfig, snapPath
}
// PID returns the process ID of the unshare wrapper process.
// The actual Firecracker process is a direct child of this PID.
func (v *VM) PID() int {
return v.process.cmd.Process.Pid
}
// Exited returns a channel that is closed when the VM process exits.
func (v *VM) Exited() <-chan struct{} {
return v.process.exited()
}
// Get returns a running VM by sandbox ID.
func (m *Manager) Get(sandboxID string) (*VM, bool) {
m.mu.RLock()
@ -317,7 +269,7 @@ func (m *Manager) Get(sandboxID string) (*VM, bool) {
return vm, ok
}
// waitForSocket polls for the Firecracker API socket to appear on disk.
// waitForSocket polls for the Cloud Hypervisor API socket to appear on disk.
func waitForSocket(ctx context.Context, socketPath string, proc *process) error {
ticker := time.NewTicker(10 * time.Millisecond)
defer ticker.Stop()
@ -329,7 +281,7 @@ func waitForSocket(ctx context.Context, socketPath string, proc *process) error
case <-ctx.Done():
return ctx.Err()
case <-proc.exited():
return fmt.Errorf("firecracker process exited before socket was ready")
return fmt.Errorf("cloud-hypervisor process exited before socket was ready")
case <-timeout:
return fmt.Errorf("timed out waiting for API socket at %s", socketPath)
case <-ticker.C: