Add device-mapper snapshots, test UI, fix pause ordering and lint errors
- Replace reflink rootfs copy with device-mapper snapshots (shared read-only loop device per base template, per-sandbox sparse CoW file) - Add devicemapper package with create/restore/remove/flatten operations and refcounted LoopRegistry for base image loop devices - Fix pause ordering: destroy VM before removing dm-snapshot to avoid "device busy" error (FC must release the dm device first) - Add test UI at GET /test for sandbox lifecycle management (create, pause, resume, destroy, exec, snapshot create/list/delete) - Fix DirSize to report actual disk usage (stat.Blocks * 512) instead of apparent size, so sparse CoW files report correctly - Add timing logs to pause flow for performance diagnostics - Fix all lint errors across api, network, vm, uffd, and sandbox packages - Remove obsolete internal/filesystem package (replaced by devicemapper) - Update CLAUDE.md with device-mapper architecture documentation
This commit is contained in:
@ -10,8 +10,8 @@ type VMConfig struct {
|
||||
// KernelPath is the path to the uncompressed Linux kernel (vmlinux).
|
||||
KernelPath string
|
||||
|
||||
// RootfsPath is the path to the ext4 rootfs image for this sandbox.
|
||||
// This should be a per-sandbox copy (reflink clone of the base image).
|
||||
// RootfsPath is the path to the rootfs block device for this sandbox.
|
||||
// Typically a dm-snapshot device (e.g., /dev/mapper/wrenn-sb-a1b2c3d4).
|
||||
RootfsPath string
|
||||
|
||||
// VCPUs is the number of virtual CPUs to allocate (default: 1).
|
||||
|
||||
@ -131,15 +131,6 @@ func (c *fcClient) createSnapshot(ctx context.Context, snapPath, memPath string)
|
||||
})
|
||||
}
|
||||
|
||||
// loadSnapshot loads a VM snapshot from a file-backed memory image.
|
||||
func (c *fcClient) loadSnapshot(ctx context.Context, snapPath, memPath string) error {
|
||||
return c.do(ctx, http.MethodPut, "/snapshot/load", map[string]any{
|
||||
"snapshot_path": snapPath,
|
||||
"mem_file_path": memPath,
|
||||
"resume_vm": false,
|
||||
})
|
||||
}
|
||||
|
||||
// loadSnapshotWithUffd loads a VM snapshot using a UFFD socket for
|
||||
// lazy memory loading. Firecracker will connect to the socket and
|
||||
// send the uffd fd + memory region mappings.
|
||||
|
||||
@ -53,7 +53,7 @@ func (m *Manager) Create(ctx context.Context, cfg VMConfig) (*VM, error) {
|
||||
|
||||
// Step 2: Wait for the API socket to appear.
|
||||
if err := waitForSocket(ctx, cfg.SocketPath, proc); err != nil {
|
||||
proc.stop()
|
||||
_ = proc.stop()
|
||||
return nil, fmt.Errorf("wait for socket: %w", err)
|
||||
}
|
||||
|
||||
@ -61,13 +61,13 @@ func (m *Manager) Create(ctx context.Context, cfg VMConfig) (*VM, error) {
|
||||
client := newFCClient(cfg.SocketPath)
|
||||
|
||||
if err := configureVM(ctx, client, &cfg); err != nil {
|
||||
proc.stop()
|
||||
_ = proc.stop()
|
||||
return nil, fmt.Errorf("configure VM: %w", err)
|
||||
}
|
||||
|
||||
// Step 4: Start the VM.
|
||||
if err := client.startVM(ctx); err != nil {
|
||||
proc.stop()
|
||||
_ = proc.stop()
|
||||
return nil, fmt.Errorf("start VM: %w", err)
|
||||
}
|
||||
|
||||
@ -218,7 +218,7 @@ func (m *Manager) CreateFromSnapshot(ctx context.Context, cfg VMConfig, snapPath
|
||||
|
||||
// Step 2: Wait for the API socket.
|
||||
if err := waitForSocket(ctx, cfg.SocketPath, proc); err != nil {
|
||||
proc.stop()
|
||||
_ = proc.stop()
|
||||
return nil, fmt.Errorf("wait for socket: %w", err)
|
||||
}
|
||||
|
||||
@ -228,13 +228,13 @@ func (m *Manager) CreateFromSnapshot(ctx context.Context, cfg VMConfig, snapPath
|
||||
// No boot resources are configured — the snapshot carries kernel,
|
||||
// drive, network, and machine config state.
|
||||
if err := client.loadSnapshotWithUffd(ctx, snapPath, uffdSocketPath); err != nil {
|
||||
proc.stop()
|
||||
_ = proc.stop()
|
||||
return nil, fmt.Errorf("load snapshot: %w", err)
|
||||
}
|
||||
|
||||
// Step 4: Resume the VM.
|
||||
if err := client.resumeVM(ctx); err != nil {
|
||||
proc.stop()
|
||||
_ = proc.stop()
|
||||
return nil, fmt.Errorf("resume VM: %w", err)
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user