Add diff snapshots for re-pause to avoid UFFD fault-in storm

Use Firecracker's Diff snapshot type when re-pausing a previously
resumed sandbox, capturing only dirty pages instead of a full memory
dump. Chains up to 10 incremental generations before collapsing back
to a Full snapshot. Multi-generation diff files (memfile.{buildID})
are supported alongside the legacy single-file format in resume,
template creation, and snapshot existence checks.
This commit is contained in:
2026-03-13 09:37:54 +06:00
parent a0d635ae5e
commit 80a99eec87
5 changed files with 181 additions and 42 deletions

View File

@ -164,19 +164,19 @@ func (m *Manager) Destroy(ctx context.Context, sandboxID string) error {
return nil
}
// Snapshot creates a full VM snapshot. The VM must already be paused.
// Produces a snapfile (VM state) and a memfile (full memory dump).
func (m *Manager) Snapshot(ctx context.Context, sandboxID, snapPath, memPath string) error {
// Snapshot creates a VM snapshot. The VM must already be paused.
// snapshotType is "Full" (all memory) or "Diff" (only dirty pages since last resume).
func (m *Manager) Snapshot(ctx context.Context, sandboxID, snapPath, memPath, snapshotType string) error {
vm, ok := m.vms[sandboxID]
if !ok {
return fmt.Errorf("VM not found: %s", sandboxID)
}
if err := vm.client.createSnapshot(ctx, snapPath, memPath); err != nil {
if err := vm.client.createSnapshot(ctx, snapPath, memPath, snapshotType); err != nil {
return fmt.Errorf("create snapshot: %w", err)
}
slog.Info("VM snapshot created", "sandbox", sandboxID, "snap_path", snapPath)
slog.Info("VM snapshot created", "sandbox", sandboxID, "snap_path", snapPath, "type", snapshotType)
return nil
}