Add sandbox snapshot and restore with UFFD lazy memory loading
Implement full snapshot lifecycle: pause (snapshot + free resources), resume (UFFD-based lazy restore), and named snapshot templates that can spawn new sandboxes from frozen VM state. Key changes: - Snapshot header system with generational diff mapping (inspired by e2b) - UFFD server for lazy page fault handling during snapshot restore - Stable rootfs symlink path (/tmp/fc-vm/) for snapshot compatibility - Templates DB table and CRUD API endpoints (POST/GET/DELETE /v1/snapshots) - CreateSnapshot/DeleteSnapshot RPCs in hostagent proto - Reconciler excludes paused sandboxes (expected absent from host agent) - Snapshot templates lock vcpus/memory to baked-in values - Proper cleanup of uffd sockets and pause snapshot files on destroy
This commit is contained in:
87
internal/uffd/fd.go
Normal file
87
internal/uffd/fd.go
Normal file
@ -0,0 +1,87 @@
|
||||
// Package uffd implements a userfaultfd-based memory server for Firecracker
|
||||
// snapshot restore. When a VM is restored from a snapshot, instead of loading
|
||||
// the entire memory file upfront, the UFFD handler intercepts page faults
|
||||
// and serves memory pages on demand from the snapshot's compact diff file.
|
||||
//
|
||||
// Inspired by e2b's UFFD implementation (Apache 2.0, modified by Omukk).
|
||||
package uffd
|
||||
|
||||
/*
|
||||
#include <sys/syscall.h>
|
||||
#include <fcntl.h>
|
||||
#include <linux/userfaultfd.h>
|
||||
#include <sys/ioctl.h>
|
||||
|
||||
struct uffd_pagefault {
|
||||
__u64 flags;
|
||||
__u64 address;
|
||||
__u32 ptid;
|
||||
};
|
||||
*/
|
||||
import "C"
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"syscall"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
const (
|
||||
UFFD_EVENT_PAGEFAULT = C.UFFD_EVENT_PAGEFAULT
|
||||
UFFD_PAGEFAULT_FLAG_WRITE = C.UFFD_PAGEFAULT_FLAG_WRITE
|
||||
UFFDIO_COPY = C.UFFDIO_COPY
|
||||
UFFDIO_COPY_MODE_WP = C.UFFDIO_COPY_MODE_WP
|
||||
)
|
||||
|
||||
type (
|
||||
uffdMsg = C.struct_uffd_msg
|
||||
uffdPagefault = C.struct_uffd_pagefault
|
||||
uffdioCopy = C.struct_uffdio_copy
|
||||
)
|
||||
|
||||
// fd wraps a userfaultfd file descriptor received from Firecracker.
|
||||
type fd uintptr
|
||||
|
||||
// copy installs a page into guest memory at the given address using UFFDIO_COPY.
|
||||
// mode controls write-protection: use UFFDIO_COPY_MODE_WP to preserve WP bit.
|
||||
func (f fd) copy(addr, pagesize uintptr, data []byte, mode C.ulonglong) error {
|
||||
alignedAddr := addr &^ (pagesize - 1)
|
||||
cpy := uffdioCopy{
|
||||
src: C.ulonglong(uintptr(unsafe.Pointer(&data[0]))),
|
||||
dst: C.ulonglong(alignedAddr),
|
||||
len: C.ulonglong(pagesize),
|
||||
mode: mode,
|
||||
copy: 0,
|
||||
}
|
||||
|
||||
_, _, errno := syscall.Syscall(syscall.SYS_IOCTL, uintptr(f), UFFDIO_COPY, uintptr(unsafe.Pointer(&cpy)))
|
||||
if errno != 0 {
|
||||
return errno
|
||||
}
|
||||
|
||||
if cpy.copy != C.longlong(pagesize) {
|
||||
return fmt.Errorf("UFFDIO_COPY copied %d bytes, expected %d", cpy.copy, pagesize)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// close closes the userfaultfd file descriptor.
|
||||
func (f fd) close() error {
|
||||
return syscall.Close(int(f))
|
||||
}
|
||||
|
||||
// getMsgEvent extracts the event type from a uffd_msg.
|
||||
func getMsgEvent(msg *uffdMsg) C.uchar {
|
||||
return msg.event
|
||||
}
|
||||
|
||||
// getMsgArg extracts the arg union from a uffd_msg.
|
||||
func getMsgArg(msg *uffdMsg) [24]byte {
|
||||
return msg.arg
|
||||
}
|
||||
|
||||
// getPagefaultAddress extracts the faulting address from a uffd_pagefault.
|
||||
func getPagefaultAddress(pf *uffdPagefault) uintptr {
|
||||
return uintptr(pf.address)
|
||||
}
|
||||
Reference in New Issue
Block a user