forked from wrenn/wrenn
feat(vm): replace Firecracker with Cloud Hypervisor
Migrate the entire VM layer from Firecracker to Cloud Hypervisor (CH). CH provides native snapshot/restore via its HTTP API, eliminating the need for custom UFFD handling, memfile processing, and snapshot header management that Firecracker required. Key changes: - Remove fc.go, jailer.go (FC process management) - Remove internal/uffd/ package (userfaultfd lazy page loading) - Remove snapshot/header.go, mapping.go, memfile.go (FC snapshot format) - Add ch.go (CH HTTP API client over Unix socket) - Add process.go (CH process lifecycle with unshare+netns) - Add chversion.go (CH version detection) - Refactor sandbox manager: remove UFFD socket tracking, snapshot parent/diff chaining, FC-specific balloon logic; add crash watcher - Simplify snapshot/local.go to CH's native snapshot format - Update VM config: FirecrackerBin → VMMBin, new CH-specific fields - Update envdclient, devicemapper, network for CH compatibility
This commit is contained in:
157
internal/vm/process.go
Normal file
157
internal/vm/process.go
Normal file
@ -0,0 +1,157 @@
|
||||
package vm
|
||||
|
||||
import (
	"context"
	"fmt"
	"log/slog"
	"os"
	"os/exec"
	"strings"
	"syscall"
	"time"
)
|
||||
|
||||
// process is the handle for one launched Cloud Hypervisor child process. It is
// created by launchScript, which starts the child inside a private mount
// namespace and the configured network namespace.
type process struct {
	// cmd is the started command; cmd.Process.Pid is what stop() signals.
	cmd *exec.Cmd
	// cancel cancels the context the command was created with.
	cancel context.CancelFunc

	// exitCh is closed once cmd.Wait has returned.
	exitCh chan struct{}
	// exitErr holds the result of cmd.Wait. It is written before exitCh is
	// closed, so read it only after receiving from exitCh.
	exitErr error
}
|
||||
|
||||
// startProcess launches the Cloud Hypervisor binary inside an isolated mount
|
||||
// namespace and the specified network namespace. Used for fresh boot (no
|
||||
// snapshot). The launch sequence:
|
||||
//
|
||||
// 1. unshare -m: creates a private mount namespace
|
||||
// 2. mount --make-rprivate /: prevents mount propagation to host
|
||||
// 3. mount tmpfs at SandboxDir: ephemeral workspace for this VM
|
||||
// 4. symlink kernel and rootfs into SandboxDir
|
||||
// 5. ip netns exec <ns>: enters the network namespace where TAP is configured
|
||||
// 6. exec cloud-hypervisor with the API socket path
|
||||
func startProcess(ctx context.Context, cfg *VMConfig) (*process, error) {
|
||||
script := buildStartScript(cfg)
|
||||
return launchScript(script, cfg)
|
||||
}
|
||||
|
||||
// startProcessForRestore launches a bare Cloud Hypervisor process (no --restore).
|
||||
// The restore is performed via the API after the socket is ready, which allows
|
||||
// passing memory_restore_mode=OnDemand for UFFD lazy paging.
|
||||
func startProcessForRestore(ctx context.Context, cfg *VMConfig) (*process, error) {
|
||||
script := buildRestoreScript(cfg)
|
||||
return launchScript(script, cfg)
|
||||
}
|
||||
|
||||
func launchScript(script string, cfg *VMConfig) (*process, error) {
|
||||
execCtx, cancel := context.WithCancel(context.Background())
|
||||
|
||||
cmd := exec.CommandContext(execCtx, "unshare", "-m", "--", "bash", "-c", script)
|
||||
cmd.SysProcAttr = &syscall.SysProcAttr{
|
||||
Setsid: true,
|
||||
}
|
||||
cmd.Stdout = os.Stdout
|
||||
cmd.Stderr = os.Stderr
|
||||
|
||||
if err := cmd.Start(); err != nil {
|
||||
cancel()
|
||||
return nil, fmt.Errorf("start cloud-hypervisor process: %w", err)
|
||||
}
|
||||
|
||||
p := &process{
|
||||
cmd: cmd,
|
||||
cancel: cancel,
|
||||
exitCh: make(chan struct{}),
|
||||
}
|
||||
|
||||
go func() {
|
||||
p.exitErr = cmd.Wait()
|
||||
close(p.exitCh)
|
||||
}()
|
||||
|
||||
slog.Info("cloud-hypervisor process started",
|
||||
"pid", cmd.Process.Pid,
|
||||
"sandbox", cfg.SandboxID,
|
||||
)
|
||||
|
||||
return p, nil
|
||||
}
|
||||
|
||||
// buildStartScript generates the bash script for fresh boot: sets up mount
|
||||
// namespace, symlinks kernel/rootfs, and execs Cloud Hypervisor.
|
||||
func buildStartScript(cfg *VMConfig) string {
|
||||
return fmt.Sprintf(`
|
||||
set -euo pipefail
|
||||
|
||||
mount --make-rprivate /
|
||||
|
||||
mkdir -p %[1]s
|
||||
mount -t tmpfs tmpfs %[1]s
|
||||
|
||||
ln -s %[2]s %[1]s/vmlinux
|
||||
ln -s %[3]s %[1]s/rootfs.ext4
|
||||
|
||||
exec ip netns exec %[4]s %[5]s --api-socket path=%[6]s
|
||||
`,
|
||||
cfg.SandboxDir, // 1
|
||||
cfg.KernelPath, // 2
|
||||
cfg.RootfsPath, // 3
|
||||
cfg.NetworkNamespace, // 4
|
||||
cfg.VMMBin, // 5
|
||||
cfg.SocketPath, // 6
|
||||
)
|
||||
}
|
||||
|
||||
// buildRestoreScript generates the bash script for snapshot restore: sets up
|
||||
// mount namespace, symlinks rootfs, and starts a bare Cloud Hypervisor process.
|
||||
// The actual restore is done via the API (PUT /vm.restore) after the socket is
|
||||
// ready, which enables memory_restore_mode=OnDemand for UFFD lazy paging.
|
||||
func buildRestoreScript(cfg *VMConfig) string {
|
||||
return fmt.Sprintf(`
|
||||
set -euo pipefail
|
||||
|
||||
mount --make-rprivate /
|
||||
|
||||
mkdir -p %[1]s
|
||||
mount -t tmpfs tmpfs %[1]s
|
||||
|
||||
ln -s %[2]s %[1]s/rootfs.ext4
|
||||
|
||||
exec ip netns exec %[3]s %[4]s --api-socket path=%[5]s
|
||||
`,
|
||||
cfg.SandboxDir, // 1
|
||||
cfg.RootfsPath, // 2
|
||||
cfg.NetworkNamespace, // 3
|
||||
cfg.VMMBin, // 4
|
||||
cfg.SocketPath, // 5
|
||||
)
|
||||
}
|
||||
|
||||
// stop sends SIGTERM and waits for the process to exit. If it doesn't exit
|
||||
// within 10 seconds, SIGKILL is sent.
|
||||
func (p *process) stop() error {
|
||||
if p.cmd.Process == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
if err := syscall.Kill(-p.cmd.Process.Pid, syscall.SIGTERM); err != nil {
|
||||
slog.Debug("sigterm failed, process may have exited", "error", err)
|
||||
}
|
||||
|
||||
select {
|
||||
case <-p.exitCh:
|
||||
return nil
|
||||
case <-time.After(10 * time.Second):
|
||||
slog.Warn("cloud-hypervisor did not exit after SIGTERM, sending SIGKILL")
|
||||
if err := syscall.Kill(-p.cmd.Process.Pid, syscall.SIGKILL); err != nil {
|
||||
slog.Debug("sigkill failed", "error", err)
|
||||
}
|
||||
<-p.exitCh
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// exited returns a channel that is closed when the process exits.
|
||||
func (p *process) exited() <-chan struct{} {
|
||||
return p.exitCh
|
||||
}
|
||||
Reference in New Issue
Block a user