package vm import ( "context" "fmt" "log/slog" "os" "os/exec" "strings" "syscall" "time" ) // process represents a running Cloud Hypervisor process with mount and network // namespace isolation. type process struct { cmd *exec.Cmd cancel context.CancelFunc exitCh chan struct{} exitErr error logFile *os.File } // startProcess launches the Cloud Hypervisor binary inside an isolated mount // namespace and the specified network namespace. Used for fresh boot (no // snapshot). The launch sequence: // // 1. unshare -m: creates a private mount namespace // 2. mount --make-rprivate /: prevents mount propagation to host // 3. mount tmpfs at SandboxDir: ephemeral workspace for this VM // 4. symlink kernel and rootfs into SandboxDir // 5. ip netns exec : enters the network namespace where TAP is configured // 6. exec cloud-hypervisor with the API socket path func startProcess(cfg *VMConfig) (*process, error) { script := buildStartScript(cfg) return launchScript(script, cfg) } // startRestoreProcess launches CH in restore mode. It mirrors startProcess // for namespace/tmpfs/symlink setup so the disk paths recorded in the // snapshot's config.json remain valid, then execs CH with `--restore`. func startRestoreProcess(cfg *VMConfig) (*process, error) { script := buildRestoreScript(cfg) return launchScript(script, cfg) } func launchScript(script string, cfg *VMConfig) (*process, error) { execCtx, cancel := context.WithCancel(context.Background()) cmd := exec.CommandContext(execCtx, "unshare", "-m", "--", "bash", "-c", script) cmd.SysProcAttr = &syscall.SysProcAttr{ Setsid: true, } var logFile *os.File if cfg.LogDir != "" { logPath := fmt.Sprintf("%s/ch-%s.log", cfg.LogDir, cfg.SandboxID) f, err := os.OpenFile(logPath, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0640) if err != nil { cancel() return nil, fmt.Errorf("open CH log file %s: %w", logPath, err) } cmd.Stdout = f cmd.Stderr = f logFile = f } if err := cmd.Start(); err != nil { cancel() if logFile != nil { logFile.Close() } return nil, fmt.Errorf("start cloud-hypervisor process: %w", err) } p := &process{ cmd: cmd, cancel: cancel, exitCh: make(chan struct{}), logFile: logFile, } go func() { p.exitErr = cmd.Wait() if p.logFile != nil { p.logFile.Close() } close(p.exitCh) }() slog.Info("cloud-hypervisor process started", "pid", cmd.Process.Pid, "sandbox", cfg.SandboxID, ) return p, nil } // buildStartScript generates the bash script for fresh boot: sets up mount // namespace, symlinks kernel/rootfs, and execs Cloud Hypervisor. func buildStartScript(cfg *VMConfig) string { return buildLaunchScript(cfg, "") } // buildRestoreScript generates the bash script for restoring a VM from a // snapshot directory. The mount/symlink prelude is identical to fresh boot // so disk paths in the snapshot config.json resolve correctly. func buildRestoreScript(cfg *VMConfig) string { dir := strings.TrimRight(cfg.RestoreFromDir, "/") restoreArg := fmt.Sprintf("--restore source_url=file://%s/", dir) if cfg.RestoreLazyMemory { restoreArg += ",memory_restore_mode=ondemand" } return buildLaunchScript(cfg, restoreArg) } // buildLaunchScript composes the namespace/tmpfs/symlink prelude and the // final cloud-hypervisor exec line. extraArgs is appended verbatim — used // to inject `--restore source_url=...` for restore launches. func buildLaunchScript(cfg *VMConfig, extraArgs string) string { chCmd := fmt.Sprintf("ip netns exec %s %s --api-socket path=%s", cfg.NetworkNamespace, cfg.VMMBin, cfg.SocketPath) if extraArgs != "" { chCmd += " " + extraArgs } return fmt.Sprintf(` set -euo pipefail mount --make-rprivate / mkdir -p %[1]s mount -t tmpfs tmpfs %[1]s ln -s %[2]s %[1]s/vmlinux ln -s %[3]s %[1]s/rootfs.ext4 exec %[4]s `, cfg.SandboxDir, // 1 cfg.KernelPath, // 2 cfg.RootfsPath, // 3 chCmd, // 4 ) } // stop sends SIGTERM and waits for the process to exit. If it doesn't exit // within 10 seconds, SIGKILL is sent. func (p *process) stop() error { if p.cmd.Process == nil { return nil } if err := syscall.Kill(-p.cmd.Process.Pid, syscall.SIGTERM); err != nil { slog.Debug("sigterm failed, process may have exited", "error", err) } select { case <-p.exitCh: return nil case <-time.After(10 * time.Second): slog.Warn("cloud-hypervisor did not exit after SIGTERM, sending SIGKILL") if err := syscall.Kill(-p.cmd.Process.Pid, syscall.SIGKILL); err != nil { slog.Debug("sigkill failed", "error", err) } <-p.exitCh return nil } } // exited returns a channel that is closed when the process exits. func (p *process) exited() <-chan struct{} { return p.exitCh }