Add host agent with VM lifecycle, TAP networking, and envd client

Implements Phase 1: boot a Firecracker microVM, execute a command inside
it via envd, and get the output back. Uses raw Firecracker HTTP API via
Unix socket (not the Go SDK) for full control over the VM lifecycle.

- internal/vm: VM manager with create/pause/resume/destroy, Firecracker
  HTTP client, process launcher with unshare + ip netns exec isolation
- internal/network: per-sandbox network namespace with veth pair, TAP
  device, NAT rules, and IP forwarding
- internal/envdclient: Connect RPC client for envd process/filesystem
  services with health check retry
- cmd/host-agent: demo binary that boots a VM, runs "echo hello", prints
  output, and cleans up
- proto/envd: canonical proto files with buf + protoc-gen-connect-go
  code generation
- images/wrenn-init.sh: minimal PID 1 init script for guest VMs
- CLAUDE.md: updated architecture to reflect TAP networking (not vsock)
  and Firecracker HTTP API (not Go SDK)
This commit is contained in:
2026-03-10 00:06:47 +06:00
parent a3898d68fb
commit 7753938044
26 changed files with 5773 additions and 1275 deletions

View File

@ -0,0 +1,138 @@
package envdclient
import (
"context"
"fmt"
"io"
"log/slog"
"net/http"
"connectrpc.com/connect"
envdpb "git.omukk.dev/wrenn/sandbox/proto/envd/gen"
"git.omukk.dev/wrenn/sandbox/proto/envd/gen/genconnect"
)
// Client wraps the Connect RPC client for envd's Process and Filesystem services.
// It also carries the plain HTTP client and health URL used by the readiness
// probe. Construct it with New.
type Client struct {
	// hostIP is the address given to New; WaitUntilReady logs it.
	hostIP string
	// base is the HTTP base URL built from hostIP by baseURL.
	base string
	// healthURL is base with "/health" appended.
	healthURL string
	// httpClient is shared by the health check and both Connect clients.
	httpClient *http.Client
	// process is the Connect client for envd's Process service.
	process genconnect.ProcessClient
	// filesystem is the Connect client for envd's Filesystem service.
	filesystem genconnect.FilesystemClient
}
// New builds a Client for the envd instance reachable at hostIP. The Process
// and Filesystem Connect clients share a single HTTP client and base URL.
func New(hostIP string) *Client {
	c := &Client{
		hostIP:     hostIP,
		base:       baseURL(hostIP),
		httpClient: newHTTPClient(),
	}
	c.healthURL = c.base + "/health"
	c.process = genconnect.NewProcessClient(c.httpClient, c.base)
	c.filesystem = genconnect.NewFilesystemClient(c.httpClient, c.base)
	return c
}
// ExecResult holds the output of a command execution as collected by Exec.
type ExecResult struct {
	// Stdout is the concatenation of every stdout chunk received.
	Stdout []byte
	// Stderr is the concatenation of every stderr chunk received.
	Stderr []byte
	// ExitCode is taken from the process End event; it stays 0 if no End
	// event was received.
	ExitCode int32
}
// Exec starts cmd with args inside the sandbox via envd's Process service and
// drains the event stream until it ends, accumulating stdout and stderr.
// Stdin is explicitly disabled for the started process.
//
// On a start failure the result is nil. On a stream error the output gathered
// so far is returned together with the error.
func (c *Client) Exec(ctx context.Context, cmd string, args ...string) (*ExecResult, error) {
	noStdin := false
	startReq := &envdpb.StartRequest{
		Process: &envdpb.ProcessConfig{
			Cmd:  cmd,
			Args: args,
		},
		Stdin: &noStdin,
	}

	stream, err := c.process.Start(ctx, connect.NewRequest(startReq))
	if err != nil {
		return nil, fmt.Errorf("start process: %w", err)
	}
	defer stream.Close()

	res := new(ExecResult)
	for stream.Receive() {
		wrapper := stream.Msg().Event
		if wrapper == nil {
			// Message without an event payload: nothing to record.
			continue
		}

		switch ev := wrapper.GetEvent().(type) {
		case *envdpb.ProcessEvent_Start:
			slog.Debug("process started", "pid", ev.Start.GetPid())

		case *envdpb.ProcessEvent_Data:
			switch chunk := ev.Data.GetOutput().(type) {
			case *envdpb.ProcessEvent_DataEvent_Stdout:
				res.Stdout = append(res.Stdout, chunk.Stdout...)
			case *envdpb.ProcessEvent_DataEvent_Stderr:
				res.Stderr = append(res.Stderr, chunk.Stderr...)
			}

		case *envdpb.ProcessEvent_End:
			res.ExitCode = ev.End.GetExitCode()
			if ev.End.Error != nil {
				slog.Debug("process ended with error",
					"exit_code", ev.End.GetExitCode(),
					"error", ev.End.GetError(),
				)
			}

		case *envdpb.ProcessEvent_Keepalive:
			// Keepalives carry no data.
		}
	}

	// A plain end-of-stream is not treated as a failure.
	if streamErr := stream.Err(); !(streamErr == nil || streamErr == io.EOF) {
		return res, fmt.Errorf("stream error: %w", streamErr)
	}
	return res, nil
}
// WriteFile is intended to write content to path inside the sandbox. It is
// currently a stub: every call returns a "not yet implemented" error and none
// of the arguments are used.
func (c *Client) WriteFile(ctx context.Context, path string, content []byte) error {
	// envd takes file uploads over plain HTTP (POST /files with multipart
	// form data), not over Connect RPC.
	// TODO: Implement file upload via envd's REST endpoint.
	notImplemented := fmt.Errorf("WriteFile not yet implemented")
	return notImplemented
}
// ReadFile is intended to read the file at path from inside the sandbox. It
// is currently a stub: every call returns nil data and a "not yet
// implemented" error.
func (c *Client) ReadFile(ctx context.Context, path string) ([]byte, error) {
	// TODO: Implement file download via envd's REST endpoint.
	notImplemented := fmt.Errorf("ReadFile not yet implemented")
	return nil, notImplemented
}
// ListDir asks envd's Filesystem service for the contents of path, passing
// depth through unchanged, and returns the response message.
func (c *Client) ListDir(ctx context.Context, path string, depth uint32) (*envdpb.ListDirResponse, error) {
	query := &envdpb.ListDirRequest{
		Path:  path,
		Depth: depth,
	}

	reply, err := c.filesystem.ListDir(ctx, connect.NewRequest(query))
	if err != nil {
		return nil, fmt.Errorf("list dir: %w", err)
	}
	return reply.Msg, nil
}

View File

@ -0,0 +1,21 @@
package envdclient
import (
"fmt"
"net/http"
)
// envdPort is the default port envd listens on inside the guest. baseURL
// appends it to the host IP when building the envd base URL.
const envdPort = 49983
// baseURL builds the plain-HTTP base URL ("http://<hostIP>:<envdPort>") used
// to reach envd at hostIP. hostIP is inserted verbatim.
func baseURL(hostIP string) string {
	url := fmt.Sprintf("http://%s:%d", hostIP, envdPort)
	return url
}
// newHTTPClient returns a fresh, zero-valued http.Client for talking to envd.
// No custom transport is configured — envd is reached at the host IP through
// the veth/TAP network path. No client-wide timeout is set either, so callers
// bound requests through their contexts.
func newHTTPClient() *http.Client {
	return new(http.Client)
}

View File

@ -0,0 +1,52 @@
package envdclient
import (
"context"
"fmt"
"log/slog"
"net/http"
"time"
)
// WaitUntilReady polls envd's health endpoint until it responds successfully
// or the context is cancelled. One probe is issued per retryInterval tick, so
// the first probe happens one interval after the call.
//
// When ctx ends first, the returned error wraps ctx.Err() and, if at least
// one probe ran, also reports the most recent probe failure so the caller can
// see why envd never became ready.
func (c *Client) WaitUntilReady(ctx context.Context) error {
	const retryInterval = 100 * time.Millisecond

	slog.Info("waiting for envd to be ready", "url", c.healthURL)

	ticker := time.NewTicker(retryInterval)
	defer ticker.Stop()

	// lastErr remembers the latest probe failure for the final error message.
	var lastErr error
	for {
		select {
		case <-ctx.Done():
			if lastErr != nil {
				return fmt.Errorf("envd not ready: %w (last health check: %v)", ctx.Err(), lastErr)
			}
			return fmt.Errorf("envd not ready: %w", ctx.Err())
		case <-ticker.C:
			if lastErr = c.healthCheck(ctx); lastErr == nil {
				slog.Info("envd is ready", "host", c.hostIP)
				return nil
			}
		}
	}
}
// healthCheck sends a single GET /health request to envd and returns nil when
// the response status is 200 OK or 204 No Content.
//
// Each attempt gets its own short deadline derived from ctx. The shared HTTP
// client has no timeout of its own, so without this a single stalled connect
// could block the polling loop for far longer than its retry interval.
func (c *Client) healthCheck(ctx context.Context) error {
	const attemptTimeout = 2 * time.Second

	attemptCtx, cancel := context.WithTimeout(ctx, attemptTimeout)
	defer cancel()

	req, err := http.NewRequestWithContext(attemptCtx, http.MethodGet, c.healthURL, nil)
	if err != nil {
		return err
	}

	resp, err := c.httpClient.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK && resp.StatusCode != http.StatusNoContent {
		return fmt.Errorf("health check returned %d", resp.StatusCode)
	}
	return nil
}

View File

@ -0,0 +1 @@
package network

View File

@ -0,0 +1 @@
package network

View File

@ -0,0 +1 @@
package network

391
internal/network/setup.go Normal file
View File

@ -0,0 +1,391 @@
package network
import (
"fmt"
"log/slog"
"net"
"os/exec"
"runtime"
"github.com/vishvananda/netlink"
"github.com/vishvananda/netns"
)
const (
	// Fixed addresses inside each network namespace (safe because each
	// sandbox gets its own netns). tapIP and guestIP are the two usable
	// hosts of one /30; guestNetMask is that /30 in dotted form.
	tapName      = "tap0"
	tapIP        = "169.254.0.22"
	tapMask      = 30
	tapMAC       = "02:FC:00:00:00:05"
	guestIP      = "169.254.0.21"
	guestNetMask = "255.255.255.252"
	// Base IPs for host-reachable and veth addressing. NewSlot adds a
	// per-slot offset to the last two octets of these.
	hostBase = "10.11.0.0"
	vrtBase  = "10.12.0.0"
	// Each slot gets a /31 from the vrt range (2 IPs per slot).
	vrtAddressesPerSlot = 2
)
// Slot holds the network addressing for a single sandbox. Values are computed
// by NewSlot from the slot index.
type Slot struct {
	// Index is the slot number the other fields were derived from.
	Index int
	// Derived addresses (offsets are added to the last two octets of the base).
	HostIP  net.IP // hostBase + idx — address the host uses to reach the sandbox
	VethIP  net.IP // vrtBase + idx*2 — host side of veth pair
	VpeerIP net.IP // vrtBase + idx*2+1 — namespace side of veth
	// Fixed per-namespace values, copied from the package constants.
	TapIP        string // 169.254.0.22
	TapMask      int    // 30
	TapMAC       string // 02:FC:00:00:00:05
	GuestIP      string // 169.254.0.21
	GuestNetMask string // 255.255.255.252
	TapName      string // tap0
	// Names
	NamespaceID string // ns-{idx}
	VethName    string // veth-{idx}
}
// NewSlot computes the addressing for the given slot index (1-based).
//
// The host IP is hostBase plus index; the veth pair takes two consecutive
// addresses from vrtBase starting at index*vrtAddressesPerSlot. Offsets are
// folded into the last two octets only and no range check is performed, so
// the caller must keep index small enough to fit.
func NewSlot(index int) *Slot {
	// addrAt returns a fresh 4-byte IP equal to base with offset added across
	// its third and fourth octets.
	addrAt := func(base string, offset int) net.IP {
		ip := make(net.IP, net.IPv4len)
		copy(ip, net.ParseIP(base).To4())
		ip[2] += byte(offset / 256)
		ip[3] += byte(offset % 256)
		return ip
	}

	vethOffset := index * vrtAddressesPerSlot

	slot := new(Slot)
	slot.Index = index
	slot.HostIP = addrAt(hostBase, index)
	slot.VethIP = addrAt(vrtBase, vethOffset)
	slot.VpeerIP = addrAt(vrtBase, vethOffset+1)
	slot.TapIP = tapIP
	slot.TapMask = tapMask
	slot.TapMAC = tapMAC
	slot.GuestIP = guestIP
	slot.GuestNetMask = guestNetMask
	slot.TapName = tapName
	slot.NamespaceID = fmt.Sprintf("ns-%d", index)
	slot.VethName = fmt.Sprintf("veth-%d", index)
	return slot
}
// CreateNetwork sets up the full network topology for a sandbox:
//   - Named network namespace
//   - Veth pair bridging host and namespace
//   - TAP device inside namespace for Firecracker
//   - Routes and NAT rules for connectivity
//
// The calling goroutine is pinned to its OS thread for the duration of the
// call and the thread is switched back to the host namespace before returning.
//
// If a step fails after the namespace has been created, the partially built
// topology is torn down with RemoveNetwork before the error is returned, so a
// failed call does not leave a namespace, veth, route or iptables rules
// behind. Nothing is torn down when creating the namespace itself fails
// (for example because it already exists), since it is not owned by this call.
func CreateNetwork(slot *Slot) (err error) {
	// Lock this goroutine to the OS thread — required for netns manipulation.
	runtime.LockOSThread()
	defer runtime.UnlockOSThread()

	// Save host namespace.
	hostNS, err := netns.Get()
	if err != nil {
		return fmt.Errorf("get host namespace: %w", err)
	}
	defer hostNS.Close()

	// nsCreated records that this call created the namespace and therefore
	// owns everything that must be removed on failure.
	nsCreated := false
	defer func() {
		// Always return the thread to the host namespace first; cleanup
		// below manipulates host-side links, routes and iptables rules.
		if restoreErr := netns.Set(hostNS); restoreErr != nil {
			slog.Error("failed to restore host namespace",
				"ns", slot.NamespaceID,
				"error", restoreErr,
			)
			// Cleanup from inside the wrong namespace would be unsafe.
			return
		}
		if err != nil && nsCreated {
			// Best effort: RemoveNetwork tolerates missing pieces.
			_ = RemoveNetwork(slot)
		}
	}()

	// Create named network namespace.
	ns, err := netns.NewNamed(slot.NamespaceID)
	if err != nil {
		return fmt.Errorf("create namespace %s: %w", slot.NamespaceID, err)
	}
	defer ns.Close()
	nsCreated = true

	// We are now inside the new namespace.
	slog.Info("created network namespace", "ns", slot.NamespaceID)

	// Create veth pair. Both ends start in the new namespace.
	vethAttrs := netlink.NewLinkAttrs()
	vethAttrs.Name = slot.VethName
	veth := &netlink.Veth{
		LinkAttrs: vethAttrs,
		PeerName:  "eth0",
	}
	if err := netlink.LinkAdd(veth); err != nil {
		return fmt.Errorf("create veth pair: %w", err)
	}

	// Configure vpeer (eth0) inside namespace.
	vpeer, err := netlink.LinkByName("eth0")
	if err != nil {
		return fmt.Errorf("find eth0: %w", err)
	}
	vpeerAddr := &netlink.Addr{
		IPNet: &net.IPNet{
			IP:   slot.VpeerIP,
			Mask: net.CIDRMask(31, 32),
		},
	}
	if err := netlink.AddrAdd(vpeer, vpeerAddr); err != nil {
		return fmt.Errorf("set vpeer addr: %w", err)
	}
	if err := netlink.LinkSetUp(vpeer); err != nil {
		return fmt.Errorf("bring up vpeer: %w", err)
	}

	// Move veth to host namespace.
	vethLink, err := netlink.LinkByName(slot.VethName)
	if err != nil {
		return fmt.Errorf("find veth: %w", err)
	}
	if err := netlink.LinkSetNsFd(vethLink, int(hostNS)); err != nil {
		return fmt.Errorf("move veth to host ns: %w", err)
	}

	// Create TAP device inside namespace.
	tapAttrs := netlink.NewLinkAttrs()
	tapAttrs.Name = tapName
	tap := &netlink.Tuntap{
		LinkAttrs: tapAttrs,
		Mode:      netlink.TUNTAP_MODE_TAP,
	}
	if err := netlink.LinkAdd(tap); err != nil {
		return fmt.Errorf("create tap device: %w", err)
	}
	tapLink, err := netlink.LinkByName(tapName)
	if err != nil {
		return fmt.Errorf("find tap: %w", err)
	}
	tapAddr := &netlink.Addr{
		IPNet: &net.IPNet{
			IP:   net.ParseIP(tapIP),
			Mask: net.CIDRMask(tapMask, 32),
		},
	}
	if err := netlink.AddrAdd(tapLink, tapAddr); err != nil {
		return fmt.Errorf("set tap addr: %w", err)
	}
	if err := netlink.LinkSetUp(tapLink); err != nil {
		return fmt.Errorf("bring up tap: %w", err)
	}

	// Bring up loopback.
	lo, err := netlink.LinkByName("lo")
	if err != nil {
		return fmt.Errorf("find loopback: %w", err)
	}
	if err := netlink.LinkSetUp(lo); err != nil {
		return fmt.Errorf("bring up loopback: %w", err)
	}

	// Default route inside namespace — traffic exits via veth on host.
	if err := netlink.RouteAdd(&netlink.Route{
		Scope: netlink.SCOPE_UNIVERSE,
		Gw:    slot.VethIP,
	}); err != nil {
		return fmt.Errorf("add default route in namespace: %w", err)
	}

	// Enable IP forwarding inside namespace (eth0 -> tap0).
	if err := nsExec(slot.NamespaceID,
		"sysctl", "-w", "net.ipv4.ip_forward=1",
	); err != nil {
		return fmt.Errorf("enable ip_forward in namespace: %w", err)
	}

	// NAT rules inside namespace:
	// Outbound: guest (169.254.0.21) -> internet. SNAT to vpeer IP so replies return.
	if err := iptables(slot.NamespaceID,
		"-t", "nat", "-A", "POSTROUTING",
		"-o", "eth0", "-s", guestIP,
		"-j", "SNAT", "--to", slot.VpeerIP.String(),
	); err != nil {
		return fmt.Errorf("add SNAT rule: %w", err)
	}
	// Inbound: host -> guest. Packets arrive with dst=hostIP, DNAT to guest IP.
	if err := iptables(slot.NamespaceID,
		"-t", "nat", "-A", "PREROUTING",
		"-i", "eth0", "-d", slot.HostIP.String(),
		"-j", "DNAT", "--to", guestIP,
	); err != nil {
		return fmt.Errorf("add DNAT rule: %w", err)
	}

	// Switch back to host namespace for host-side config.
	if err := netns.Set(hostNS); err != nil {
		return fmt.Errorf("switch to host ns: %w", err)
	}

	// Configure veth on host side.
	hostVeth, err := netlink.LinkByName(slot.VethName)
	if err != nil {
		return fmt.Errorf("find veth in host: %w", err)
	}
	vethAddr := &netlink.Addr{
		IPNet: &net.IPNet{
			IP:   slot.VethIP,
			Mask: net.CIDRMask(31, 32),
		},
	}
	if err := netlink.AddrAdd(hostVeth, vethAddr); err != nil {
		return fmt.Errorf("set veth addr: %w", err)
	}
	if err := netlink.LinkSetUp(hostVeth); err != nil {
		return fmt.Errorf("bring up veth: %w", err)
	}

	// Route to sandbox's host IP via vpeer.
	_, hostNet, _ := net.ParseCIDR(fmt.Sprintf("%s/32", slot.HostIP.String()))
	if err := netlink.RouteAdd(&netlink.Route{
		Dst: hostNet,
		Gw:  slot.VpeerIP,
	}); err != nil {
		return fmt.Errorf("add host route: %w", err)
	}

	// Find default gateway interface for FORWARD rules.
	defaultIface, err := getDefaultInterface()
	if err != nil {
		return fmt.Errorf("get default interface: %w", err)
	}

	// FORWARD rules: allow traffic between veth and default interface.
	if err := iptablesHost(
		"-A", "FORWARD",
		"-i", slot.VethName, "-o", defaultIface,
		"-j", "ACCEPT",
	); err != nil {
		return fmt.Errorf("add forward rule (out): %w", err)
	}
	if err := iptablesHost(
		"-A", "FORWARD",
		"-i", defaultIface, "-o", slot.VethName,
		"-j", "ACCEPT",
	); err != nil {
		return fmt.Errorf("add forward rule (in): %w", err)
	}

	// MASQUERADE for outbound traffic from sandbox.
	if err := iptablesHost(
		"-t", "nat", "-A", "POSTROUTING",
		"-s", fmt.Sprintf("%s/32", slot.HostIP.String()),
		"-o", defaultIface,
		"-j", "MASQUERADE",
	); err != nil {
		return fmt.Errorf("add masquerade rule: %w", err)
	}

	slog.Info("network created",
		"ns", slot.NamespaceID,
		"host_ip", slot.HostIP.String(),
		"guest_ip", guestIP,
	)
	return nil
}
// RemoveNetwork tears down the network topology for a sandbox: host iptables
// rules, the host route, the veth pair and the named namespace.
//
// Every step is best effort — individual failures are ignored so that
// teardown proceeds as far as possible — and the function always returns nil.
func RemoveNetwork(slot *Slot) error {
	uplink, _ := getDefaultInterface()
	hostCIDR := fmt.Sprintf("%s/32", slot.HostIP.String())

	// Remove host-side iptables rules (best effort). Skipped entirely when
	// the default interface cannot be determined.
	if uplink != "" {
		rules := [][]string{
			{"-D", "FORWARD", "-i", slot.VethName, "-o", uplink, "-j", "ACCEPT"},
			{"-D", "FORWARD", "-i", uplink, "-o", slot.VethName, "-j", "ACCEPT"},
			{"-t", "nat", "-D", "POSTROUTING", "-s", hostCIDR, "-o", uplink, "-j", "MASQUERADE"},
		}
		for _, rule := range rules {
			_ = iptablesHost(rule...)
		}
	}

	// Remove host route.
	_, hostNet, _ := net.ParseCIDR(hostCIDR)
	_ = netlink.RouteDel(&netlink.Route{
		Dst: hostNet,
		Gw:  slot.VpeerIP,
	})

	// Delete veth (also destroys the peer in the namespace).
	hostVeth, lookupErr := netlink.LinkByName(slot.VethName)
	if lookupErr == nil {
		_ = netlink.LinkDel(hostVeth)
	}

	// Delete the named namespace.
	_ = netns.DeleteNamed(slot.NamespaceID)

	slog.Info("network removed", "ns", slot.NamespaceID)
	return nil
}
// nsExec runs command with args inside the network namespace nsName by
// invoking "ip netns exec". On failure the returned error carries the
// command, its arguments and the combined stdout/stderr output.
func nsExec(nsName string, command string, args ...string) error {
	argv := make([]string, 0, 4+len(args))
	argv = append(argv, "netns", "exec", nsName, command)
	argv = append(argv, args...)

	out, err := exec.Command("ip", argv...).CombinedOutput()
	if err == nil {
		return nil
	}
	return fmt.Errorf("%s %v: %s: %w", command, args, string(out), err)
}
// iptables runs an iptables command with args inside the network namespace
// nsName via "ip netns exec". On failure the returned error carries the
// arguments and the combined stdout/stderr output.
func iptables(nsName string, args ...string) error {
	argv := make([]string, 0, 4+len(args))
	argv = append(argv, "netns", "exec", nsName, "iptables")
	argv = append(argv, args...)

	out, err := exec.Command("ip", argv...).CombinedOutput()
	if err == nil {
		return nil
	}
	return fmt.Errorf("iptables %v: %s: %w", args, string(out), err)
}
// iptablesHost runs an iptables command with args directly, i.e. in whatever
// namespace the calling thread is in (the host namespace for the callers in
// this file). On failure the returned error carries the arguments and the
// combined stdout/stderr output.
func iptablesHost(args ...string) error {
	out, err := exec.Command("iptables", args...).CombinedOutput()
	if err == nil {
		return nil
	}
	return fmt.Errorf("iptables %v: %s: %w", args, string(out), err)
}
// getDefaultInterface returns the name of the interface behind the first IPv4
// default route in the current namespace. A route counts as default when its
// destination is unset or equals 0.0.0.0/0.
func getDefaultInterface() (string, error) {
	routes, err := netlink.RouteList(nil, netlink.FAMILY_V4)
	if err != nil {
		return "", fmt.Errorf("list routes: %w", err)
	}

	for i := range routes {
		route := &routes[i]
		if route.Dst != nil && route.Dst.String() != "0.0.0.0/0" {
			// Not a default route.
			continue
		}

		link, err := netlink.LinkByIndex(route.LinkIndex)
		if err != nil {
			return "", fmt.Errorf("get link by index %d: %w", route.LinkIndex, err)
		}
		return link.Attrs().Name, nil
	}

	return "", fmt.Errorf("no default route found")
}

View File

@ -0,0 +1,122 @@
package vm
import "fmt"
// VMConfig holds the configuration for creating a Firecracker microVM.
// Optional fields left at their zero value are filled in by applyDefaults;
// required fields are checked by validate.
type VMConfig struct {
	// SandboxID is the unique identifier for this sandbox (e.g., "sb-a1b2c3d4").
	// Required.
	SandboxID string
	// KernelPath is the path to the uncompressed Linux kernel (vmlinux).
	// Required.
	KernelPath string
	// RootfsPath is the path to the ext4 rootfs image for this sandbox.
	// This should be a per-sandbox copy (reflink clone of the base image).
	// Required.
	RootfsPath string
	// VCPUs is the number of virtual CPUs to allocate (default: 1).
	VCPUs int
	// MemoryMB is the amount of RAM in megabytes (default: 512).
	MemoryMB int
	// NetworkNamespace is the name of the network namespace to launch
	// Firecracker inside (e.g., "ns-1"). The namespace must already exist
	// with a TAP device configured. Required.
	NetworkNamespace string
	// TapDevice is the name of the TAP device inside the network namespace
	// that Firecracker will attach to (default: "tap0").
	TapDevice string
	// TapMAC is the MAC address for the TAP device
	// (default: "02:FC:00:00:00:05").
	TapMAC string
	// GuestIP is the IP address assigned to the guest VM (e.g., "169.254.0.21").
	// Required.
	GuestIP string
	// GatewayIP is the gateway IP (the TAP device's IP, e.g., "169.254.0.22").
	// Required.
	GatewayIP string
	// NetMask is the subnet mask for the guest network (e.g., "255.255.255.252").
	// Required.
	NetMask string
	// FirecrackerBin is the path to the firecracker binary
	// (default: "/usr/local/bin/firecracker").
	FirecrackerBin string
	// SocketPath is the path for the Firecracker API Unix socket
	// (default: "/tmp/fc-<SandboxID>.sock").
	SocketPath string
	// SandboxDir is the tmpfs mount point for per-sandbox files inside the
	// mount namespace (default: "/tmp/fc-sandbox-<SandboxID>").
	SandboxDir string
	// InitPath is the path to the init process inside the guest.
	// Defaults to "/usr/local/bin/wrenn-init" if empty.
	InitPath string
}
// applyDefaults fills every optional field that is still at its zero value.
// The socket path and sandbox directory defaults are derived from SandboxID,
// so SandboxID should be set before this is called.
func (c *VMConfig) applyDefaults() {
	// orDefault assigns fallback to *field only when the field is empty.
	orDefault := func(field *string, fallback string) {
		if *field == "" {
			*field = fallback
		}
	}

	if c.VCPUs == 0 {
		c.VCPUs = 1
	}
	if c.MemoryMB == 0 {
		c.MemoryMB = 512
	}

	orDefault(&c.FirecrackerBin, "/usr/local/bin/firecracker")
	orDefault(&c.SocketPath, fmt.Sprintf("/tmp/fc-%s.sock", c.SandboxID))
	orDefault(&c.SandboxDir, fmt.Sprintf("/tmp/fc-sandbox-%s", c.SandboxID))
	orDefault(&c.TapDevice, "tap0")
	orDefault(&c.TapMAC, "02:FC:00:00:00:05")
	orDefault(&c.InitPath, "/usr/local/bin/wrenn-init")
}
// kernelArgs builds the kernel command line for the VM: fixed console/boot
// flags, the init path, and a static "ip=" network configuration.
func (c *VMConfig) kernelArgs() string {
	// ip= format used here: <client-ip>::<gw-ip>:<netmask>:<hostname>:<iface>:<autoconf>
	// (the field between client and gateway is left empty). The hostname is
	// fixed to "sandbox", the interface to eth0, and autoconfiguration is off.
	netArg := fmt.Sprintf("ip=%s::%s:%s:sandbox:eth0:off",
		c.GuestIP, c.GatewayIP, c.NetMask,
	)

	cmdline := fmt.Sprintf(
		"console=ttyS0 reboot=k panic=1 pci=off quiet loglevel=1 init=%s %s",
		c.InitPath, netArg,
	)
	return cmdline
}
// validate reports the first required field that is empty, checking them in
// declaration order: SandboxID, KernelPath, RootfsPath, NetworkNamespace,
// GuestIP, GatewayIP, NetMask. It returns nil when all are set.
func (c *VMConfig) validate() error {
	required := [...]struct {
		field string
		value string
	}{
		{"SandboxID", c.SandboxID},
		{"KernelPath", c.KernelPath},
		{"RootfsPath", c.RootfsPath},
		{"NetworkNamespace", c.NetworkNamespace},
		{"GuestIP", c.GuestIP},
		{"GatewayIP", c.GatewayIP},
		{"NetMask", c.NetMask},
	}

	for _, r := range required {
		if r.value == "" {
			return fmt.Errorf("%s is required", r.field)
		}
	}
	return nil
}

141
internal/vm/fc.go Normal file
View File

@ -0,0 +1,141 @@
package vm
import (
"bytes"
"context"
"encoding/json"
"fmt"
"io"
"net"
"net/http"
"time"
)
// fcClient talks to the Firecracker HTTP API over a Unix socket.
type fcClient struct {
	// http is the client whose transport dials socketPath for every request.
	http *http.Client
	// socketPath is the Firecracker API Unix socket this client targets.
	socketPath string
}
// newFCClient returns an fcClient whose HTTP transport ignores the requested
// network and address and always dials the Unix socket at socketPath. Each
// request is capped by a 10 second client timeout.
func newFCClient(socketPath string) *fcClient {
	dialUnix := func(ctx context.Context, _, _ string) (net.Conn, error) {
		dialer := net.Dialer{}
		return dialer.DialContext(ctx, "unix", socketPath)
	}

	httpClient := &http.Client{
		Transport: &http.Transport{DialContext: dialUnix},
		Timeout:   10 * time.Second,
	}

	return &fcClient{
		http:       httpClient,
		socketPath: socketPath,
	}
}
// do sends one request to the Firecracker API. A non-nil body is marshalled
// to JSON and sent with a JSON content type. Any response status of 300 or
// above is returned as an error that includes the response body.
func (c *fcClient) do(ctx context.Context, method, path string, body any) error {
	hasBody := body != nil

	var payload io.Reader
	if hasBody {
		encoded, err := json.Marshal(body)
		if err != nil {
			return fmt.Errorf("marshal request body: %w", err)
		}
		payload = bytes.NewReader(encoded)
	}

	// The host in the URL is ignored for Unix sockets; we use "localhost" by convention.
	req, err := http.NewRequestWithContext(ctx, method, "http://localhost"+path, payload)
	if err != nil {
		return fmt.Errorf("create request: %w", err)
	}
	if hasBody {
		req.Header.Set("Content-Type", "application/json")
	}

	resp, err := c.http.Do(req)
	if err != nil {
		return fmt.Errorf("%s %s: %w", method, path, err)
	}
	defer resp.Body.Close()

	if resp.StatusCode < 300 {
		return nil
	}

	// Include whatever the API said; a read failure just yields less detail.
	detail, _ := io.ReadAll(resp.Body)
	return fmt.Errorf("%s %s: status %d: %s", method, path, resp.StatusCode, string(detail))
}
// setBootSource sends PUT /boot-source with the kernel image path and the
// kernel boot arguments.
func (c *fcClient) setBootSource(ctx context.Context, kernelPath, bootArgs string) error {
	payload := map[string]string{
		"kernel_image_path": kernelPath,
		"boot_args":         bootArgs,
	}
	return c.do(ctx, http.MethodPut, "/boot-source", payload)
}
// setRootfsDrive sends PUT /drives/<driveID> registering path as a drive that
// is always flagged as the root device; readOnly controls write access.
func (c *fcClient) setRootfsDrive(ctx context.Context, driveID, path string, readOnly bool) error {
	payload := map[string]any{
		"drive_id":       driveID,
		"path_on_host":   path,
		"is_root_device": true,
		"is_read_only":   readOnly,
	}
	endpoint := "/drives/" + driveID
	return c.do(ctx, http.MethodPut, endpoint, payload)
}
// setNetworkInterface sends PUT /network-interfaces/<ifaceID> attaching the
// guest interface to the host TAP device tapName with guest MAC macAddr.
func (c *fcClient) setNetworkInterface(ctx context.Context, ifaceID, tapName, macAddr string) error {
	payload := map[string]any{
		"iface_id":      ifaceID,
		"host_dev_name": tapName,
		"guest_mac":     macAddr,
	}
	endpoint := "/network-interfaces/" + ifaceID
	return c.do(ctx, http.MethodPut, endpoint, payload)
}
// setMachineConfig sends PUT /machine-config with the vCPU count and memory
// size in MiB; SMT is always disabled.
func (c *fcClient) setMachineConfig(ctx context.Context, vcpus, memMB int) error {
	payload := map[string]any{
		"vcpu_count":   vcpus,
		"mem_size_mib": memMB,
		"smt":          false,
	}
	return c.do(ctx, http.MethodPut, "/machine-config", payload)
}
// startVM sends PUT /actions with the InstanceStart action.
func (c *fcClient) startVM(ctx context.Context) error {
	action := map[string]string{"action_type": "InstanceStart"}
	return c.do(ctx, http.MethodPut, "/actions", action)
}
// pauseVM sends PATCH /vm requesting the "Paused" state.
func (c *fcClient) pauseVM(ctx context.Context) error {
	desired := map[string]string{"state": "Paused"}
	return c.do(ctx, http.MethodPatch, "/vm", desired)
}
// resumeVM sends PATCH /vm requesting the "Resumed" state.
func (c *fcClient) resumeVM(ctx context.Context) error {
	desired := map[string]string{"state": "Resumed"}
	return c.do(ctx, http.MethodPatch, "/vm", desired)
}
// createSnapshot sends PUT /snapshot/create asking for a "Full" snapshot
// written to snapPath (VM state) and memPath (guest memory).
func (c *fcClient) createSnapshot(ctx context.Context, snapPath, memPath string) error {
	payload := map[string]any{
		"snapshot_type": "Full",
		"snapshot_path": snapPath,
		"mem_file_path": memPath,
	}
	return c.do(ctx, http.MethodPut, "/snapshot/create", payload)
}
// loadSnapshot sends PUT /snapshot/load for the snapshot at snapPath and
// memPath. resume_vm is sent as false, so the request does not ask for the
// VM to be resumed after loading.
func (c *fcClient) loadSnapshot(ctx context.Context, snapPath, memPath string) error {
	payload := map[string]any{
		"snapshot_path": snapPath,
		"mem_file_path": memPath,
		"resume_vm":     false,
	}
	return c.do(ctx, http.MethodPut, "/snapshot/load", payload)
}

View File

@ -0,0 +1,125 @@
package vm
import (
"context"
"fmt"
"log/slog"
"os"
"os/exec"
"syscall"
"time"
)
// process represents a running Firecracker process with mount and network
// namespace isolation.
type process struct {
	// cmd is the launched command (unshare wrapping the start script).
	cmd *exec.Cmd
	// cancel cancels the context the command was started with.
	cancel context.CancelFunc
	// exitCh is closed once cmd.Wait has returned.
	exitCh chan struct{}
	// exitErr is the result of cmd.Wait; it is written before exitCh is
	// closed, so it is only meaningful after exitCh is closed.
	exitErr error
}
// startProcess launches the Firecracker binary inside an isolated mount namespace
// and the specified network namespace. The launch sequence:
//
//  1. unshare -m: creates a private mount namespace
//  2. mount --make-rprivate /: prevents mount propagation to host
//  3. mount tmpfs at SandboxDir: ephemeral workspace for this VM
//  4. symlink kernel and rootfs into SandboxDir
//  5. ip netns exec <ns>: enters the network namespace where TAP is configured
//  6. exec firecracker with the API socket path
//
// The command is bound to a context derived from ctx, so cancelling ctx kills
// the process. The process's stdout and stderr are wired to this program's.
// A background goroutine waits for the process, records its exit error,
// releases the derived context and then closes the exit channel.
func startProcess(ctx context.Context, cfg *VMConfig) (*process, error) {
	execCtx, cancel := context.WithCancel(ctx)

	script := buildStartScript(cfg)
	cmd := exec.CommandContext(execCtx, "unshare", "-m", "--", "bash", "-c", script)
	cmd.SysProcAttr = &syscall.SysProcAttr{
		Setsid: true, // new session so signals don't propagate from parent
	}
	cmd.Stdout = os.Stdout
	cmd.Stderr = os.Stderr

	if err := cmd.Start(); err != nil {
		cancel()
		return nil, fmt.Errorf("start firecracker process: %w", err)
	}

	p := &process{
		cmd:    cmd,
		cancel: cancel,
		exitCh: make(chan struct{}),
	}

	go func() {
		p.exitErr = cmd.Wait()
		// The process is gone; release the derived context so it does not
		// stay registered with its parent until the parent is cancelled.
		cancel()
		close(p.exitCh)
	}()

	slog.Info("firecracker process started",
		"pid", cmd.Process.Pid,
		"sandbox", cfg.SandboxID,
	)
	return p, nil
}
// buildStartScript generates the bash script that sets up the mount namespace,
// symlinks kernel/rootfs, and execs Firecracker inside the network namespace.
//
// The script runs with "set -euo pipefail", so the first failing command
// aborts the launch. The final "exec" replaces the shell, so Firecracker keeps
// the shell's PID.
//
// NOTE(review): the config values are interpolated into the script without
// shell quoting, so paths containing whitespace or shell metacharacters would
// break the script — confirm callers only pass safe values.
func buildStartScript(cfg *VMConfig) string {
	// Indexed verbs (%[n]s) let SandboxDir be reused several times.
	return fmt.Sprintf(`
set -euo pipefail
# Prevent mount propagation to the host
mount --make-rprivate /
# Create ephemeral tmpfs workspace
mkdir -p %[1]s
mount -t tmpfs tmpfs %[1]s
# Symlink kernel and rootfs into the workspace
ln -s %[2]s %[1]s/vmlinux
ln -s %[3]s %[1]s/rootfs.ext4
# Launch Firecracker inside the network namespace
exec ip netns exec %[4]s %[5]s --api-sock %[6]s
`,
		cfg.SandboxDir, // 1
		cfg.KernelPath, // 2
		cfg.RootfsPath, // 3
		cfg.NetworkNamespace, // 4
		cfg.FirecrackerBin, // 5
		cfg.SocketPath, // 6
	)
}
// stop sends SIGTERM and waits for the process to exit. If it doesn't exit
// within 10 seconds, SIGKILL is sent and stop waits for the exit.
//
// If the process has already exited, stop returns immediately without sending
// any signal: once the process has been reaped its PID may be reused, and
// signalling the old process group ID could hit an unrelated group.
// stop always returns nil.
func (p *process) stop() error {
	if p.cmd.Process == nil {
		return nil
	}

	// Nothing to signal if the wait goroutine has already seen the exit.
	select {
	case <-p.exitCh:
		return nil
	default:
	}

	// The process was started with Setsid, so its PID is also its process
	// group ID; a negative PID addresses the whole group.
	pgid := -p.cmd.Process.Pid

	// Send SIGTERM to the process group (negative PID).
	if err := syscall.Kill(pgid, syscall.SIGTERM); err != nil {
		slog.Debug("sigterm failed, process may have exited", "error", err)
	}

	grace := time.NewTimer(10 * time.Second)
	defer grace.Stop()

	select {
	case <-p.exitCh:
		return nil
	case <-grace.C:
		slog.Warn("firecracker did not exit after SIGTERM, sending SIGKILL")
		if err := syscall.Kill(pgid, syscall.SIGKILL); err != nil {
			slog.Debug("sigkill failed", "error", err)
		}
		<-p.exitCh
		return nil
	}
}
// exited exposes, as receive-only, the channel that is closed once the
// process has exited.
func (p *process) exited() <-chan struct{} {
	var done <-chan struct{} = p.exitCh
	return done
}

View File

@ -0,0 +1,192 @@
package vm
import (
"context"
"fmt"
"log/slog"
"os"
"time"
)
// VM represents a running Firecracker microVM.
type VM struct {
	// Config is the configuration the VM was created with, after defaults
	// were applied.
	Config VMConfig
	// process is the Firecracker OS process backing this VM.
	process *process
	// client talks to this VM's Firecracker API socket.
	client *fcClient
}
// Manager handles the lifecycle of Firecracker microVMs.
//
// NOTE(review): the vms map is accessed without any locking in this type, so
// a Manager appears not to be safe for concurrent use — confirm how callers
// use it.
type Manager struct {
	// vms tracks running VMs by sandbox ID.
	vms map[string]*VM
}
// NewManager returns a Manager with an empty, ready-to-use VM table.
func NewManager() *Manager {
	m := new(Manager)
	m.vms = map[string]*VM{}
	return m
}
// Create boots a new Firecracker microVM with the given configuration.
// The network namespace and TAP device must already be set up.
//
// Defaults are applied to cfg and it is validated first. Create then launches
// the Firecracker process, waits for its API socket, pushes the configuration
// and starts the instance. On success the VM is registered under its sandbox
// ID and returned.
//
// Create fails if a VM with the same sandbox ID is already registered; going
// ahead would delete the live VM's API socket and drop the only handle to its
// process. If any step after the process launch fails, the process is stopped
// and the socket file is removed before the error is returned.
func (m *Manager) Create(ctx context.Context, cfg VMConfig) (*VM, error) {
	cfg.applyDefaults()
	if err := cfg.validate(); err != nil {
		return nil, fmt.Errorf("invalid config: %w", err)
	}

	if _, exists := m.vms[cfg.SandboxID]; exists {
		return nil, fmt.Errorf("VM already exists: %s", cfg.SandboxID)
	}

	// Clean up any leftover socket from a previous run.
	os.Remove(cfg.SocketPath)

	slog.Info("creating VM",
		"sandbox", cfg.SandboxID,
		"vcpus", cfg.VCPUs,
		"memory_mb", cfg.MemoryMB,
	)

	// Step 1: Launch the Firecracker process.
	proc, err := startProcess(ctx, &cfg)
	if err != nil {
		return nil, fmt.Errorf("start process: %w", err)
	}

	// abort undoes the launch after a later step has failed.
	abort := func(step string, cause error) (*VM, error) {
		_ = proc.stop()
		os.Remove(cfg.SocketPath)
		return nil, fmt.Errorf("%s: %w", step, cause)
	}

	// Step 2: Wait for the API socket to appear.
	if err := waitForSocket(ctx, cfg.SocketPath, proc); err != nil {
		return abort("wait for socket", err)
	}

	// Step 3: Configure the VM via the Firecracker API.
	client := newFCClient(cfg.SocketPath)
	if err := configureVM(ctx, client, &cfg); err != nil {
		return abort("configure VM", err)
	}

	// Step 4: Start the VM.
	if err := client.startVM(ctx); err != nil {
		return abort("start VM", err)
	}

	vm := &VM{
		Config:  cfg,
		process: proc,
		client:  client,
	}
	m.vms[cfg.SandboxID] = vm

	slog.Info("VM started successfully", "sandbox", cfg.SandboxID)
	return vm, nil
}
// configureVM pushes the VM configuration to Firecracker through its HTTP
// API in a fixed order — boot source, root drive, network interface, machine
// config — and stops at the first step that fails.
func configureVM(ctx context.Context, client *fcClient, cfg *VMConfig) error {
	steps := []struct {
		name string
		run  func() error
	}{
		{"set boot source", func() error {
			// Kernel image plus the generated kernel command line.
			return client.setBootSource(ctx, cfg.KernelPath, cfg.kernelArgs())
		}},
		{"set rootfs drive", func() error {
			// Root drive, attached read-write.
			return client.setRootfsDrive(ctx, "rootfs", cfg.RootfsPath, false)
		}},
		{"set network interface", func() error {
			return client.setNetworkInterface(ctx, "eth0", cfg.TapDevice, cfg.TapMAC)
		}},
		{"set machine config", func() error {
			// vCPUs + memory.
			return client.setMachineConfig(ctx, cfg.VCPUs, cfg.MemoryMB)
		}},
	}

	for _, step := range steps {
		if err := step.run(); err != nil {
			return fmt.Errorf("%s: %w", step.name, err)
		}
	}
	return nil
}
// Pause asks Firecracker to pause the VM registered under sandboxID. It
// fails if no such VM is registered or if the API call fails.
func (m *Manager) Pause(ctx context.Context, sandboxID string) error {
	target, found := m.vms[sandboxID]
	if !found {
		return fmt.Errorf("VM not found: %s", sandboxID)
	}

	err := target.client.pauseVM(ctx)
	if err != nil {
		return fmt.Errorf("pause VM: %w", err)
	}

	slog.Info("VM paused", "sandbox", sandboxID)
	return nil
}
// Resume asks Firecracker to resume the VM registered under sandboxID. It
// fails if no such VM is registered or if the API call fails.
func (m *Manager) Resume(ctx context.Context, sandboxID string) error {
	target, found := m.vms[sandboxID]
	if !found {
		return fmt.Errorf("VM not found: %s", sandboxID)
	}

	err := target.client.resumeVM(ctx)
	if err != nil {
		return fmt.Errorf("resume VM: %w", err)
	}

	slog.Info("VM resumed", "sandbox", sandboxID)
	return nil
}
// Destroy stops the Firecracker process of the VM registered under sandboxID,
// removes its API socket file and unregisters it. A failure to stop the
// process is logged rather than returned; the only error reported is an
// unknown sandbox ID.
func (m *Manager) Destroy(ctx context.Context, sandboxID string) error {
	target, found := m.vms[sandboxID]
	if !found {
		return fmt.Errorf("VM not found: %s", sandboxID)
	}

	slog.Info("destroying VM", "sandbox", sandboxID)

	// Stop the Firecracker process.
	if stopErr := target.process.stop(); stopErr != nil {
		slog.Warn("error stopping process", "sandbox", sandboxID, "error", stopErr)
	}

	// Clean up the API socket.
	os.Remove(target.Config.SocketPath)

	delete(m.vms, sandboxID)

	slog.Info("VM destroyed", "sandbox", sandboxID)
	return nil
}
// Get looks up the VM registered under sandboxID. The boolean reports whether
// an entry exists.
func (m *Manager) Get(sandboxID string) (*VM, bool) {
	if found, ok := m.vms[sandboxID]; ok {
		return found, true
	}
	return nil, false
}
// waitForSocket polls every 10ms for the Firecracker API socket file to exist
// at socketPath. It gives up when ctx is done, when the process exits first,
// or after 5 seconds. Only the file's existence is checked.
func waitForSocket(ctx context.Context, socketPath string, proc *process) error {
	const (
		pollEvery   = 10 * time.Millisecond
		giveUpAfter = 5 * time.Second
	)

	poll := time.NewTicker(pollEvery)
	defer poll.Stop()

	deadline := time.NewTimer(giveUpAfter)
	defer deadline.Stop()

	gone := proc.exited()

	for {
		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-gone:
			return fmt.Errorf("firecracker process exited before socket was ready")
		case <-deadline.C:
			return fmt.Errorf("timed out waiting for API socket at %s", socketPath)
		case <-poll.C:
			_, statErr := os.Stat(socketPath)
			if statErr != nil {
				// Not there yet; try again on the next tick.
				continue
			}
			return nil
		}
	}
}