Add host agent with VM lifecycle, TAP networking, and envd client

Implements Phase 1: boot a Firecracker microVM, execute a command inside
it via envd, and get the output back. Uses raw Firecracker HTTP API via
Unix socket (not the Go SDK) for full control over the VM lifecycle.

- internal/vm: VM manager with create/pause/resume/destroy, Firecracker
  HTTP client, process launcher with unshare + ip netns exec isolation
- internal/network: per-sandbox network namespace with veth pair, TAP
  device, NAT rules, and IP forwarding
- internal/envdclient: Connect RPC client for envd process/filesystem
  services with health check retry
- cmd/host-agent: demo binary that boots a VM, runs "echo hello", prints
  output, and cleans up
- proto/envd: canonical proto files with buf + protoc-gen-connect-go
  code generation
- images/wrenn-init.sh: minimal PID 1 init script for guest VMs
- CLAUDE.md: updated architecture to reflect TAP networking (not vsock)
  and Firecracker HTTP API (not Go SDK)
This commit is contained in:
2026-03-10 00:06:47 +06:00
parent a3898d68fb
commit 7753938044
26 changed files with 5773 additions and 1275 deletions

141
internal/vm/fc.go Normal file
View File

@ -0,0 +1,141 @@
package vm
import (
"bytes"
"context"
"encoding/json"
"fmt"
"io"
"net"
"net/http"
"time"
)
// fcClient is a small client for the Firecracker HTTP API, which is served
// over a Unix domain socket rather than TCP.
type fcClient struct {
	// http issues the API requests.
	http *http.Client
	// socketPath is the filesystem path of the API socket this client was
	// created for.
	socketPath string
}
// newFCClient returns an fcClient whose requests travel over the Unix socket
// at socketPath.
//
// The network and address that net/http derives from the request URL are
// discarded: every connection is dialed to socketPath instead. The underlying
// http.Client has a 10-second timeout, which applies to each request
// regardless of the context passed by the caller.
func newFCClient(socketPath string) *fcClient {
	dialUnix := func(ctx context.Context, _, _ string) (net.Conn, error) {
		dialer := new(net.Dialer)
		return dialer.DialContext(ctx, "unix", socketPath)
	}

	client := &http.Client{
		Transport: &http.Transport{DialContext: dialUnix},
		Timeout:   10 * time.Second,
	}

	return &fcClient{http: client, socketPath: socketPath}
}
// do sends one request to the Firecracker API and reports whether it succeeded.
//
// When body is non-nil it is encoded with encoding/json and sent with a
// Content-Type of application/json; a nil body produces a request without a
// payload. path is appended verbatim to the base URL.
//
// Any status code below 300 counts as success and yields nil. For other
// statuses the returned error carries the method, path, status code, and the
// text of the response body. Marshalling, request-construction, and transport
// failures are returned wrapped with %w.
func (c *fcClient) do(ctx context.Context, method, path string, body any) error {
	var bodyReader io.Reader
	if body != nil {
		data, err := json.Marshal(body)
		if err != nil {
			return fmt.Errorf("marshal request body: %w", err)
		}
		bodyReader = bytes.NewReader(data)
	}

	// The client built by newFCClient dials the Unix socket no matter what
	// host the URL names; "localhost" is used by convention.
	req, err := http.NewRequestWithContext(ctx, method, "http://localhost"+path, bodyReader)
	if err != nil {
		return fmt.Errorf("create request: %w", err)
	}
	if body != nil {
		req.Header.Set("Content-Type", "application/json")
	}

	resp, err := c.http.Do(req)
	if err != nil {
		return fmt.Errorf("%s %s: %w", method, path, err)
	}
	defer func() {
		// Read the body to EOF before closing it. net/http only returns a
		// keep-alive connection to the idle pool when the body was fully
		// consumed; closing early forces a fresh dial for the next request.
		// A failure while draining merely costs that reuse, so it is ignored.
		_, _ = io.Copy(io.Discard, resp.Body)
		resp.Body.Close()
	}()

	if resp.StatusCode >= 300 {
		// Best effort: the status code alone already identifies the failure,
		// so a read error here just leaves the message with a partial body.
		respBody, _ := io.ReadAll(resp.Body)
		return fmt.Errorf("%s %s: status %d: %s", method, path, resp.StatusCode, string(respBody))
	}
	return nil
}
// setBootSource sends PUT /boot-source with the kernel image path and the
// boot arguments to use for the guest.
func (c *fcClient) setBootSource(ctx context.Context, kernelPath, bootArgs string) error {
	const endpoint = "/boot-source"

	payload := map[string]string{
		"boot_args":         bootArgs,
		"kernel_image_path": kernelPath,
	}
	return c.do(ctx, http.MethodPut, endpoint, payload)
}
// setRootfsDrive sends PUT /drives/{driveID} describing the drive backed by
// the host file at path. The drive is always flagged as the root device;
// readOnly controls the is_read_only flag.
func (c *fcClient) setRootfsDrive(ctx context.Context, driveID, path string, readOnly bool) error {
	endpoint := "/drives/" + driveID

	payload := map[string]any{
		"drive_id":       driveID,
		"is_read_only":   readOnly,
		"is_root_device": true,
		"path_on_host":   path,
	}
	return c.do(ctx, http.MethodPut, endpoint, payload)
}
// setNetworkInterface sends PUT /network-interfaces/{ifaceID}, binding the
// guest interface to the host device tapName and giving it the guest MAC
// address macAddr.
func (c *fcClient) setNetworkInterface(ctx context.Context, ifaceID, tapName, macAddr string) error {
	endpoint := "/network-interfaces/" + ifaceID

	payload := map[string]any{
		"guest_mac":     macAddr,
		"host_dev_name": tapName,
		"iface_id":      ifaceID,
	}
	return c.do(ctx, http.MethodPut, endpoint, payload)
}
// setMachineConfig sends PUT /machine-config with the vCPU count and the
// memory size in MiB. smt is always sent as false.
func (c *fcClient) setMachineConfig(ctx context.Context, vcpus, memMB int) error {
	const endpoint = "/machine-config"

	payload := map[string]any{
		"mem_size_mib": memMB,
		"smt":          false,
		"vcpu_count":   vcpus,
	}
	return c.do(ctx, http.MethodPut, endpoint, payload)
}
// startVM sends PUT /actions with action_type "InstanceStart".
func (c *fcClient) startVM(ctx context.Context) error {
	const endpoint = "/actions"

	action := map[string]string{"action_type": "InstanceStart"}
	return c.do(ctx, http.MethodPut, endpoint, action)
}
// pauseVM sends PATCH /vm requesting the "Paused" state.
func (c *fcClient) pauseVM(ctx context.Context) error {
	const endpoint = "/vm"

	target := map[string]string{"state": "Paused"}
	return c.do(ctx, http.MethodPatch, endpoint, target)
}
// resumeVM sends PATCH /vm requesting the "Resumed" state.
func (c *fcClient) resumeVM(ctx context.Context) error {
	const endpoint = "/vm"

	target := map[string]string{"state": "Resumed"}
	return c.do(ctx, http.MethodPatch, endpoint, target)
}
// createSnapshot sends PUT /snapshot/create asking for a "Full" snapshot,
// naming snapPath as the snapshot file and memPath as the memory file.
func (c *fcClient) createSnapshot(ctx context.Context, snapPath, memPath string) error {
	const endpoint = "/snapshot/create"

	payload := map[string]any{
		"mem_file_path": memPath,
		"snapshot_path": snapPath,
		"snapshot_type": "Full",
	}
	return c.do(ctx, http.MethodPut, endpoint, payload)
}
// loadSnapshot sends PUT /snapshot/load naming snapPath as the snapshot file
// and memPath as the memory file. resume_vm is always sent as false, so this
// call does not itself ask Firecracker to resume the guest.
func (c *fcClient) loadSnapshot(ctx context.Context, snapPath, memPath string) error {
	const endpoint = "/snapshot/load"

	payload := map[string]any{
		"mem_file_path": memPath,
		"resume_vm":     false,
		"snapshot_path": snapPath,
	}
	return c.do(ctx, http.MethodPut, endpoint, payload)
}