Add host agent with VM lifecycle, TAP networking, and envd client

Implements Phase 1: boot a Firecracker microVM, execute a command inside
it via envd, and get the output back. Uses raw Firecracker HTTP API via
Unix socket (not the Go SDK) for full control over the VM lifecycle.

- internal/vm: VM manager with create/pause/resume/destroy, Firecracker
  HTTP client, process launcher with unshare + ip netns exec isolation
- internal/network: per-sandbox network namespace with veth pair, TAP
  device, NAT rules, and IP forwarding
- internal/envdclient: Connect RPC client for envd process/filesystem
  services with health check retry
- cmd/host-agent: demo binary that boots a VM, runs "echo hello", prints
  output, and cleans up
- proto/envd: canonical proto files with buf + protoc-gen-connect-go
  code generation
- images/wrenn-init.sh: minimal PID 1 init script for guest VMs
- CLAUDE.md: updated architecture to reflect TAP networking (not vsock)
  and Firecracker HTTP API (not Go SDK)
This commit is contained in:
2026-03-10 00:06:47 +06:00
parent a3898d68fb
commit 7753938044
26 changed files with 5773 additions and 1275 deletions

View File

@ -0,0 +1,138 @@
package envdclient
import (
"context"
"fmt"
"io"
"log/slog"
"net/http"
"connectrpc.com/connect"
envdpb "git.omukk.dev/wrenn/sandbox/proto/envd/gen"
"git.omukk.dev/wrenn/sandbox/proto/envd/gen/genconnect"
)
// Client wraps the Connect RPC client for envd's Process and Filesystem services.
// It also carries the plain HTTP client and URL used for envd's health endpoint.
// Construct it with New so that every field is populated consistently.
type Client struct {
	// hostIP is the address passed to New; WaitUntilReady includes it in its log output.
	hostIP string
	// base is the envd base URL derived from hostIP by baseURL.
	base string
	// healthURL is base + "/health", the URL requested by healthCheck.
	healthURL string
	// httpClient is used for the health check and is also handed to both
	// generated service clients below.
	httpClient *http.Client
	// process is the generated Connect client for the Process service (used by Exec).
	process genconnect.ProcessClient
	// filesystem is the generated Connect client for the Filesystem service (used by ListDir).
	filesystem genconnect.FilesystemClient
}
// New builds a Client for the envd instance reachable at hostIP.
//
// One HTTP client is created and shared by the health check and by both
// generated Connect service clients, all of which target the same base URL.
func New(hostIP string) *Client {
	var (
		endpoint  = baseURL(hostIP)
		transport = newHTTPClient()
	)

	c := &Client{
		hostIP:     hostIP,
		base:       endpoint,
		httpClient: transport,
	}
	c.healthURL = endpoint + "/health"
	c.process = genconnect.NewProcessClient(transport, endpoint)
	c.filesystem = genconnect.NewFilesystemClient(transport, endpoint)
	return c
}
// ExecResult holds the output of a command execution.
// It is filled in incrementally by Exec as events arrive on the response stream.
type ExecResult struct {
	// Stdout is the concatenation of all stdout data events, in arrival order.
	Stdout []byte
	// Stderr is the concatenation of all stderr data events, in arrival order.
	Stderr []byte
	// ExitCode is taken from the process end event; it stays 0 if the stream
	// finishes without delivering one.
	ExitCode int32
}
// Exec starts cmd with args inside the sandbox through envd's Process service
// and gathers everything the process writes to stdout and stderr.
//
// The call blocks until the response stream ends. The start request is sent
// with Stdin set to false. If the process cannot be started the result is nil;
// if the stream fails part-way, the output collected so far is returned
// together with the wrapped stream error.
func (c *Client) Exec(ctx context.Context, cmd string, args ...string) (*ExecResult, error) {
	noStdin := false
	startReq := &envdpb.StartRequest{
		Process: &envdpb.ProcessConfig{Cmd: cmd, Args: args},
		Stdin:   &noStdin,
	}

	stream, err := c.process.Start(ctx, connect.NewRequest(startReq))
	if err != nil {
		return nil, fmt.Errorf("start process: %w", err)
	}
	defer stream.Close()

	out := new(ExecResult)
	for stream.Receive() {
		wrapper := stream.Msg().Event
		if wrapper == nil {
			// Message without an event payload: nothing to dispatch on.
			continue
		}

		switch ev := wrapper.GetEvent().(type) {
		case *envdpb.ProcessEvent_Keepalive:
			// Keepalives carry no output; nothing to record.

		case *envdpb.ProcessEvent_Start:
			slog.Debug("process started", "pid", ev.Start.GetPid())

		case *envdpb.ProcessEvent_End:
			out.ExitCode = ev.End.GetExitCode()
			if ev.End.Error != nil {
				slog.Debug("process ended with error",
					"exit_code", ev.End.GetExitCode(),
					"error", ev.End.GetError(),
				)
			}

		case *envdpb.ProcessEvent_Data:
			// A data event carries either a stdout or a stderr chunk; any
			// other payload kind is ignored.
			payload := ev.Data.GetOutput()
			if so, ok := payload.(*envdpb.ProcessEvent_DataEvent_Stdout); ok {
				out.Stdout = append(out.Stdout, so.Stdout...)
			} else if se, ok := payload.(*envdpb.ProcessEvent_DataEvent_Stderr); ok {
				out.Stderr = append(out.Stderr, se.Stderr...)
			}
		}
	}

	// A nil error or io.EOF both mean the stream finished normally.
	streamErr := stream.Err()
	if streamErr == nil || streamErr == io.EOF {
		return out, nil
	}
	return out, fmt.Errorf("stream error: %w", streamErr)
}
// WriteFile is meant to store content at path inside the sandbox, but it is
// currently a stub: every call fails with a "not yet implemented" error and
// none of the arguments are used.
//
// TODO: implement the upload through envd's REST endpoint. envd accepts file
// uploads over plain HTTP (POST /files with multipart form data), not over
// Connect RPC. Until then, directories can be handled with the filesystem
// service's MakeDir.
func (c *Client) WriteFile(ctx context.Context, path string, content []byte) error {
	notImplemented := fmt.Errorf("WriteFile not yet implemented")
	return notImplemented
}
// ReadFile is meant to return the contents of path from inside the sandbox,
// but it is currently a stub: it always returns a nil slice and a
// "not yet implemented" error, and ignores its arguments.
//
// TODO: implement the download through envd's REST endpoint.
func (c *Client) ReadFile(ctx context.Context, path string) ([]byte, error) {
	var data []byte
	return data, fmt.Errorf("ReadFile not yet implemented")
}
// ListDir asks envd's Filesystem service for the contents of path inside the
// sandbox, forwarding depth unchanged in the request.
//
// It returns the response message on success, or a nil response and a wrapped
// "list dir" error if the RPC fails.
func (c *Client) ListDir(ctx context.Context, path string, depth uint32) (*envdpb.ListDirResponse, error) {
	query := &envdpb.ListDirRequest{Path: path, Depth: depth}

	reply, err := c.filesystem.ListDir(ctx, connect.NewRequest(query))
	if err != nil {
		return nil, fmt.Errorf("list dir: %w", err)
	}

	listing := reply.Msg
	return listing, nil
}

View File

@ -0,0 +1,21 @@
package envdclient
import (
	"fmt"
	"net"
	"net/http"
	"strconv"
)
// envdPort is the default port envd listens on inside the guest.
const envdPort = 49983

// baseURL returns the HTTP base URL ("http://host:port") for reaching envd at
// the given host IP on envdPort.
//
// Host and port are combined with net.JoinHostPort so that an IPv6 literal is
// wrapped in brackets (for example "http://[fd00::2]:49983"). For IPv4
// addresses and host names the result is identical to plain "host:port"
// concatenation.
func baseURL(hostIP string) string {
	hostPort := net.JoinHostPort(hostIP, strconv.Itoa(envdPort))
	return fmt.Sprintf("http://%s", hostPort)
}
// newHTTPClient builds the http.Client used for all envd traffic.
//
// The client is left at its zero value, so it has no custom transport and no
// client-wide timeout. envd is reached directly on the host IP over the
// veth/TAP network path, so nothing special is required.
func newHTTPClient() *http.Client {
	return new(http.Client)
}

View File

@ -0,0 +1,52 @@
package envdclient
import (
"context"
"fmt"
"log/slog"
"net/http"
"time"
)
// WaitUntilReady polls envd's health endpoint until it responds successfully
// or ctx is cancelled. A probe is attempted on every tick of retryInterval.
//
// It returns nil as soon as one health check succeeds. If ctx ends first, the
// returned error wraps ctx.Err() (so errors.Is against context.Canceled or
// context.DeadlineExceeded works) and, when at least one probe failed for a
// reason other than ctx ending, also includes that last probe error so the
// cause of the failure is visible.
func (c *Client) WaitUntilReady(ctx context.Context) error {
	const retryInterval = 100 * time.Millisecond

	slog.Info("waiting for envd to be ready", "url", c.healthURL)

	ticker := time.NewTicker(retryInterval)
	defer ticker.Stop()

	// lastErr remembers why the most recent probe failed, so a timeout can
	// report more than just the context error.
	var lastErr error

	for {
		select {
		case <-ctx.Done():
			if lastErr != nil {
				return fmt.Errorf("envd not ready: %w (last health check: %v)", ctx.Err(), lastErr)
			}
			return fmt.Errorf("envd not ready: %w", ctx.Err())

		case <-ticker.C:
			err := c.healthCheck(ctx)
			if err == nil {
				slog.Info("envd is ready", "host", c.hostIP)
				return nil
			}
			// A probe aborted by ctx itself says nothing about envd; keep the
			// earlier, more informative error in that case.
			if ctx.Err() == nil {
				lastErr = err
			}
		}
	}
}
// healthCheck sends a single GET /health request to envd.
//
// It returns nil when envd answers with 200 OK or 204 No Content, and an
// error if the request cannot be built or sent, or if any other status code
// comes back.
//
// Each attempt is bounded by its own timeout derived from ctx. The shared
// HTTP client has no timeout, so without this bound a single attempt could
// block until ctx itself ends and starve the caller's retry loop.
func (c *Client) healthCheck(ctx context.Context) error {
	const attemptTimeout = 2 * time.Second

	attemptCtx, cancel := context.WithTimeout(ctx, attemptTimeout)
	defer cancel()

	req, err := http.NewRequestWithContext(attemptCtx, http.MethodGet, c.healthURL, nil)
	if err != nil {
		return err
	}

	resp, err := c.httpClient.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK && resp.StatusCode != http.StatusNoContent {
		return fmt.Errorf("health check returned %d", resp.StatusCode)
	}
	return nil
}