Add host agent RPC server with sandbox lifecycle management

Implement the host agent as a Connect RPC server that orchestrates
sandbox creation, destruction, pause/resume, and command execution.
Includes sandbox manager with TTL-based reaper, network slot allocator,
rootfs cloning, hostagent proto definition with generated stubs, and
test/debug scripts. Fix Firecracker process lifetime bug where VM was
tied to HTTP request context instead of background context.
This commit is contained in:
2026-03-10 03:54:53 +06:00
parent c31ce90306
commit 6f0c365d44
24 changed files with 6236 additions and 121 deletions

233
scripts/test-host.sh Executable file
View File

@ -0,0 +1,233 @@
#!/usr/bin/env bash
#
# test-host.sh — Integration test for the Wrenn host agent.
#
# Prerequisites:
# - Host agent running: sudo ./builds/wrenn-agent
# - Firecracker installed at /usr/local/bin/firecracker
# - Kernel at /var/lib/wrenn/kernels/vmlinux
# - Base rootfs at /var/lib/wrenn/images/minimal.ext4 (with envd + wrenn-init baked in)
#
# Usage:
# ./scripts/test-host.sh [agent_url]
#
# The agent URL defaults to http://localhost:50051.
# Strict mode: abort on a failing command, on use of an unset variable, and
# when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Agent endpoint: first positional argument, or the local default.
AGENT="${1:-http://localhost:50051}"
# Service path prefix; rpc() appends "/<method>" to it.
BASE="/hostagent.v1.HostAgentService"
# ID of the sandbox under test; stays empty until Test 2 fills it in.
SANDBOX_ID=""

# ANSI colors, kept as literal backslash escapes and expanded when printed
# by the output helpers.
NC='\033[0m'
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[0;33m'
# Output helpers. Each prints one colored status line; the color variables
# hold backslash escapes, hence `echo -e`.

# pass MESSAGE — report a successful check.
pass() {
  local msg="$1"
  echo -e "${GREEN}PASS${NC}: ${msg}"
}

# fail MESSAGE — report a failed check and terminate the script with status 1.
fail() {
  local msg="$1"
  echo -e "${RED}FAIL${NC}: ${msg}"
  exit 1
}

# info MESSAGE — print a neutral progress line.
info() {
  local msg="$1"
  echo -e "${YELLOW}----${NC}: ${msg}"
}
# rpc METHOD BODY — POST a JSON request to the host agent.
#
# Globals:   AGENT (read), BASE (read)
# Arguments: $1 - RPC method name, appended to ${AGENT}${BASE}/
#            $2 - JSON request body
# Outputs:   the response body on stdout; transport errors on stderr
# Returns:   curl's exit status. HTTP error statuses are NOT turned into a
#            failure (no --fail), so callers can inspect error bodies.
rpc() {
  local method="$1"
  local body="$2"
  # --show-error keeps the progress meter off (--silent) but still prints a
  # diagnostic when the request itself fails (e.g. connection refused).
  # Without it, callers running under `set -e` abort with no message at all.
  curl --silent --show-error --request POST \
    --header "Content-Type: application/json" \
    "${AGENT}${BASE}/${method}" \
    --data "${body}"
}
# ──────────────────────────────────────────────────
# Test 1: List sandboxes (should be empty)
# ──────────────────────────────────────────────────
info "Test 1: List sandboxes (expect empty)"
RESULT=$(rpc "ListSandboxes" '{}')
echo " Response: ${RESULT}"
# Accept either a body carrying a "sandboxes" key or a bare {}. This only
# confirms the RPC answered in the expected shape; it does not itself assert
# that the list is empty.
if echo "${RESULT}" | grep -q '"sandboxes"' || echo "${RESULT}" | grep -q '{}'; then
  pass "ListSandboxes returned"
else
  fail "ListSandboxes failed"
fi
# ──────────────────────────────────────────────────
# Test 2: Create a sandbox
# ──────────────────────────────────────────────────

# Best-effort teardown, run on every exit path. Without it, any failure
# after this point leaves the sandbox created below running on the host.
# On a fully successful run the sandbox has already been destroyed by then,
# so this second request is simply rejected; its output is discarded.
cleanup_sandbox() {
  if [[ -n "${SANDBOX_ID:-}" ]]; then
    rpc "DestroySandbox" "{\"sandboxId\": \"${SANDBOX_ID}\"}" >/dev/null 2>&1 || true
  fi
}
trap cleanup_sandbox EXIT

info "Test 2: Create a sandbox"
RESULT=$(rpc "CreateSandbox" '{
"template": "minimal",
"vcpus": 1,
"memoryMb": 512,
"timeoutSec": 300
}')
echo " Response: ${RESULT}"
# Extract sandboxId from the JSON response. Python's stderr is silenced so a
# malformed or error response yields only the FAIL line below.
SANDBOX_ID=$(echo "${RESULT}" | python3 -c "import sys,json; print(json.load(sys.stdin)['sandboxId'])" 2>/dev/null) || \
  fail "CreateSandbox did not return sandboxId"
echo " Sandbox ID: ${SANDBOX_ID}"
pass "Sandbox created: ${SANDBOX_ID}"
# ──────────────────────────────────────────────────
# Test 3: List sandboxes (should have one)
# ──────────────────────────────────────────────────
info "Test 3: List sandboxes (expect one)"
RESULT=$(rpc "ListSandboxes" '{}')
echo " Response: ${RESULT}"
# The new sandbox's ID must appear somewhere in the listing.
if echo "${RESULT}" | grep -q "${SANDBOX_ID}"; then
  pass "Sandbox ${SANDBOX_ID} found in list"
else
  fail "Sandbox not found in list"
fi
# ──────────────────────────────────────────────────
# Test 4: Execute a command
# ──────────────────────────────────────────────────
info "Test 4: Execute 'echo hello world'"
RESULT=$(rpc "Exec" "{
\"sandboxId\": \"${SANDBOX_ID}\",
\"cmd\": \"/bin/sh\",
\"args\": [\"-c\", \"echo hello world\"],
\"timeoutSec\": 10
}")
echo " Response: ${RESULT}"
# The stdout field arrives base64-encoded in the JSON response; decode it
# and trim surrounding whitespace before comparing.
STDOUT=$(echo "${RESULT}" | python3 -c "
import sys, json, base64
r = json.load(sys.stdin)
print(base64.b64decode(r['stdout']).decode().strip())
" 2>/dev/null) || fail "Exec did not return stdout"
if [ "${STDOUT}" = "hello world" ]; then
  pass "Exec returned correct output: '${STDOUT}'"
else
  fail "Expected 'hello world', got '${STDOUT}'"
fi
# ──────────────────────────────────────────────────
# Test 5: Execute a multi-line command
# ──────────────────────────────────────────────────
info "Test 5: Execute multi-line command"
RESULT=$(rpc "Exec" "{
\"sandboxId\": \"${SANDBOX_ID}\",
\"cmd\": \"/bin/sh\",
\"args\": [\"-c\", \"echo line1; echo line2; echo line3\"],
\"timeoutSec\": 10
}")
echo " Response: ${RESULT}"
# Decode the base64 stdout field and count its lines. The explicit
# `|| fail` matters: under `set -e` a failing command substitution would
# otherwise abort the script silently, because Python's stderr is discarded.
LINE_COUNT=$(echo "${RESULT}" | python3 -c "
import sys, json, base64
r = json.load(sys.stdin)
lines = base64.b64decode(r['stdout']).decode().strip().split('\n')
print(len(lines))
" 2>/dev/null) || fail "Exec did not return stdout"
if [ "${LINE_COUNT}" = "3" ]; then
  pass "Multi-line output: ${LINE_COUNT} lines"
else
  fail "Expected 3 lines, got ${LINE_COUNT}"
fi
# ──────────────────────────────────────────────────
# Test 6: Pause the sandbox
# ──────────────────────────────────────────────────
info "Test 6: Pause sandbox"
RESULT=$(rpc "PauseSandbox" "{\"sandboxId\": \"${SANDBOX_ID}\"}")
echo " Response: ${RESULT}"
# Confirm through a fresh listing that a "paused" status is now reported.
LIST=$(rpc "ListSandboxes" '{}')
if echo "${LIST}" | grep -q '"paused"'; then
  pass "Sandbox paused"
else
  fail "Sandbox not in paused state"
fi
# ──────────────────────────────────────────────────
# Test 7: Exec should fail while paused
# ──────────────────────────────────────────────────
info "Test 7: Exec while paused (expect error)"
RESULT=$(rpc "Exec" "{
\"sandboxId\": \"${SANDBOX_ID}\",
\"cmd\": \"/bin/echo\",
\"args\": [\"should fail\"]
}")
echo " Response: ${RESULT}"
# Treat the call as rejected when the body mentions "not running" or
# "error", or carries a quoted "code" key (the field a Connect error body
# uses). The key is matched WITH its quotes: a bare case-insensitive `code`
# would also match any field name merely ending in "Code", letting a
# successful Exec pass this test.
# NOTE(review): confirm against the ExecResponse schema.
if echo "${RESULT}" | grep -qi 'not running\|error\|"code"'; then
  pass "Exec correctly rejected while paused"
else
  fail "Exec should have failed while paused"
fi
# ──────────────────────────────────────────────────
# Test 8: Resume the sandbox
# ──────────────────────────────────────────────────
info "Test 8: Resume sandbox"
RESULT=$(rpc "ResumeSandbox" "{\"sandboxId\": \"${SANDBOX_ID}\"}")
echo " Response: ${RESULT}"
# Confirm through a fresh listing that a "running" status is reported again.
LIST=$(rpc "ListSandboxes" '{}')
if echo "${LIST}" | grep -q '"running"'; then
  pass "Sandbox resumed"
else
  fail "Sandbox not in running state"
fi
# ──────────────────────────────────────────────────
# Test 9: Exec after resume
# ──────────────────────────────────────────────────
info "Test 9: Exec after resume"
RESULT=$(rpc "Exec" "{
\"sandboxId\": \"${SANDBOX_ID}\",
\"cmd\": \"/bin/sh\",
\"args\": [\"-c\", \"echo resumed ok\"],
\"timeoutSec\": 10
}")
echo " Response: ${RESULT}"
# Decode the base64 stdout field and trim it before comparing.
STDOUT=$(echo "${RESULT}" | python3 -c "
import sys, json, base64
r = json.load(sys.stdin)
print(base64.b64decode(r['stdout']).decode().strip())
" 2>/dev/null) || fail "Exec after resume failed"
if [ "${STDOUT}" = "resumed ok" ]; then
  pass "Exec after resume works: '${STDOUT}'"
else
  fail "Expected 'resumed ok', got '${STDOUT}'"
fi
# ──────────────────────────────────────────────────
# Test 10: Destroy the sandbox
# ──────────────────────────────────────────────────
info "Test 10: Destroy sandbox"
RESULT=$(rpc "DestroySandbox" "{\"sandboxId\": \"${SANDBOX_ID}\"}")
echo " Response: ${RESULT}"
# Do not report PASS unconditionally: a body carrying a quoted "code" key is
# an error response, so the destroy request was rejected.
# NOTE(review): assumes a successful DestroySandbox response has no "code"
# field — confirm against the proto.
if echo "${RESULT}" | grep -q '"code"'; then
  fail "DestroySandbox returned an error"
fi
pass "Sandbox destroyed"
# ──────────────────────────────────────────────────
# Test 11: List sandboxes (should be empty again)
# ──────────────────────────────────────────────────
info "Test 11: List sandboxes (expect empty)"
RESULT=$(rpc "ListSandboxes" '{}')
echo " Response: ${RESULT}"
# The destroyed sandbox's ID must no longer appear in the listing.
if echo "${RESULT}" | grep -q "${SANDBOX_ID}"; then
  fail "Destroyed sandbox still in list"
else
  pass "Sandbox list is clean"
fi
# ──────────────────────────────────────────────────
# Test 12: Destroy non-existent sandbox (expect error)
# ──────────────────────────────────────────────────
info "Test 12: Destroy non-existent sandbox (expect error)"
RESULT=$(rpc "DestroySandbox" '{"sandboxId": "sb-nonexist"}')
echo " Response: ${RESULT}"
# Any of these markers (case-insensitive) in the body counts as a rejection.
if echo "${RESULT}" | grep -qi "not found\|error\|code"; then
  pass "Correctly rejected non-existent sandbox"
else
  fail "Should have returned error for non-existent sandbox"
fi

# Reaching this point means no check called fail().
echo ""
echo -e "${GREEN}All tests passed!${NC}"

73
scripts/update-debug-rootfs.sh Executable file
View File

@ -0,0 +1,73 @@
#!/usr/bin/env bash
#
# update-debug-rootfs.sh — Build envd and inject it (plus wrenn-init) into the debug rootfs.
#
# This script:
# 1. Builds a fresh envd static binary via make
# 2. Mounts the rootfs image
# 3. Copies envd and wrenn-init into the image
# 4. Unmounts cleanly
#
# Usage:
# bash scripts/update-debug-rootfs.sh [rootfs_path]
#
# Defaults to /var/lib/wrenn/images/minimal.ext4
# Strict mode: abort on a failing command, on use of an unset variable, and
# when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Image to update: first positional argument, or the default image path.
ROOTFS="${1:-/var/lib/wrenn/images/minimal.ext4}"
# Directory the image is loop-mounted on while it is being modified.
MOUNT_DIR="/tmp/wrenn-rootfs-update"
# Absolute path of the directory holding this script, and of its parent
# (used as the project root for the build and for locating wrenn-init.sh).
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
PROJECT_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
# Refuse to continue unless the target image exists as a regular file.
[ -f "${ROOTFS}" ] || {
  echo "ERROR: Rootfs not found at ${ROOTFS}"
  exit 1
}
# Step 1: Build envd.
echo "==> Building envd..."
cd "${PROJECT_ROOT}"
make build-envd

# The build is expected to leave its output here.
ENVD_BIN="${PROJECT_ROOT}/builds/envd"
[ -f "${ENVD_BIN}" ] || {
  echo "ERROR: envd binary not found at ${ENVD_BIN}"
  exit 1
}

# Verify it's statically linked, going by what file(1) reports.
file "${ENVD_BIN}" | grep -q "statically linked" || {
  echo "ERROR: envd is not statically linked!"
  exit 1
}
# Step 2: Mount the rootfs.
echo "==> Mounting rootfs at ${MOUNT_DIR}..."

# cleanup — undo the mount and remove the mount directory.
#
# Runs from the EXIT trap, so it must cope with every state the script can
# exit in: the image is only unmounted when MOUNT_DIR really is a mount
# point. A failed unmount is reported instead of swallowed, because the
# image would otherwise stay mounted with no indication.
cleanup() {
  if mountpoint -q "${MOUNT_DIR}"; then
    echo "==> Unmounting rootfs..."
    sudo umount "${MOUNT_DIR}" \
      || echo "WARNING: failed to unmount ${MOUNT_DIR}; unmount it manually before using ${ROOTFS}" >&2
  fi
  # Best-effort: the directory may be absent, or still busy after a failed
  # unmount.
  rmdir "${MOUNT_DIR}" 2>/dev/null || true
}
# Registered BEFORE mkdir/mount so that a failed mount still removes the
# mount directory.
trap cleanup EXIT

mkdir -p "${MOUNT_DIR}"
sudo mount -o loop "${ROOTFS}" "${MOUNT_DIR}"
# Step 3: Copy files into rootfs.
BIN_DIR="${MOUNT_DIR}/usr/local/bin"

echo "==> Installing envd..."
sudo mkdir -p "${BIN_DIR}"
sudo cp "${ENVD_BIN}" "${BIN_DIR}/envd"
sudo chmod 755 "${BIN_DIR}/envd"

# wrenn-init is installed without its .sh suffix.
echo "==> Installing wrenn-init..."
sudo cp "${PROJECT_ROOT}/images/wrenn-init.sh" "${BIN_DIR}/wrenn-init"
sudo chmod 755 "${BIN_DIR}/wrenn-init"

# Step 4: Verify. Listing both files aborts the script (set -e) if either
# one is missing from the image.
echo ""
echo "==> Installed files:"
ls -la "${BIN_DIR}/envd" "${BIN_DIR}/wrenn-init"
echo ""
echo "==> Done. Rootfs updated: ${ROOTFS}"