forked from wrenn/wrenn
Snapshot race fix:
- Pre-mark sandbox as "paused" in DB before issuing CreateSnapshot and
  PauseSandbox RPCs, preventing the reconciler from marking it "stopped"
  during the flatten window when the sandbox is gone from the host
  agent's in-memory map but DB still says "running"
- Revert status to "running" on RPC failure
- Check ctx.Err() before writing response to avoid writing to dead
  connections when client disconnects during long snapshot operations
Delete auth fix:
- Block non-admin deletion of platform templates (team_id = all-zeros)
  at DELETE /v1/snapshots/{name} with 403, preventing file deletion
  before the team ownership check fails
Sparse dd:
- Add conv=sparse to dd in FlattenSnapshot so flattened images preserve
  sparseness (~200MB actual vs 5GB logical)
Default disk size:
- Change default disk_size_mb from 20GB to 5GB across migration,
  manager, service, build, and EnsureImageSizes
- Disable split-button dropdown arrow for platform templates in
  dashboard snapshots page (teams cannot delete platform templates)
308 lines
8.6 KiB
Protocol Buffer
308 lines
8.6 KiB
Protocol Buffer
// This schema is written in the proto3 dialect.
syntax = "proto3";

// All definitions in this file belong to the versioned hostagent.v1 package.
package hostagent.v1;
// HostAgentService manages sandbox VMs on a single physical host.
// The control plane calls these RPCs to orchestrate sandbox lifecycle.
service HostAgentService {
  // CreateSandbox boots a new microVM with the given configuration.
  rpc CreateSandbox(CreateSandboxRequest) returns (CreateSandboxResponse);

  // DestroySandbox stops and cleans up a sandbox (VM, network, rootfs).
  rpc DestroySandbox(DestroySandboxRequest) returns (DestroySandboxResponse);

  // PauseSandbox pauses a running sandbox's VM.
  rpc PauseSandbox(PauseSandboxRequest) returns (PauseSandboxResponse);

  // ResumeSandbox resumes a paused sandbox's VM.
  rpc ResumeSandbox(ResumeSandboxRequest) returns (ResumeSandboxResponse);

  // Exec runs a command inside a sandbox and returns the collected output
  // (stdout, stderr and exit code) in a single response.
  rpc Exec(ExecRequest) returns (ExecResponse);

  // ListSandboxes returns all sandboxes managed by this host agent.
  rpc ListSandboxes(ListSandboxesRequest) returns (ListSandboxesResponse);

  // WriteFile writes content to a file inside a sandbox.
  rpc WriteFile(WriteFileRequest) returns (WriteFileResponse);

  // ReadFile reads a file from inside a sandbox.
  rpc ReadFile(ReadFileRequest) returns (ReadFileResponse);

  // CreateSnapshot pauses a sandbox, takes a snapshot, stores it as a
  // reusable template, and destroys the sandbox.
  rpc CreateSnapshot(CreateSnapshotRequest) returns (CreateSnapshotResponse);

  // DeleteSnapshot removes a snapshot template from disk.
  rpc DeleteSnapshot(DeleteSnapshotRequest) returns (DeleteSnapshotResponse);

  // ExecStream runs a command inside a sandbox and streams output events as
  // they arrive (server-streaming).
  rpc ExecStream(ExecStreamRequest) returns (stream ExecStreamResponse);

  // WriteFileStream writes a file to a sandbox using chunked streaming
  // (client-streaming). First message must contain metadata (sandbox_id,
  // path). Subsequent messages contain data chunks.
  rpc WriteFileStream(stream WriteFileStreamRequest)
      returns (WriteFileStreamResponse);

  // ReadFileStream reads a file from a sandbox and streams it back in chunks
  // (server-streaming).
  rpc ReadFileStream(ReadFileStreamRequest)
      returns (stream ReadFileStreamResponse);

  // PingSandbox resets the inactivity timer for a running sandbox.
  rpc PingSandbox(PingSandboxRequest) returns (PingSandboxResponse);

  // Terminate instructs the host agent to destroy all sandboxes and exit.
  // Called by the control plane immediately when a host is deleted so the
  // agent shuts down without waiting for the next heartbeat cycle.
  rpc Terminate(TerminateRequest) returns (TerminateResponse);

  // GetSandboxMetrics returns ring buffer metrics for a running sandbox.
  rpc GetSandboxMetrics(GetSandboxMetricsRequest)
      returns (GetSandboxMetricsResponse);

  // FlushSandboxMetrics returns all ring buffer tiers and clears them.
  // Called by the control plane before pause/destroy to persist metrics to
  // DB.
  rpc FlushSandboxMetrics(FlushSandboxMetricsRequest)
      returns (FlushSandboxMetricsResponse);

  // FlattenRootfs stops the sandbox VM, flattens the device-mapper CoW
  // snapshot into a standalone rootfs.ext4 in the images directory, then
  // cleans up all sandbox resources. Used by the template build system to
  // produce image-only templates (no memory/CPU state).
  rpc FlattenRootfs(FlattenRootfsRequest) returns (FlattenRootfsResponse);
}
// CreateSandboxRequest is the input to CreateSandbox: the configuration of
// the microVM to boot.
message CreateSandboxRequest {
  // Sandbox ID assigned by the control plane. If empty, the host agent
  // generates one.
  string sandbox_id = 5;

  // Template name (e.g., "minimal", "python311"). Determines base rootfs.
  string template = 1;

  // Number of virtual CPUs (default: 1).
  int32 vcpus = 2;

  // Memory in MB (default: 512).
  int32 memory_mb = 3;

  // TTL in seconds. Sandbox is auto-paused after this duration of
  // inactivity. 0 means no auto-pause.
  int32 timeout_sec = 4;

  // Disk size in MB for the rootfs. Base images are expanded to this size
  // at host agent startup. Default: 5120 (5 GB).
  int32 disk_size_mb = 6;
}
// CreateSandboxResponse is returned by CreateSandbox.
message CreateSandboxResponse {
  // ID of the sandbox. NOTE(review): presumably the agent-generated ID when
  // the request left sandbox_id empty — confirm.
  string sandbox_id = 1;

  // Sandbox status as a free-form string; the set of valid values is not
  // defined in this schema.
  string status = 2;

  // IP address associated with the sandbox on the host.
  // NOTE(review): exact meaning (host-side vs. guest address) is not stated
  // in this file — confirm against the host agent implementation.
  string host_ip = 3;
}
// DestroySandboxRequest is the input to DestroySandbox.
message DestroySandboxRequest {
  // ID of the sandbox to destroy.
  string sandbox_id = 1;
}
// DestroySandboxResponse is the reply to DestroySandbox; it carries no fields.
message DestroySandboxResponse {}
// PauseSandboxRequest is the input to PauseSandbox.
message PauseSandboxRequest {
  // ID of the sandbox whose VM should be paused.
  string sandbox_id = 1;
}
// PauseSandboxResponse is the reply to PauseSandbox; it carries no fields.
message PauseSandboxResponse {}
// ResumeSandboxRequest is the input to ResumeSandbox.
message ResumeSandboxRequest {
  // ID of the paused sandbox to resume.
  string sandbox_id = 1;

  // TTL in seconds restored from the DB so the reaper can auto-pause
  // the sandbox again after inactivity. 0 means no auto-pause.
  int32 timeout_sec = 2;
}
// ResumeSandboxResponse is returned by ResumeSandbox. It has the same three
// fields as CreateSandboxResponse.
message ResumeSandboxResponse {
  // ID of the resumed sandbox.
  string sandbox_id = 1;

  // Sandbox status as a free-form string; the set of valid values is not
  // defined in this schema.
  string status = 2;

  // IP address associated with the sandbox on the host.
  // NOTE(review): whether this can differ from the address reported at
  // creation is not stated in this file — confirm.
  string host_ip = 3;
}
// CreateSnapshotRequest is the input to CreateSnapshot.
message CreateSnapshotRequest {
  // ID of the sandbox to snapshot.
  string sandbox_id = 1;

  // Name under which the snapshot is stored as a template.
  string name = 2;
}
// CreateSnapshotResponse is returned by CreateSnapshot.
message CreateSnapshotResponse {
  // Name of the snapshot template.
  string name = 1;

  // Snapshot size in bytes.
  // NOTE(review): which files are counted, and whether this is logical or
  // on-disk size, is not stated in this file — confirm.
  int64 size_bytes = 2;
}
// DeleteSnapshotRequest is the input to DeleteSnapshot.
message DeleteSnapshotRequest {
  // Name of the snapshot template to remove.
  string name = 1;
}
// DeleteSnapshotResponse is the reply to DeleteSnapshot; it carries no fields.
message DeleteSnapshotResponse {}
// ExecRequest is the input to Exec: a command to run inside a sandbox.
message ExecRequest {
  // ID of the sandbox in which to run the command.
  string sandbox_id = 1;

  // Command to run.
  string cmd = 2;

  // Arguments passed to the command.
  repeated string args = 3;

  // Timeout for the command in seconds (default: 30).
  int32 timeout_sec = 4;
}
// ExecResponse is returned by Exec with the command's collected output.
message ExecResponse {
  // Raw bytes the command wrote to standard output.
  bytes stdout = 1;

  // Raw bytes the command wrote to standard error.
  bytes stderr = 2;

  // Exit code of the command.
  int32 exit_code = 3;
}
// ListSandboxesRequest is the input to ListSandboxes; it carries no fields.
message ListSandboxesRequest {}
// ListSandboxesResponse is returned by ListSandboxes.
message ListSandboxesResponse {
  // One entry per sandbox managed by this host agent.
  repeated SandboxInfo sandboxes = 1;

  // IDs of sandboxes that were automatically paused by the TTL reaper
  // since the last call. Drained on read.
  repeated string auto_paused_sandbox_ids = 2;
}
// SandboxInfo describes a single sandbox as listed by ListSandboxes.
message SandboxInfo {
  // ID of the sandbox.
  string sandbox_id = 1;

  // Sandbox status as a free-form string; the set of valid values is not
  // defined in this schema.
  string status = 2;

  // Template name of the sandbox.
  string template = 3;

  // Number of virtual CPUs.
  int32 vcpus = 4;

  // Memory in MB.
  int32 memory_mb = 5;

  // IP address associated with the sandbox on the host.
  string host_ip = 6;

  // Creation time as a Unix timestamp.
  // NOTE(review): presumably seconds since the epoch — confirm the unit.
  int64 created_at_unix = 7;

  // Time of last activity as a Unix timestamp.
  // NOTE(review): presumably seconds since the epoch — confirm the unit.
  int64 last_active_at_unix = 8;

  // Inactivity TTL in seconds.
  // NOTE(review): presumably 0 means no auto-pause, as in
  // CreateSandboxRequest — confirm.
  int32 timeout_sec = 9;
}
// WriteFileRequest is the input to WriteFile. The whole file content travels
// in this single message; WriteFileStream is the chunked alternative.
message WriteFileRequest {
  // ID of the sandbox to write into.
  string sandbox_id = 1;

  // Path of the file inside the sandbox.
  string path = 2;

  // Bytes to write to the file.
  bytes content = 3;
}
// WriteFileResponse is the reply to WriteFile; it carries no fields.
message WriteFileResponse {}
// ReadFileRequest is the input to ReadFile.
message ReadFileRequest {
  // ID of the sandbox to read from.
  string sandbox_id = 1;

  // Path of the file inside the sandbox.
  string path = 2;
}
// ReadFileResponse is returned by ReadFile. The file content travels in this
// single message; ReadFileStream is the chunked alternative.
message ReadFileResponse {
  // Bytes read from the file.
  bytes content = 1;
}

// ── Streaming Exec ──────────────────────────────────────────────────

// ExecStreamRequest is the input to ExecStream. It has the same fields as
// ExecRequest.
message ExecStreamRequest {
  // ID of the sandbox in which to run the command.
  string sandbox_id = 1;

  // Command to run.
  string cmd = 2;

  // Arguments passed to the command.
  repeated string args = 3;

  // Timeout for the command in seconds.
  // NOTE(review): no default is documented here (ExecRequest documents 30)
  // — confirm whether the same default applies.
  int32 timeout_sec = 4;
}
// ExecStreamResponse is one event in the stream returned by ExecStream.
// Exactly one member of `event` is set per message.
message ExecStreamResponse {
  oneof event {
    // Process start event, carrying the process ID.
    ExecStreamStart start = 1;

    // A piece of stdout or stderr output.
    ExecStreamData data = 2;

    // Process end event, carrying the exit code and an error string.
    ExecStreamEnd end = 3;
  }
}
// ExecStreamStart is the `start` event of an ExecStream response stream.
message ExecStreamStart {
  // Process ID of the started command.
  // NOTE(review): presumably the PID inside the sandbox — confirm.
  uint32 pid = 1;
}
// ExecStreamData is the `data` event of an ExecStream response stream. Each
// message carries output from exactly one of the two streams.
message ExecStreamData {
  oneof output {
    // Bytes written to standard output.
    bytes stdout = 1;

    // Bytes written to standard error.
    bytes stderr = 2;
  }
}
// ExecStreamEnd is the `end` event of an ExecStream response stream.
message ExecStreamEnd {
  // Exit code of the command.
  int32 exit_code = 1;

  // Error description.
  // NOTE(review): presumably empty when the command ran to completion —
  // confirm against the host agent implementation.
  string error = 2;
}

// ── Streaming File Transfer ─────────────────────────────────────────

// WriteFileStreamRequest is one message in the client stream sent to
// WriteFileStream. The first message must carry `meta`; subsequent messages
// carry `chunk`.
message WriteFileStreamRequest {
  oneof content {
    // Target sandbox and path for the upload.
    WriteFileStreamMeta meta = 1;

    // A chunk of file data.
    bytes chunk = 2;
  }
}
// WriteFileStreamMeta identifies the destination of a WriteFileStream upload.
message WriteFileStreamMeta {
  // ID of the sandbox to write into.
  string sandbox_id = 1;

  // Path of the file inside the sandbox.
  string path = 2;
}
// WriteFileStreamResponse is the reply to WriteFileStream; it carries no
// fields.
message WriteFileStreamResponse {}
// ReadFileStreamRequest is the input to ReadFileStream.
message ReadFileStreamRequest {
  // ID of the sandbox to read from.
  string sandbox_id = 1;

  // Path of the file inside the sandbox.
  string path = 2;
}
// ReadFileStreamResponse is one message in the stream returned by
// ReadFileStream.
message ReadFileStreamResponse {
  // A chunk of file data.
  bytes chunk = 1;
}

// ── Ping ────────────────────────────────────────────────────────────

// PingSandboxRequest is the input to PingSandbox.
message PingSandboxRequest {
  // ID of the sandbox whose inactivity timer should be reset.
  string sandbox_id = 1;
}
// PingSandboxResponse is the reply to PingSandbox; it carries no fields.
message PingSandboxResponse {}

// ── Terminate ────────────────────────────────────────────────────────

// TerminateRequest is the input to Terminate; it carries no fields.
message TerminateRequest {}
// TerminateResponse is the reply to Terminate; it carries no fields.
message TerminateResponse {}

// ── Metrics ──────────────────────────────────────────────────────────

// MetricPoint is a single sample of sandbox resource metrics.
message MetricPoint {
  // Sample time as a Unix timestamp.
  // NOTE(review): presumably seconds since the epoch — confirm the unit.
  int64 timestamp_unix = 1;

  // CPU usage as a percentage.
  // NOTE(review): the scale (e.g. whether it can exceed 100 with several
  // vCPUs) is not stated in this file — confirm.
  double cpu_pct = 2;

  // Memory figure in bytes.
  int64 mem_bytes = 3;

  // Disk figure in bytes.
  int64 disk_bytes = 4;
}
// GetSandboxMetricsRequest is the input to GetSandboxMetrics.
message GetSandboxMetricsRequest {
  // ID of the sandbox whose metrics are requested.
  string sandbox_id = 1;

  // Range tier: "10m", "2h", or "24h".
  string range = 2;
}
// GetSandboxMetricsResponse is returned by GetSandboxMetrics.
message GetSandboxMetricsResponse {
  // Metric samples for the requested range tier.
  // NOTE(review): ordering of the points is not stated in this file —
  // confirm before relying on it.
  repeated MetricPoint points = 1;
}
// FlushSandboxMetricsRequest is the input to FlushSandboxMetrics.
message FlushSandboxMetricsRequest {
  // ID of the sandbox whose metric buffers should be returned and cleared.
  string sandbox_id = 1;
}
// FlushSandboxMetricsResponse is returned by FlushSandboxMetrics with one
// list of samples per ring buffer tier. The tier names match the range
// values accepted by GetSandboxMetricsRequest ("10m", "2h", "24h").
message FlushSandboxMetricsResponse {
  // Samples from the 10-minute tier.
  repeated MetricPoint points_10m = 1;

  // Samples from the 2-hour tier.
  repeated MetricPoint points_2h = 2;

  // Samples from the 24-hour tier.
  repeated MetricPoint points_24h = 3;
}

// ── FlattenRootfs ────────────────────────────────────────────────────

// FlattenRootfsRequest is the input to FlattenRootfs.
message FlattenRootfsRequest {
  // ID of the sandbox whose rootfs should be flattened.
  string sandbox_id = 1;

  // Template name — output written to images/{name}/rootfs.ext4.
  string name = 2;
}
// FlattenRootfsResponse is returned by FlattenRootfs.
message FlattenRootfsResponse {
  // Size in bytes of the flattened rootfs image.
  // NOTE(review): whether this is the logical or the on-disk (sparse) size
  // is not stated in this file — confirm.
  int64 size_bytes = 1;
}