1
0
forked from wrenn/wrenn

Add per-sandbox CPU/memory/disk metrics collection

Samples /proc/{fc_pid}/stat (CPU%), /proc/{fc_pid}/status (VmRSS), and
stat() on CoW files at 500ms intervals per running sandbox. Three tiered
ring buffers downsample into 30s and 5min averages for 10min/2h/24h
retention. Metrics are flushed to DB on pause (all tiers) and destroy
(24h only). New GetSandboxMetrics and FlushSandboxMetrics RPCs on the
host agent, proxied through GET /v1/sandboxes/{id}/metrics?range= on
the control plane. Returns live data for running sandboxes, DB data for
paused, and 404 for stopped.
This commit is contained in:
2026-03-25 20:10:33 +06:00
parent 7473c15f52
commit 9acdbb5ae9
16 changed files with 1430 additions and 90 deletions

View File

@ -54,6 +54,13 @@ service HostAgentService {
// agent shuts down without waiting for the next heartbeat cycle.
rpc Terminate(TerminateRequest) returns (TerminateResponse);
// GetSandboxMetrics returns ring buffer metrics for a running sandbox.
rpc GetSandboxMetrics(GetSandboxMetricsRequest) returns (GetSandboxMetricsResponse);
// FlushSandboxMetrics returns all ring buffer tiers and clears them.
// Called by the control plane before pause/destroy to persist metrics to DB.
rpc FlushSandboxMetrics(FlushSandboxMetricsRequest) returns (FlushSandboxMetricsResponse);
}
message CreateSandboxRequest {
@ -248,3 +255,32 @@ message PingSandboxResponse {}
// Request for the Terminate RPC. Currently carries no fields.
message TerminateRequest {}
// Response for the Terminate RPC. Currently carries no fields.
message TerminateResponse {}
// ── Metrics ──────────────────────────────────────────────────────────
// MetricPoint is a single resource-usage data point for one sandbox.
// It is the element type of every repeated metrics field returned by the
// GetSandboxMetrics and FlushSandboxMetrics RPCs.
message MetricPoint {
// Time of the data point as a Unix timestamp.
// NOTE(review): the unit (presumably seconds since the epoch) is not
// stated in this file — confirm against the producer.
int64 timestamp_unix = 1;
// CPU usage as a percentage.
// NOTE(review): the scale is not stated here (e.g. whether 100 means one
// fully used core or the whole host) — confirm against the sampler.
double cpu_pct = 2;
// Memory usage in bytes.
// NOTE(review): presumably the resident set size of the sandbox's VM
// process — confirm against the sampler.
int64 mem_bytes = 3;
// Disk usage in bytes.
// NOTE(review): which files are measured is not stated here — confirm
// against the sampler.
int64 disk_bytes = 4;
}
// Request for the GetSandboxMetrics RPC.
message GetSandboxMetricsRequest {
// ID of the sandbox whose metrics are requested.
string sandbox_id = 1;
// Range tier to read: "10m", "2h", or "24h".
// NOTE(review): the behavior for an empty or unrecognized value is not
// defined in this file — confirm with the RPC handler.
string range = 2;
}
// Response for the GetSandboxMetrics RPC.
message GetSandboxMetricsResponse {
// Metric points for the range tier named in the request.
// NOTE(review): ordering (presumably oldest first) is not stated in this
// file — confirm with the RPC handler.
repeated MetricPoint points = 1;
}
// Request for the FlushSandboxMetrics RPC.
message FlushSandboxMetricsRequest {
// ID of the sandbox whose metrics should be returned and cleared.
string sandbox_id = 1;
}
// Response for the FlushSandboxMetrics RPC: one repeated field per range
// tier. The tier names match the values accepted by
// GetSandboxMetricsRequest.range ("10m", "2h", "24h").
message FlushSandboxMetricsResponse {
// Points held for the "10m" tier.
repeated MetricPoint points_10m = 1;
// Points held for the "2h" tier.
repeated MetricPoint points_2h = 2;
// Points held for the "24h" tier.
repeated MetricPoint points_24h = 3;
}