diff --git a/db/migrations/20260325135035_add_sandbox_metric_points.sql b/db/migrations/20260325135035_add_sandbox_metric_points.sql new file mode 100644 index 0000000..08e8683 --- /dev/null +++ b/db/migrations/20260325135035_add_sandbox_metric_points.sql @@ -0,0 +1,16 @@ +-- +goose Up +CREATE TABLE sandbox_metric_points ( + sandbox_id TEXT NOT NULL, + tier TEXT NOT NULL CHECK (tier IN ('10m', '2h', '24h')), + ts BIGINT NOT NULL, + cpu_pct FLOAT8 NOT NULL DEFAULT 0, + mem_bytes BIGINT NOT NULL DEFAULT 0, + disk_bytes BIGINT NOT NULL DEFAULT 0, + PRIMARY KEY (sandbox_id, tier, ts) +); + +CREATE INDEX idx_sandbox_metric_points_sandbox_tier + ON sandbox_metric_points (sandbox_id, tier); + +-- +goose Down +DROP TABLE IF EXISTS sandbox_metric_points; diff --git a/db/queries/metrics.sql b/db/queries/metrics.sql index 325df8d..3b6ad0b 100644 --- a/db/queries/metrics.sql +++ b/db/queries/metrics.sql @@ -27,6 +27,30 @@ WHERE team_id = $1 DELETE FROM sandbox_metrics_snapshots WHERE sampled_at < NOW() - INTERVAL '60 days'; +-- name: InsertSandboxMetricPoint :exec +INSERT INTO sandbox_metric_points (sandbox_id, tier, ts, cpu_pct, mem_bytes, disk_bytes) +VALUES ($1, $2, $3, $4, $5, $6) +ON CONFLICT (sandbox_id, tier, ts) DO NOTHING; + +-- name: GetSandboxMetricPoints :many +SELECT ts, cpu_pct, mem_bytes, disk_bytes +FROM sandbox_metric_points +WHERE sandbox_id = $1 AND tier = $2 +ORDER BY ts ASC; + +-- name: DeleteSandboxMetricPoints :exec +DELETE FROM sandbox_metric_points +WHERE sandbox_id = $1; + +-- name: DeleteSandboxMetricPointsByTier :exec +DELETE FROM sandbox_metric_points +WHERE sandbox_id = $1 AND tier = $2; + +-- name: PruneSandboxMetricPoints :exec +-- Remove metric points older than 30 days for destroyed sandboxes. +DELETE FROM sandbox_metric_points +WHERE ts < EXTRACT(EPOCH FROM NOW() - INTERVAL '30 days')::BIGINT; + -- name: SampleSandboxMetrics :many -- Aggregates per-team resource usage from the live sandboxes table. 
-- Groups by all teams that have any sandbox row (including stopped) so that diff --git a/internal/api/handlers_metrics.go b/internal/api/handlers_metrics.go new file mode 100644 index 0000000..1efbafa --- /dev/null +++ b/internal/api/handlers_metrics.go @@ -0,0 +1,130 @@ +package api + +import ( + "context" + "net/http" + + "connectrpc.com/connect" + "github.com/go-chi/chi/v5" + + "git.omukk.dev/wrenn/sandbox/internal/auth" + "git.omukk.dev/wrenn/sandbox/internal/db" + "git.omukk.dev/wrenn/sandbox/internal/lifecycle" + pb "git.omukk.dev/wrenn/sandbox/proto/hostagent/gen" +) + +type sandboxMetricsHandler struct { + db *db.Queries + pool *lifecycle.HostClientPool +} + +func newSandboxMetricsHandler(db *db.Queries, pool *lifecycle.HostClientPool) *sandboxMetricsHandler { + return &sandboxMetricsHandler{db: db, pool: pool} +} + +type metricPointResponse struct { + TimestampUnix int64 `json:"timestamp_unix"` + CPUPct float64 `json:"cpu_pct"` + MemBytes int64 `json:"mem_bytes"` + DiskBytes int64 `json:"disk_bytes"` +} + +type metricsResponse struct { + SandboxID string `json:"sandbox_id"` + Range string `json:"range"` + Points []metricPointResponse `json:"points"` +} + +// GetMetrics handles GET /v1/sandboxes/{id}/metrics?range=10m|2h|24h. 
+func (h *sandboxMetricsHandler) GetMetrics(w http.ResponseWriter, r *http.Request) { + sandboxID := chi.URLParam(r, "id") + ctx := r.Context() + ac := auth.MustFromContext(ctx) + + rangeTier := r.URL.Query().Get("range") + if rangeTier == "" { + rangeTier = "10m" + } + if rangeTier != "10m" && rangeTier != "2h" && rangeTier != "24h" { + writeError(w, http.StatusBadRequest, "invalid_request", "range must be 10m, 2h, or 24h") + return + } + + sb, err := h.db.GetSandboxByTeam(ctx, db.GetSandboxByTeamParams{ID: sandboxID, TeamID: ac.TeamID}) + if err != nil { + writeError(w, http.StatusNotFound, "not_found", "sandbox not found") + return + } + + switch sb.Status { + case "running": + h.getFromAgent(w, r, sandboxID, rangeTier, sb.HostID) + case "paused": + h.getFromDB(ctx, w, sandboxID, rangeTier) + default: + writeError(w, http.StatusNotFound, "not_found", "metrics not available for sandbox in state: "+sb.Status) + } +} + +func (h *sandboxMetricsHandler) getFromAgent(w http.ResponseWriter, r *http.Request, sandboxID, rangeTier, hostID string) { + ctx := r.Context() + + agent, err := agentForHost(ctx, h.db, h.pool, hostID) + if err != nil { + writeError(w, http.StatusServiceUnavailable, "host_unavailable", "sandbox host is not reachable") + return + } + + resp, err := agent.GetSandboxMetrics(ctx, connect.NewRequest(&pb.GetSandboxMetricsRequest{ + SandboxId: sandboxID, + Range: rangeTier, + })) + if err != nil { + status, code, msg := agentErrToHTTP(err) + writeError(w, status, code, msg) + return + } + + points := make([]metricPointResponse, len(resp.Msg.Points)) + for i, p := range resp.Msg.Points { + points[i] = metricPointResponse{ + TimestampUnix: p.TimestampUnix, + CPUPct: p.CpuPct, + MemBytes: p.MemBytes, + DiskBytes: p.DiskBytes, + } + } + + writeJSON(w, http.StatusOK, metricsResponse{ + SandboxID: sandboxID, + Range: rangeTier, + Points: points, + }) +} + +func (h *sandboxMetricsHandler) getFromDB(ctx context.Context, w http.ResponseWriter, sandboxID, rangeTier 
string) { + rows, err := h.db.GetSandboxMetricPoints(ctx, db.GetSandboxMetricPointsParams{ + SandboxID: sandboxID, + Tier: rangeTier, + }) + if err != nil { + writeError(w, http.StatusInternalServerError, "internal_error", "failed to read metrics") + return + } + + points := make([]metricPointResponse, len(rows)) + for i, row := range rows { + points[i] = metricPointResponse{ + TimestampUnix: row.Ts, + CPUPct: row.CpuPct, + MemBytes: row.MemBytes, + DiskBytes: row.DiskBytes, + } + } + + writeJSON(w, http.StatusOK, metricsResponse{ + SandboxID: sandboxID, + Range: rangeTier, + Points: points, + }) +} diff --git a/internal/api/openapi.yaml b/internal/api/openapi.yaml index 86e88c6..2b627f1 100644 --- a/internal/api/openapi.yaml +++ b/internal/api/openapi.yaml @@ -751,6 +751,60 @@ paths: schema: $ref: "#/components/schemas/Error" + /v1/sandboxes/{id}/metrics: + parameters: + - name: id + in: path + required: true + schema: + type: string + + get: + summary: Get per-sandbox resource metrics + operationId: getSandboxMetrics + tags: [sandboxes] + security: + - apiKeyAuth: [] + - bearerAuth: [] + description: | + Returns time-series CPU, memory, and disk metrics for a sandbox. + Three tiers are available with different granularity and retention: + - `10m`: 500ms samples, last 10 minutes + - `2h`: 30-second averages, last 2 hours + - `24h`: 5-minute averages, last 24 hours + + For running sandboxes, data comes from the host agent's in-memory + ring buffer. For paused sandboxes, data is read from persisted + snapshots in the database. Stopped/destroyed sandboxes return 404. 
+ parameters: + - name: range + in: query + required: false + schema: + type: string + enum: ["10m", "2h", "24h"] + default: "10m" + description: Time range tier to query + responses: + "200": + description: Metrics retrieved + content: + application/json: + schema: + $ref: "#/components/schemas/SandboxMetrics" + "400": + description: Invalid range parameter + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + "404": + description: Sandbox not found or metrics not available + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + /v1/sandboxes/{id}/pause: parameters: - name: id @@ -1981,6 +2035,38 @@ components: items: $ref: "#/components/schemas/TeamMember" + SandboxMetrics: + type: object + properties: + sandbox_id: + type: string + range: + type: string + enum: ["10m", "2h", "24h"] + points: + type: array + items: + $ref: "#/components/schemas/MetricPoint" + + MetricPoint: + type: object + properties: + timestamp_unix: + type: integer + format: int64 + cpu_pct: + type: number + format: double + description: "CPU utilization percentage (0-100), normalized to vCPU count" + mem_bytes: + type: integer + format: int64 + description: "Resident memory in bytes (VmRSS of Firecracker process)" + disk_bytes: + type: integer + format: int64 + description: "Allocated disk bytes for the CoW sparse file" + Error: type: object properties: diff --git a/internal/api/server.go b/internal/api/server.go index 636d1d1..67043e8 100644 --- a/internal/api/server.go +++ b/internal/api/server.go @@ -64,6 +64,7 @@ func New( usersH := newUsersHandler(teamSvc) auditH := newAuditHandler(auditSvc) statsH := newStatsHandler(statsSvc) + metricsH := newSandboxMetricsHandler(queries, pool) // OpenAPI spec and docs. 
r.Get("/openapi.yaml", serveOpenAPI) @@ -125,6 +126,7 @@ func New( r.Post("/files/read", files.Download) r.Post("/files/stream/write", filesStream.StreamUpload) r.Post("/files/stream/read", filesStream.StreamDownload) + r.Get("/metrics", metricsH.GetMetrics) }) }) diff --git a/internal/db/metrics.sql.go b/internal/db/metrics.sql.go index dffc039..b719caa 100644 --- a/internal/db/metrics.sql.go +++ b/internal/db/metrics.sql.go @@ -9,6 +9,31 @@ import ( "context" ) +const deleteSandboxMetricPoints = `-- name: DeleteSandboxMetricPoints :exec +DELETE FROM sandbox_metric_points +WHERE sandbox_id = $1 +` + +func (q *Queries) DeleteSandboxMetricPoints(ctx context.Context, sandboxID string) error { + _, err := q.db.Exec(ctx, deleteSandboxMetricPoints, sandboxID) + return err +} + +const deleteSandboxMetricPointsByTier = `-- name: DeleteSandboxMetricPointsByTier :exec +DELETE FROM sandbox_metric_points +WHERE sandbox_id = $1 AND tier = $2 +` + +type DeleteSandboxMetricPointsByTierParams struct { + SandboxID string `json:"sandbox_id"` + Tier string `json:"tier"` +} + +func (q *Queries) DeleteSandboxMetricPointsByTier(ctx context.Context, arg DeleteSandboxMetricPointsByTierParams) error { + _, err := q.db.Exec(ctx, deleteSandboxMetricPointsByTier, arg.SandboxID, arg.Tier) + return err +} + const getLiveMetrics = `-- name: GetLiveMetrics :one SELECT (COUNT(*) FILTER (WHERE status IN ('running', 'starting')))::INTEGER AS running_count, @@ -58,6 +83,50 @@ func (q *Queries) GetPeakMetrics(ctx context.Context, teamID string) (GetPeakMet return i, err } +const getSandboxMetricPoints = `-- name: GetSandboxMetricPoints :many +SELECT ts, cpu_pct, mem_bytes, disk_bytes +FROM sandbox_metric_points +WHERE sandbox_id = $1 AND tier = $2 +ORDER BY ts ASC +` + +type GetSandboxMetricPointsParams struct { + SandboxID string `json:"sandbox_id"` + Tier string `json:"tier"` +} + +type GetSandboxMetricPointsRow struct { + Ts int64 `json:"ts"` + CpuPct float64 `json:"cpu_pct"` + MemBytes int64 
`json:"mem_bytes"` + DiskBytes int64 `json:"disk_bytes"` +} + +func (q *Queries) GetSandboxMetricPoints(ctx context.Context, arg GetSandboxMetricPointsParams) ([]GetSandboxMetricPointsRow, error) { + rows, err := q.db.Query(ctx, getSandboxMetricPoints, arg.SandboxID, arg.Tier) + if err != nil { + return nil, err + } + defer rows.Close() + var items []GetSandboxMetricPointsRow + for rows.Next() { + var i GetSandboxMetricPointsRow + if err := rows.Scan( + &i.Ts, + &i.CpuPct, + &i.MemBytes, + &i.DiskBytes, + ); err != nil { + return nil, err + } + items = append(items, i) + } + if err := rows.Err(); err != nil { + return nil, err + } + return items, nil +} + const insertMetricsSnapshot = `-- name: InsertMetricsSnapshot :exec INSERT INTO sandbox_metrics_snapshots (team_id, running_count, vcpus_reserved, memory_mb_reserved) VALUES ($1, $2, $3, $4) @@ -80,6 +149,33 @@ func (q *Queries) InsertMetricsSnapshot(ctx context.Context, arg InsertMetricsSn return err } +const insertSandboxMetricPoint = `-- name: InsertSandboxMetricPoint :exec +INSERT INTO sandbox_metric_points (sandbox_id, tier, ts, cpu_pct, mem_bytes, disk_bytes) +VALUES ($1, $2, $3, $4, $5, $6) +ON CONFLICT (sandbox_id, tier, ts) DO NOTHING +` + +type InsertSandboxMetricPointParams struct { + SandboxID string `json:"sandbox_id"` + Tier string `json:"tier"` + Ts int64 `json:"ts"` + CpuPct float64 `json:"cpu_pct"` + MemBytes int64 `json:"mem_bytes"` + DiskBytes int64 `json:"disk_bytes"` +} + +func (q *Queries) InsertSandboxMetricPoint(ctx context.Context, arg InsertSandboxMetricPointParams) error { + _, err := q.db.Exec(ctx, insertSandboxMetricPoint, + arg.SandboxID, + arg.Tier, + arg.Ts, + arg.CpuPct, + arg.MemBytes, + arg.DiskBytes, + ) + return err +} + const pruneOldMetrics = `-- name: PruneOldMetrics :exec DELETE FROM sandbox_metrics_snapshots WHERE sampled_at < NOW() - INTERVAL '60 days' @@ -90,6 +186,17 @@ func (q *Queries) PruneOldMetrics(ctx context.Context) error { return err } +const 
pruneSandboxMetricPoints = `-- name: PruneSandboxMetricPoints :exec +DELETE FROM sandbox_metric_points +WHERE ts < EXTRACT(EPOCH FROM NOW() - INTERVAL '30 days')::BIGINT +` + +// Remove metric points older than 30 days for destroyed sandboxes. +func (q *Queries) PruneSandboxMetricPoints(ctx context.Context) error { + _, err := q.db.Exec(ctx, pruneSandboxMetricPoints) + return err +} + const sampleSandboxMetrics = `-- name: SampleSandboxMetrics :many SELECT team_id, diff --git a/internal/db/models.go b/internal/db/models.go index df2981e..0128f4a 100644 --- a/internal/db/models.go +++ b/internal/db/models.go @@ -99,6 +99,15 @@ type Sandbox struct { TeamID string `json:"team_id"` } +type SandboxMetricPoint struct { + SandboxID string `json:"sandbox_id"` + Tier string `json:"tier"` + Ts int64 `json:"ts"` + CpuPct float64 `json:"cpu_pct"` + MemBytes int64 `json:"mem_bytes"` + DiskBytes int64 `json:"disk_bytes"` +} + type SandboxMetricsSnapshot struct { ID int64 `json:"id"` TeamID string `json:"team_id"` diff --git a/internal/hostagent/server.go b/internal/hostagent/server.go index c0a4cfd..fb7fb66 100644 --- a/internal/hostagent/server.go +++ b/internal/hostagent/server.go @@ -426,3 +426,55 @@ func (s *Server) Terminate( } return connect.NewResponse(&pb.TerminateResponse{}), nil } + +func (s *Server) GetSandboxMetrics( + _ context.Context, + req *connect.Request[pb.GetSandboxMetricsRequest], +) (*connect.Response[pb.GetSandboxMetricsResponse], error) { + msg := req.Msg + + points, err := s.mgr.GetMetrics(msg.SandboxId, msg.Range) + if err != nil { + if strings.Contains(err.Error(), "not found") { + return nil, connect.NewError(connect.CodeNotFound, err) + } + if strings.Contains(err.Error(), "invalid range") { + return nil, connect.NewError(connect.CodeInvalidArgument, err) + } + return nil, connect.NewError(connect.CodeInternal, err) + } + + return connect.NewResponse(&pb.GetSandboxMetricsResponse{Points: metricPointsToPB(points)}), nil +} + +func (s *Server) 
FlushSandboxMetrics( + _ context.Context, + req *connect.Request[pb.FlushSandboxMetricsRequest], +) (*connect.Response[pb.FlushSandboxMetricsResponse], error) { + pts10m, pts2h, pts24h, err := s.mgr.FlushMetrics(req.Msg.SandboxId) + if err != nil { + if strings.Contains(err.Error(), "not found") { + return nil, connect.NewError(connect.CodeNotFound, err) + } + return nil, connect.NewError(connect.CodeInternal, err) + } + + return connect.NewResponse(&pb.FlushSandboxMetricsResponse{ + Points_10M: metricPointsToPB(pts10m), + Points_2H: metricPointsToPB(pts2h), + Points_24H: metricPointsToPB(pts24h), + }), nil +} + +func metricPointsToPB(pts []sandbox.MetricPoint) []*pb.MetricPoint { + out := make([]*pb.MetricPoint, len(pts)) + for i, p := range pts { + out[i] = &pb.MetricPoint{ + TimestampUnix: p.Timestamp.Unix(), + CpuPct: p.CPUPct, + MemBytes: p.MemBytes, + DiskBytes: p.DiskBytes, + } + } + return out +} diff --git a/internal/sandbox/manager.go b/internal/sandbox/manager.go index bf7d057..87a3289 100644 --- a/internal/sandbox/manager.go +++ b/internal/sandbox/manager.go @@ -58,6 +58,12 @@ type sandboxState struct { // sandbox was restored. Non-nil means re-pause should use "Diff" snapshot // type instead of "Full", avoiding the UFFD fault-in storm. parent *snapshotParent + + // Metrics sampling state. 
+ fcPID int // Firecracker process PID (child of unshare wrapper) + ring *metricsRing // tiered ring buffers for CPU/mem/disk metrics + samplerCancel context.CancelFunc // cancels the per-sandbox sampling goroutine + samplerDone chan struct{} // closed when the sampling goroutine exits } // snapshotParent stores the previous generation's snapshot state so that @@ -232,6 +238,8 @@ func (m *Manager) Create(ctx context.Context, sandboxID, template string, vcpus, m.boxes[sandboxID] = sb m.mu.Unlock() + m.startSampler(sb) + slog.Info("sandbox created", "id", sandboxID, "template", template, @@ -265,6 +273,7 @@ func (m *Manager) Destroy(ctx context.Context, sandboxID string) error { // cleanup tears down all resources for a sandbox. func (m *Manager) cleanup(ctx context.Context, sb *sandboxState) { + m.stopSampler(sb) if err := m.vm.Destroy(ctx, sb.ID); err != nil { slog.Warn("vm destroy error", "id", sb.ID, "error", err) } @@ -668,6 +677,8 @@ func (m *Manager) Resume(ctx context.Context, sandboxID string, timeoutSec int) m.boxes[sandboxID] = sb m.mu.Unlock() + m.startSampler(sb) + // Don't delete snapshot dir — diff files are needed for re-pause. // The CoW file was already moved out. The dir will be cleaned up // on destroy or overwritten on re-pause. @@ -987,6 +998,8 @@ func (m *Manager) createFromSnapshot(ctx context.Context, sandboxID, snapshotNam m.boxes[sandboxID] = sb m.mu.Unlock() + m.startSampler(sb) + slog.Info("sandbox created from snapshot", "id", sandboxID, "snapshot", snapshotName, @@ -1213,6 +1226,158 @@ func warnErr(msg string, id string, err error) { } } +// startSampler resolves the Firecracker child PID and starts a background +// goroutine that samples CPU/mem/disk at 500ms intervals into the ring buffer. +// Must be called after the sandbox is registered in m.boxes. +func (m *Manager) startSampler(sb *sandboxState) { + // Resolve the Firecracker PID (child of unshare wrapper). 
+ v, ok := m.vm.Get(sb.ID) + if !ok { + slog.Warn("metrics: VM not found, skipping sampler", "id", sb.ID) + return + } + unshPID := v.PID() + + var fcPID int + for attempt := 0; attempt < 5; attempt++ { + var err error + fcPID, err = findChildPID(unshPID) + if err == nil { + break + } + if attempt == 4 { + slog.Warn("metrics: could not resolve FC PID, skipping sampler", "id", sb.ID, "error", err) + return + } + time.Sleep(50 * time.Millisecond) + } + + sb.fcPID = fcPID + sb.ring = newMetricsRing() + + ctx, cancel := context.WithCancel(context.Background()) + sb.samplerCancel = cancel + sb.samplerDone = make(chan struct{}) + + // Read initial CPU counters for delta calculation. + // Passed to goroutine as local state — no shared mutation. + initialCPU, err := readCPUStat(fcPID) + if err != nil { + slog.Warn("metrics: could not read initial CPU stat", "id", sb.ID, "error", err) + } + + go m.samplerLoop(ctx, sb, fcPID, sb.VCPUs, initialCPU) +} + +// samplerLoop samples /proc metrics at 500ms intervals. +// lastCPU is goroutine-local to avoid shared-state races. 
+func (m *Manager) samplerLoop(ctx context.Context, sb *sandboxState, fcPID, vcpus int, lastCPU cpuStat) { + defer close(sb.samplerDone) + + ticker := time.NewTicker(500 * time.Millisecond) + defer ticker.Stop() + + clkTck := 100.0 // sysconf(_SC_CLK_TCK), almost always 100 on Linux + lastTime := time.Now() + cpuInitialized := lastCPU != (cpuStat{}) + + for { + select { + case <-ctx.Done(): + return + case now := <-ticker.C: + elapsed := now.Sub(lastTime).Seconds() + lastTime = now + + // CPU: delta jiffies / (elapsed * CLK_TCK * vcpus) * 100 + var cpuPct float64 + cur, err := readCPUStat(fcPID) + if err == nil { + if cpuInitialized && elapsed > 0 && vcpus > 0 { + deltaJiffies := float64((cur.utime + cur.stime) - (lastCPU.utime + lastCPU.stime)) + cpuPct = (deltaJiffies / (elapsed * clkTck * float64(vcpus))) * 100.0 + if cpuPct > 100.0 { + cpuPct = 100.0 + } + if cpuPct < 0 { + cpuPct = 0 + } + } + lastCPU = cur + cpuInitialized = true + } + + // Memory: VmRSS of the Firecracker process. + memBytes, _ := readMemRSS(fcPID) + + // Disk: allocated bytes of the CoW sparse file. + var diskBytes int64 + if sb.dmDevice != nil { + diskBytes, _ = readDiskAllocated(sb.dmDevice.CowPath) + } + + sb.ring.Push(MetricPoint{ + Timestamp: now, + CPUPct: cpuPct, + MemBytes: memBytes, + DiskBytes: diskBytes, + }) + } + } +} + +// stopSampler stops the metrics sampling goroutine and waits for it to exit. +func (m *Manager) stopSampler(sb *sandboxState) { + if sb.samplerCancel != nil { + sb.samplerCancel() + <-sb.samplerDone + sb.samplerCancel = nil + } +} + +// GetMetrics returns the ring buffer data for the given range tier. +// Valid ranges: "10m", "2h", "24h". 
+func (m *Manager) GetMetrics(sandboxID, rangeTier string) ([]MetricPoint, error) { + m.mu.RLock() + sb, ok := m.boxes[sandboxID] + m.mu.RUnlock() + if !ok { + return nil, fmt.Errorf("sandbox not found: %s", sandboxID) + } + if sb.ring == nil { + return nil, nil + } + + switch rangeTier { + case "10m": + return sb.ring.Get10m(), nil + case "2h": + return sb.ring.Get2h(), nil + case "24h": + return sb.ring.Get24h(), nil + default: + return nil, fmt.Errorf("invalid range: %s (valid: 10m, 2h, 24h)", rangeTier) + } +} + +// FlushMetrics returns all three tier ring buffers, clears the ring, and +// stops the sampler goroutine. Called by the control plane before pause/destroy. +func (m *Manager) FlushMetrics(sandboxID string) (pts10m, pts2h, pts24h []MetricPoint, err error) { + m.mu.RLock() + sb, ok := m.boxes[sandboxID] + m.mu.RUnlock() + if !ok { + return nil, nil, nil, fmt.Errorf("sandbox not found: %s", sandboxID) + } + + m.stopSampler(sb) + if sb.ring == nil { + return nil, nil, nil, nil + } + pts10m, pts2h, pts24h = sb.ring.Flush() + return pts10m, pts2h, pts24h, nil +} + // copyFile copies a regular file from src to dst using streaming I/O. func copyFile(src, dst string) error { sf, err := os.Open(src) diff --git a/internal/sandbox/metrics.go b/internal/sandbox/metrics.go new file mode 100644 index 0000000..f266cb2 --- /dev/null +++ b/internal/sandbox/metrics.go @@ -0,0 +1,178 @@ +package sandbox + +import ( + "sync" + "time" +) + +// MetricPoint holds one metrics sample. +type MetricPoint struct { + Timestamp time.Time + CPUPct float64 + MemBytes int64 + DiskBytes int64 +} + +// Ring buffer capacity constants. +const ( + ring10mCap = 1200 // 500ms × 1200 = 10 min + ring2hCap = 240 // 30s × 240 = 2 h + ring24hCap = 288 // 5min × 288 = 24 h + + downsample2hEvery = 60 // 60 × 500ms = 30s + downsample24hEvery = 10 // 10 × 30s = 5min +) + +// metricsRing holds three tiered ring buffers with automatic downsampling +// from the finest tier into coarser tiers. 
+type metricsRing struct { + mu sync.Mutex + + // 10-minute tier: 500ms samples. + buf10m [ring10mCap]MetricPoint + idx10m int + count10m int + + // 2-hour tier: 30s averages. + buf2h [ring2hCap]MetricPoint + idx2h int + count2h int + + // 24-hour tier: 5min averages. + buf24h [ring24hCap]MetricPoint + idx24h int + count24h int + + // Accumulators for downsampling. + acc500ms [downsample2hEvery]MetricPoint + acc500msN int + + acc30s [downsample24hEvery]MetricPoint + acc30sN int +} + +// newMetricsRing creates an empty metrics ring buffer. +func newMetricsRing() *metricsRing { + return &metricsRing{} +} + +// Push adds a 500ms sample to the finest tier and triggers downsampling +// into coarser tiers when enough samples have accumulated. +func (r *metricsRing) Push(p MetricPoint) { + r.mu.Lock() + defer r.mu.Unlock() + + // Write to 10m ring. + r.buf10m[r.idx10m] = p + r.idx10m = (r.idx10m + 1) % ring10mCap + if r.count10m < ring10mCap { + r.count10m++ + } + + // Accumulate for 2h downsample. + r.acc500ms[r.acc500msN] = p + r.acc500msN++ + if r.acc500msN == downsample2hEvery { + avg := averagePoints(r.acc500ms[:downsample2hEvery]) + r.push2h(avg) + r.acc500msN = 0 + } +} + +func (r *metricsRing) push2h(p MetricPoint) { + r.buf2h[r.idx2h] = p + r.idx2h = (r.idx2h + 1) % ring2hCap + if r.count2h < ring2hCap { + r.count2h++ + } + + // Accumulate for 24h downsample. + r.acc30s[r.acc30sN] = p + r.acc30sN++ + if r.acc30sN == downsample24hEvery { + avg := averagePoints(r.acc30s[:downsample24hEvery]) + r.push24h(avg) + r.acc30sN = 0 + } +} + +func (r *metricsRing) push24h(p MetricPoint) { + r.buf24h[r.idx24h] = p + r.idx24h = (r.idx24h + 1) % ring24hCap + if r.count24h < ring24hCap { + r.count24h++ + } +} + +// Get10m returns the 10-minute tier points in chronological order. 
+func (r *metricsRing) Get10m() []MetricPoint { + r.mu.Lock() + defer r.mu.Unlock() + return r.readRing(r.buf10m[:], r.idx10m, r.count10m) +} + +// Get2h returns the 2-hour tier points in chronological order. +func (r *metricsRing) Get2h() []MetricPoint { + r.mu.Lock() + defer r.mu.Unlock() + return r.readRing(r.buf2h[:], r.idx2h, r.count2h) +} + +// Get24h returns the 24-hour tier points in chronological order. +func (r *metricsRing) Get24h() []MetricPoint { + r.mu.Lock() + defer r.mu.Unlock() + return r.readRing(r.buf24h[:], r.idx24h, r.count24h) +} + +// Flush returns all three tiers and resets the ring buffer. +func (r *metricsRing) Flush() (pts10m, pts2h, pts24h []MetricPoint) { + r.mu.Lock() + defer r.mu.Unlock() + + pts10m = r.readRing(r.buf10m[:], r.idx10m, r.count10m) + pts2h = r.readRing(r.buf2h[:], r.idx2h, r.count2h) + pts24h = r.readRing(r.buf24h[:], r.idx24h, r.count24h) + + // Reset all state. + r.idx10m, r.count10m = 0, 0 + r.idx2h, r.count2h = 0, 0 + r.idx24h, r.count24h = 0, 0 + r.acc500msN = 0 + r.acc30sN = 0 + + return pts10m, pts2h, pts24h +} + +// readRing extracts elements from a circular buffer in chronological order. +func (r *metricsRing) readRing(buf []MetricPoint, nextIdx, count int) []MetricPoint { + if count == 0 { + return nil + } + result := make([]MetricPoint, count) + bufLen := len(buf) + start := (nextIdx - count + bufLen) % bufLen + for i := range count { + result[i] = buf[(start+i)%bufLen] + } + return result +} + +// averagePoints computes the average of a slice of MetricPoints. +// The timestamp is set to the last point's timestamp. 
+func averagePoints(pts []MetricPoint) MetricPoint { + n := float64(len(pts)) + var cpu float64 + var mem, disk int64 + for _, p := range pts { + cpu += p.CPUPct + mem += p.MemBytes + disk += p.DiskBytes + } + return MetricPoint{ + Timestamp: pts[len(pts)-1].Timestamp, + CPUPct: cpu / n, + MemBytes: int64(float64(mem) / n), + DiskBytes: int64(float64(disk) / n), + } +} diff --git a/internal/sandbox/proc.go b/internal/sandbox/proc.go new file mode 100644 index 0000000..eb9a78f --- /dev/null +++ b/internal/sandbox/proc.go @@ -0,0 +1,105 @@ +package sandbox + +import ( + "fmt" + "os" + "strconv" + "strings" + "syscall" +) + +// findChildPID reads the direct child PID of a given parent process. +// The Firecracker process is a direct child of the unshare wrapper because +// the init script uses `exec ip netns exec ... firecracker`, which replaces +// bash with ip-netns-exec, which in turn execs firecracker — same PID, +// direct child of unshare. +func findChildPID(parentPID int) (int, error) { + path := fmt.Sprintf("/proc/%d/task/%d/children", parentPID, parentPID) + data, err := os.ReadFile(path) + if err != nil { + return 0, fmt.Errorf("read children: %w", err) + } + fields := strings.Fields(string(data)) + if len(fields) == 0 { + return 0, fmt.Errorf("no child processes found for PID %d", parentPID) + } + pid, err := strconv.Atoi(fields[0]) + if err != nil { + return 0, fmt.Errorf("parse child PID %q: %w", fields[0], err) + } + return pid, nil +} + +// cpuStat holds raw CPU jiffies read from /proc/{pid}/stat. +type cpuStat struct { + utime uint64 + stime uint64 +} + +// readCPUStat reads user and system CPU jiffies from /proc/{pid}/stat. +// Fields 14 (utime) and 15 (stime) are 1-indexed in the man page; after +// splitting the tail past the comm field's ')', they land at indices 11 and 12. 
+func readCPUStat(pid int) (cpuStat, error) { + path := fmt.Sprintf("/proc/%d/stat", pid) + data, err := os.ReadFile(path) + if err != nil { + return cpuStat{}, fmt.Errorf("read stat: %w", err) + } + + // /proc/{pid}/stat format: pid (comm) state fields... + // The comm field may contain spaces and parens, so find the last ')' first. + content := string(data) + idx := strings.LastIndex(content, ")") + if idx < 0 { + return cpuStat{}, fmt.Errorf("malformed /proc/%d/stat: no closing paren", pid) + } + // After ")" there is " state field3 field4 ... fieldN" + // field1 after ')' is state (index 0), utime is field 11, stime is field 12 + // (0-indexed from after the closing paren). + fields := strings.Fields(content[idx+2:]) + if len(fields) < 13 { + return cpuStat{}, fmt.Errorf("malformed /proc/%d/stat: too few fields (%d)", pid, len(fields)) + } + utime, err := strconv.ParseUint(fields[11], 10, 64) + if err != nil { + return cpuStat{}, fmt.Errorf("parse utime: %w", err) + } + stime, err := strconv.ParseUint(fields[12], 10, 64) + if err != nil { + return cpuStat{}, fmt.Errorf("parse stime: %w", err) + } + return cpuStat{utime: utime, stime: stime}, nil +} + +// readMemRSS reads VmRSS from /proc/{pid}/status and returns bytes. +func readMemRSS(pid int) (int64, error) { + path := fmt.Sprintf("/proc/%d/status", pid) + data, err := os.ReadFile(path) + if err != nil { + return 0, fmt.Errorf("read status: %w", err) + } + for _, line := range strings.Split(string(data), "\n") { + if strings.HasPrefix(line, "VmRSS:") { + fields := strings.Fields(line) + if len(fields) < 2 { + return 0, fmt.Errorf("malformed VmRSS line") + } + kb, err := strconv.ParseInt(fields[1], 10, 64) + if err != nil { + return 0, fmt.Errorf("parse VmRSS: %w", err) + } + return kb * 1024, nil + } + } + return 0, fmt.Errorf("VmRSS not found in /proc/%d/status", pid) +} + +// readDiskAllocated returns the actual allocated bytes (not apparent size) +// of the file at path. This uses stat's block count × 512. 
+func readDiskAllocated(path string) (int64, error) { + var stat syscall.Stat_t + if err := syscall.Stat(path, &stat); err != nil { + return 0, fmt.Errorf("stat %s: %w", path, err) + } + return stat.Blocks * 512, nil +} diff --git a/internal/service/sandbox.go b/internal/service/sandbox.go index f67eb0d..142b9bd 100644 --- a/internal/service/sandbox.go +++ b/internal/service/sandbox.go @@ -58,6 +58,8 @@ type hostagentClient = interface { PauseSandbox(ctx context.Context, req *connect.Request[pb.PauseSandboxRequest]) (*connect.Response[pb.PauseSandboxResponse], error) ResumeSandbox(ctx context.Context, req *connect.Request[pb.ResumeSandboxRequest]) (*connect.Response[pb.ResumeSandboxResponse], error) PingSandbox(ctx context.Context, req *connect.Request[pb.PingSandboxRequest]) (*connect.Response[pb.PingSandboxResponse], error) + GetSandboxMetrics(ctx context.Context, req *connect.Request[pb.GetSandboxMetricsRequest]) (*connect.Response[pb.GetSandboxMetricsResponse], error) + FlushSandboxMetrics(ctx context.Context, req *connect.Request[pb.FlushSandboxMetricsRequest]) (*connect.Response[pb.FlushSandboxMetricsResponse], error) } // Create creates a new sandbox: picks a host via the scheduler, inserts a pending @@ -180,6 +182,9 @@ func (s *SandboxService) Pause(ctx context.Context, sandboxID, teamID string) (d return db.Sandbox{}, err } + // Flush all metrics tiers before pausing so data survives in DB. + s.flushAndPersistMetrics(ctx, agent, sandboxID, true) + if _, err := agent.PauseSandbox(ctx, connect.NewRequest(&pb.PauseSandboxRequest{ SandboxId: sandboxID, })); err != nil { @@ -236,7 +241,8 @@ func (s *SandboxService) Resume(ctx context.Context, sandboxID, teamID string) ( // Destroy stops a sandbox and marks it as stopped. 
func (s *SandboxService) Destroy(ctx context.Context, sandboxID, teamID string) error { - if _, err := s.DB.GetSandboxByTeam(ctx, db.GetSandboxByTeamParams{ID: sandboxID, TeamID: teamID}); err != nil { + sb, err := s.DB.GetSandboxByTeam(ctx, db.GetSandboxByTeamParams{ID: sandboxID, TeamID: teamID}) + if err != nil { return fmt.Errorf("sandbox not found: %w", err) } @@ -245,6 +251,11 @@ func (s *SandboxService) Destroy(ctx context.Context, sandboxID, teamID string) return err } + // If running, flush 24h tier metrics for analytics before destroying. + if sb.Status == "running" { + s.flushAndPersistMetrics(ctx, agent, sandboxID, false) + } + // Destroy on host agent. A not-found response is fine — sandbox is already gone. if _, err := agent.DestroySandbox(ctx, connect.NewRequest(&pb.DestroySandboxRequest{ SandboxId: sandboxID, @@ -252,6 +263,16 @@ func (s *SandboxService) Destroy(ctx context.Context, sandboxID, teamID string) return fmt.Errorf("agent destroy: %w", err) } + // For a paused sandbox, only keep 24h tier; remove the finer-grained tiers. + if sb.Status == "paused" { + _ = s.DB.DeleteSandboxMetricPointsByTier(ctx, db.DeleteSandboxMetricPointsByTierParams{ + SandboxID: sandboxID, Tier: "10m", + }) + _ = s.DB.DeleteSandboxMetricPointsByTier(ctx, db.DeleteSandboxMetricPointsByTierParams{ + SandboxID: sandboxID, Tier: "2h", + }) + } + if _, err := s.DB.UpdateSandboxStatus(ctx, db.UpdateSandboxStatusParams{ ID: sandboxID, Status: "stopped", }); err != nil { @@ -260,6 +281,41 @@ func (s *SandboxService) Destroy(ctx context.Context, sandboxID, teamID string) return nil } +// flushAndPersistMetrics calls FlushSandboxMetrics on the agent and stores +// the returned data to DB. If allTiers is true, all three tiers are saved; +// otherwise only the 24h tier (for post-destroy analytics). 
+func (s *SandboxService) flushAndPersistMetrics(ctx context.Context, agent hostagentClient, sandboxID string, allTiers bool) { + resp, err := agent.FlushSandboxMetrics(ctx, connect.NewRequest(&pb.FlushSandboxMetricsRequest{ + SandboxId: sandboxID, + })) + if err != nil { + slog.Warn("flush metrics failed (best-effort)", "sandbox_id", sandboxID, "error", err) + return + } + msg := resp.Msg + + if allTiers { + s.persistMetricPoints(ctx, sandboxID, "10m", msg.Points_10M) + s.persistMetricPoints(ctx, sandboxID, "2h", msg.Points_2H) + } + s.persistMetricPoints(ctx, sandboxID, "24h", msg.Points_24H) +} + +func (s *SandboxService) persistMetricPoints(ctx context.Context, sandboxID, tier string, points []*pb.MetricPoint) { + for _, p := range points { + if err := s.DB.InsertSandboxMetricPoint(ctx, db.InsertSandboxMetricPointParams{ + SandboxID: sandboxID, + Tier: tier, + Ts: p.TimestampUnix, + CpuPct: p.CpuPct, + MemBytes: p.MemBytes, + DiskBytes: p.DiskBytes, + }); err != nil { + slog.Warn("persist metric point failed", "sandbox_id", sandboxID, "tier", tier, "error", err) + } + } +} + // Ping resets the inactivity timer for a running sandbox. func (s *SandboxService) Ping(ctx context.Context, sandboxID, teamID string) error { sb, err := s.DB.GetSandboxByTeam(ctx, db.GetSandboxByTeamParams{ID: sandboxID, TeamID: teamID}) diff --git a/internal/vm/manager.go b/internal/vm/manager.go index b68bde1..c7e3479 100644 --- a/internal/vm/manager.go +++ b/internal/vm/manager.go @@ -250,6 +250,12 @@ func (m *Manager) CreateFromSnapshot(ctx context.Context, cfg VMConfig, snapPath return vm, nil } +// PID returns the process ID of the unshare wrapper process. +// The actual Firecracker process is a direct child of this PID. +func (v *VM) PID() int { + return v.process.cmd.Process.Pid +} + // Get returns a running VM by sandbox ID. 
func (m *Manager) Get(sandboxID string) (*VM, bool) { vm, ok := m.vms[sandboxID] diff --git a/proto/hostagent/gen/hostagent.pb.go b/proto/hostagent/gen/hostagent.pb.go index 7afd4d1..f496b2c 100644 --- a/proto/hostagent/gen/hostagent.pb.go +++ b/proto/hostagent/gen/hostagent.pb.go @@ -1902,6 +1902,275 @@ func (*TerminateResponse) Descriptor() ([]byte, []int) { return file_hostagent_proto_rawDescGZIP(), []int{34} } +type MetricPoint struct { + state protoimpl.MessageState `protogen:"open.v1"` + TimestampUnix int64 `protobuf:"varint,1,opt,name=timestamp_unix,json=timestampUnix,proto3" json:"timestamp_unix,omitempty"` + CpuPct float64 `protobuf:"fixed64,2,opt,name=cpu_pct,json=cpuPct,proto3" json:"cpu_pct,omitempty"` + MemBytes int64 `protobuf:"varint,3,opt,name=mem_bytes,json=memBytes,proto3" json:"mem_bytes,omitempty"` + DiskBytes int64 `protobuf:"varint,4,opt,name=disk_bytes,json=diskBytes,proto3" json:"disk_bytes,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *MetricPoint) Reset() { + *x = MetricPoint{} + mi := &file_hostagent_proto_msgTypes[35] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *MetricPoint) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*MetricPoint) ProtoMessage() {} + +func (x *MetricPoint) ProtoReflect() protoreflect.Message { + mi := &file_hostagent_proto_msgTypes[35] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use MetricPoint.ProtoReflect.Descriptor instead. 
+func (*MetricPoint) Descriptor() ([]byte, []int) { + return file_hostagent_proto_rawDescGZIP(), []int{35} +} + +func (x *MetricPoint) GetTimestampUnix() int64 { + if x != nil { + return x.TimestampUnix + } + return 0 +} + +func (x *MetricPoint) GetCpuPct() float64 { + if x != nil { + return x.CpuPct + } + return 0 +} + +func (x *MetricPoint) GetMemBytes() int64 { + if x != nil { + return x.MemBytes + } + return 0 +} + +func (x *MetricPoint) GetDiskBytes() int64 { + if x != nil { + return x.DiskBytes + } + return 0 +} + +type GetSandboxMetricsRequest struct { + state protoimpl.MessageState `protogen:"open.v1"` + SandboxId string `protobuf:"bytes,1,opt,name=sandbox_id,json=sandboxId,proto3" json:"sandbox_id,omitempty"` + // Range tier: "10m", "2h", or "24h". + Range string `protobuf:"bytes,2,opt,name=range,proto3" json:"range,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *GetSandboxMetricsRequest) Reset() { + *x = GetSandboxMetricsRequest{} + mi := &file_hostagent_proto_msgTypes[36] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *GetSandboxMetricsRequest) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*GetSandboxMetricsRequest) ProtoMessage() {} + +func (x *GetSandboxMetricsRequest) ProtoReflect() protoreflect.Message { + mi := &file_hostagent_proto_msgTypes[36] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use GetSandboxMetricsRequest.ProtoReflect.Descriptor instead. 
+func (*GetSandboxMetricsRequest) Descriptor() ([]byte, []int) { + return file_hostagent_proto_rawDescGZIP(), []int{36} +} + +func (x *GetSandboxMetricsRequest) GetSandboxId() string { + if x != nil { + return x.SandboxId + } + return "" +} + +func (x *GetSandboxMetricsRequest) GetRange() string { + if x != nil { + return x.Range + } + return "" +} + +type GetSandboxMetricsResponse struct { + state protoimpl.MessageState `protogen:"open.v1"` + Points []*MetricPoint `protobuf:"bytes,1,rep,name=points,proto3" json:"points,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *GetSandboxMetricsResponse) Reset() { + *x = GetSandboxMetricsResponse{} + mi := &file_hostagent_proto_msgTypes[37] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *GetSandboxMetricsResponse) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*GetSandboxMetricsResponse) ProtoMessage() {} + +func (x *GetSandboxMetricsResponse) ProtoReflect() protoreflect.Message { + mi := &file_hostagent_proto_msgTypes[37] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use GetSandboxMetricsResponse.ProtoReflect.Descriptor instead. 
+func (*GetSandboxMetricsResponse) Descriptor() ([]byte, []int) { + return file_hostagent_proto_rawDescGZIP(), []int{37} +} + +func (x *GetSandboxMetricsResponse) GetPoints() []*MetricPoint { + if x != nil { + return x.Points + } + return nil +} + +type FlushSandboxMetricsRequest struct { + state protoimpl.MessageState `protogen:"open.v1"` + SandboxId string `protobuf:"bytes,1,opt,name=sandbox_id,json=sandboxId,proto3" json:"sandbox_id,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *FlushSandboxMetricsRequest) Reset() { + *x = FlushSandboxMetricsRequest{} + mi := &file_hostagent_proto_msgTypes[38] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *FlushSandboxMetricsRequest) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*FlushSandboxMetricsRequest) ProtoMessage() {} + +func (x *FlushSandboxMetricsRequest) ProtoReflect() protoreflect.Message { + mi := &file_hostagent_proto_msgTypes[38] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use FlushSandboxMetricsRequest.ProtoReflect.Descriptor instead. 
+func (*FlushSandboxMetricsRequest) Descriptor() ([]byte, []int) { + return file_hostagent_proto_rawDescGZIP(), []int{38} +} + +func (x *FlushSandboxMetricsRequest) GetSandboxId() string { + if x != nil { + return x.SandboxId + } + return "" +} + +type FlushSandboxMetricsResponse struct { + state protoimpl.MessageState `protogen:"open.v1"` + Points_10M []*MetricPoint `protobuf:"bytes,1,rep,name=points_10m,json=points10m,proto3" json:"points_10m,omitempty"` + Points_2H []*MetricPoint `protobuf:"bytes,2,rep,name=points_2h,json=points2h,proto3" json:"points_2h,omitempty"` + Points_24H []*MetricPoint `protobuf:"bytes,3,rep,name=points_24h,json=points24h,proto3" json:"points_24h,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *FlushSandboxMetricsResponse) Reset() { + *x = FlushSandboxMetricsResponse{} + mi := &file_hostagent_proto_msgTypes[39] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *FlushSandboxMetricsResponse) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*FlushSandboxMetricsResponse) ProtoMessage() {} + +func (x *FlushSandboxMetricsResponse) ProtoReflect() protoreflect.Message { + mi := &file_hostagent_proto_msgTypes[39] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use FlushSandboxMetricsResponse.ProtoReflect.Descriptor instead. 
+func (*FlushSandboxMetricsResponse) Descriptor() ([]byte, []int) { + return file_hostagent_proto_rawDescGZIP(), []int{39} +} + +func (x *FlushSandboxMetricsResponse) GetPoints_10M() []*MetricPoint { + if x != nil { + return x.Points_10M + } + return nil +} + +func (x *FlushSandboxMetricsResponse) GetPoints_2H() []*MetricPoint { + if x != nil { + return x.Points_2H + } + return nil +} + +func (x *FlushSandboxMetricsResponse) GetPoints_24H() []*MetricPoint { + if x != nil { + return x.Points_24H + } + return nil +} + var File_hostagent_proto protoreflect.FileDescriptor const file_hostagent_proto_rawDesc = "" + @@ -2029,8 +2298,28 @@ const file_hostagent_proto_rawDesc = "" + "sandbox_id\x18\x01 \x01(\tR\tsandboxId\"\x15\n" + "\x13PingSandboxResponse\"\x12\n" + "\x10TerminateRequest\"\x13\n" + - "\x11TerminateResponse2\x9c\n" + + "\x11TerminateResponse\"\x89\x01\n" + + "\vMetricPoint\x12%\n" + + "\x0etimestamp_unix\x18\x01 \x01(\x03R\rtimestampUnix\x12\x17\n" + + "\acpu_pct\x18\x02 \x01(\x01R\x06cpuPct\x12\x1b\n" + + "\tmem_bytes\x18\x03 \x01(\x03R\bmemBytes\x12\x1d\n" + "\n" + + "disk_bytes\x18\x04 \x01(\x03R\tdiskBytes\"O\n" + + "\x18GetSandboxMetricsRequest\x12\x1d\n" + + "\n" + + "sandbox_id\x18\x01 \x01(\tR\tsandboxId\x12\x14\n" + + "\x05range\x18\x02 \x01(\tR\x05range\"N\n" + + "\x19GetSandboxMetricsResponse\x121\n" + + "\x06points\x18\x01 \x03(\v2\x19.hostagent.v1.MetricPointR\x06points\";\n" + + "\x1aFlushSandboxMetricsRequest\x12\x1d\n" + + "\n" + + "sandbox_id\x18\x01 \x01(\tR\tsandboxId\"\xc9\x01\n" + + "\x1bFlushSandboxMetricsResponse\x128\n" + + "\n" + + "points_10m\x18\x01 \x03(\v2\x19.hostagent.v1.MetricPointR\tpoints10m\x126\n" + + "\tpoints_2h\x18\x02 \x03(\v2\x19.hostagent.v1.MetricPointR\bpoints2h\x128\n" + + "\n" + + "points_24h\x18\x03 \x03(\v2\x19.hostagent.v1.MetricPointR\tpoints24h2\xee\v\n" + "\x10HostAgentService\x12X\n" + "\rCreateSandbox\x12\".hostagent.v1.CreateSandboxRequest\x1a#.hostagent.v1.CreateSandboxResponse\x12[\n" + 
"\x0eDestroySandbox\x12#.hostagent.v1.DestroySandboxRequest\x1a$.hostagent.v1.DestroySandboxResponse\x12U\n" + @@ -2047,7 +2336,9 @@ const file_hostagent_proto_rawDesc = "" + "\x0fWriteFileStream\x12$.hostagent.v1.WriteFileStreamRequest\x1a%.hostagent.v1.WriteFileStreamResponse(\x01\x12]\n" + "\x0eReadFileStream\x12#.hostagent.v1.ReadFileStreamRequest\x1a$.hostagent.v1.ReadFileStreamResponse0\x01\x12R\n" + "\vPingSandbox\x12 .hostagent.v1.PingSandboxRequest\x1a!.hostagent.v1.PingSandboxResponse\x12L\n" + - "\tTerminate\x12\x1e.hostagent.v1.TerminateRequest\x1a\x1f.hostagent.v1.TerminateResponseB\xb0\x01\n" + + "\tTerminate\x12\x1e.hostagent.v1.TerminateRequest\x1a\x1f.hostagent.v1.TerminateResponse\x12d\n" + + "\x11GetSandboxMetrics\x12&.hostagent.v1.GetSandboxMetricsRequest\x1a'.hostagent.v1.GetSandboxMetricsResponse\x12j\n" + + "\x13FlushSandboxMetrics\x12(.hostagent.v1.FlushSandboxMetricsRequest\x1a).hostagent.v1.FlushSandboxMetricsResponseB\xb0\x01\n" + "\x10com.hostagent.v1B\x0eHostagentProtoP\x01Z;git.omukk.dev/wrenn/sandbox/proto/hostagent/gen;hostagentv1\xa2\x02\x03HXX\xaa\x02\fHostagent.V1\xca\x02\fHostagent\\V1\xe2\x02\x18Hostagent\\V1\\GPBMetadata\xea\x02\rHostagent::V1b\x06proto3" var ( @@ -2062,43 +2353,48 @@ func file_hostagent_proto_rawDescGZIP() []byte { return file_hostagent_proto_rawDescData } -var file_hostagent_proto_msgTypes = make([]protoimpl.MessageInfo, 35) +var file_hostagent_proto_msgTypes = make([]protoimpl.MessageInfo, 40) var file_hostagent_proto_goTypes = []any{ - (*CreateSandboxRequest)(nil), // 0: hostagent.v1.CreateSandboxRequest - (*CreateSandboxResponse)(nil), // 1: hostagent.v1.CreateSandboxResponse - (*DestroySandboxRequest)(nil), // 2: hostagent.v1.DestroySandboxRequest - (*DestroySandboxResponse)(nil), // 3: hostagent.v1.DestroySandboxResponse - (*PauseSandboxRequest)(nil), // 4: hostagent.v1.PauseSandboxRequest - (*PauseSandboxResponse)(nil), // 5: hostagent.v1.PauseSandboxResponse - (*ResumeSandboxRequest)(nil), // 6: 
hostagent.v1.ResumeSandboxRequest - (*ResumeSandboxResponse)(nil), // 7: hostagent.v1.ResumeSandboxResponse - (*CreateSnapshotRequest)(nil), // 8: hostagent.v1.CreateSnapshotRequest - (*CreateSnapshotResponse)(nil), // 9: hostagent.v1.CreateSnapshotResponse - (*DeleteSnapshotRequest)(nil), // 10: hostagent.v1.DeleteSnapshotRequest - (*DeleteSnapshotResponse)(nil), // 11: hostagent.v1.DeleteSnapshotResponse - (*ExecRequest)(nil), // 12: hostagent.v1.ExecRequest - (*ExecResponse)(nil), // 13: hostagent.v1.ExecResponse - (*ListSandboxesRequest)(nil), // 14: hostagent.v1.ListSandboxesRequest - (*ListSandboxesResponse)(nil), // 15: hostagent.v1.ListSandboxesResponse - (*SandboxInfo)(nil), // 16: hostagent.v1.SandboxInfo - (*WriteFileRequest)(nil), // 17: hostagent.v1.WriteFileRequest - (*WriteFileResponse)(nil), // 18: hostagent.v1.WriteFileResponse - (*ReadFileRequest)(nil), // 19: hostagent.v1.ReadFileRequest - (*ReadFileResponse)(nil), // 20: hostagent.v1.ReadFileResponse - (*ExecStreamRequest)(nil), // 21: hostagent.v1.ExecStreamRequest - (*ExecStreamResponse)(nil), // 22: hostagent.v1.ExecStreamResponse - (*ExecStreamStart)(nil), // 23: hostagent.v1.ExecStreamStart - (*ExecStreamData)(nil), // 24: hostagent.v1.ExecStreamData - (*ExecStreamEnd)(nil), // 25: hostagent.v1.ExecStreamEnd - (*WriteFileStreamRequest)(nil), // 26: hostagent.v1.WriteFileStreamRequest - (*WriteFileStreamMeta)(nil), // 27: hostagent.v1.WriteFileStreamMeta - (*WriteFileStreamResponse)(nil), // 28: hostagent.v1.WriteFileStreamResponse - (*ReadFileStreamRequest)(nil), // 29: hostagent.v1.ReadFileStreamRequest - (*ReadFileStreamResponse)(nil), // 30: hostagent.v1.ReadFileStreamResponse - (*PingSandboxRequest)(nil), // 31: hostagent.v1.PingSandboxRequest - (*PingSandboxResponse)(nil), // 32: hostagent.v1.PingSandboxResponse - (*TerminateRequest)(nil), // 33: hostagent.v1.TerminateRequest - (*TerminateResponse)(nil), // 34: hostagent.v1.TerminateResponse + (*CreateSandboxRequest)(nil), // 0: 
hostagent.v1.CreateSandboxRequest + (*CreateSandboxResponse)(nil), // 1: hostagent.v1.CreateSandboxResponse + (*DestroySandboxRequest)(nil), // 2: hostagent.v1.DestroySandboxRequest + (*DestroySandboxResponse)(nil), // 3: hostagent.v1.DestroySandboxResponse + (*PauseSandboxRequest)(nil), // 4: hostagent.v1.PauseSandboxRequest + (*PauseSandboxResponse)(nil), // 5: hostagent.v1.PauseSandboxResponse + (*ResumeSandboxRequest)(nil), // 6: hostagent.v1.ResumeSandboxRequest + (*ResumeSandboxResponse)(nil), // 7: hostagent.v1.ResumeSandboxResponse + (*CreateSnapshotRequest)(nil), // 8: hostagent.v1.CreateSnapshotRequest + (*CreateSnapshotResponse)(nil), // 9: hostagent.v1.CreateSnapshotResponse + (*DeleteSnapshotRequest)(nil), // 10: hostagent.v1.DeleteSnapshotRequest + (*DeleteSnapshotResponse)(nil), // 11: hostagent.v1.DeleteSnapshotResponse + (*ExecRequest)(nil), // 12: hostagent.v1.ExecRequest + (*ExecResponse)(nil), // 13: hostagent.v1.ExecResponse + (*ListSandboxesRequest)(nil), // 14: hostagent.v1.ListSandboxesRequest + (*ListSandboxesResponse)(nil), // 15: hostagent.v1.ListSandboxesResponse + (*SandboxInfo)(nil), // 16: hostagent.v1.SandboxInfo + (*WriteFileRequest)(nil), // 17: hostagent.v1.WriteFileRequest + (*WriteFileResponse)(nil), // 18: hostagent.v1.WriteFileResponse + (*ReadFileRequest)(nil), // 19: hostagent.v1.ReadFileRequest + (*ReadFileResponse)(nil), // 20: hostagent.v1.ReadFileResponse + (*ExecStreamRequest)(nil), // 21: hostagent.v1.ExecStreamRequest + (*ExecStreamResponse)(nil), // 22: hostagent.v1.ExecStreamResponse + (*ExecStreamStart)(nil), // 23: hostagent.v1.ExecStreamStart + (*ExecStreamData)(nil), // 24: hostagent.v1.ExecStreamData + (*ExecStreamEnd)(nil), // 25: hostagent.v1.ExecStreamEnd + (*WriteFileStreamRequest)(nil), // 26: hostagent.v1.WriteFileStreamRequest + (*WriteFileStreamMeta)(nil), // 27: hostagent.v1.WriteFileStreamMeta + (*WriteFileStreamResponse)(nil), // 28: hostagent.v1.WriteFileStreamResponse + 
(*ReadFileStreamRequest)(nil), // 29: hostagent.v1.ReadFileStreamRequest + (*ReadFileStreamResponse)(nil), // 30: hostagent.v1.ReadFileStreamResponse + (*PingSandboxRequest)(nil), // 31: hostagent.v1.PingSandboxRequest + (*PingSandboxResponse)(nil), // 32: hostagent.v1.PingSandboxResponse + (*TerminateRequest)(nil), // 33: hostagent.v1.TerminateRequest + (*TerminateResponse)(nil), // 34: hostagent.v1.TerminateResponse + (*MetricPoint)(nil), // 35: hostagent.v1.MetricPoint + (*GetSandboxMetricsRequest)(nil), // 36: hostagent.v1.GetSandboxMetricsRequest + (*GetSandboxMetricsResponse)(nil), // 37: hostagent.v1.GetSandboxMetricsResponse + (*FlushSandboxMetricsRequest)(nil), // 38: hostagent.v1.FlushSandboxMetricsRequest + (*FlushSandboxMetricsResponse)(nil), // 39: hostagent.v1.FlushSandboxMetricsResponse } var file_hostagent_proto_depIdxs = []int32{ 16, // 0: hostagent.v1.ListSandboxesResponse.sandboxes:type_name -> hostagent.v1.SandboxInfo @@ -2106,41 +2402,49 @@ var file_hostagent_proto_depIdxs = []int32{ 24, // 2: hostagent.v1.ExecStreamResponse.data:type_name -> hostagent.v1.ExecStreamData 25, // 3: hostagent.v1.ExecStreamResponse.end:type_name -> hostagent.v1.ExecStreamEnd 27, // 4: hostagent.v1.WriteFileStreamRequest.meta:type_name -> hostagent.v1.WriteFileStreamMeta - 0, // 5: hostagent.v1.HostAgentService.CreateSandbox:input_type -> hostagent.v1.CreateSandboxRequest - 2, // 6: hostagent.v1.HostAgentService.DestroySandbox:input_type -> hostagent.v1.DestroySandboxRequest - 4, // 7: hostagent.v1.HostAgentService.PauseSandbox:input_type -> hostagent.v1.PauseSandboxRequest - 6, // 8: hostagent.v1.HostAgentService.ResumeSandbox:input_type -> hostagent.v1.ResumeSandboxRequest - 12, // 9: hostagent.v1.HostAgentService.Exec:input_type -> hostagent.v1.ExecRequest - 14, // 10: hostagent.v1.HostAgentService.ListSandboxes:input_type -> hostagent.v1.ListSandboxesRequest - 17, // 11: hostagent.v1.HostAgentService.WriteFile:input_type -> hostagent.v1.WriteFileRequest - 19, // 
12: hostagent.v1.HostAgentService.ReadFile:input_type -> hostagent.v1.ReadFileRequest - 8, // 13: hostagent.v1.HostAgentService.CreateSnapshot:input_type -> hostagent.v1.CreateSnapshotRequest - 10, // 14: hostagent.v1.HostAgentService.DeleteSnapshot:input_type -> hostagent.v1.DeleteSnapshotRequest - 21, // 15: hostagent.v1.HostAgentService.ExecStream:input_type -> hostagent.v1.ExecStreamRequest - 26, // 16: hostagent.v1.HostAgentService.WriteFileStream:input_type -> hostagent.v1.WriteFileStreamRequest - 29, // 17: hostagent.v1.HostAgentService.ReadFileStream:input_type -> hostagent.v1.ReadFileStreamRequest - 31, // 18: hostagent.v1.HostAgentService.PingSandbox:input_type -> hostagent.v1.PingSandboxRequest - 33, // 19: hostagent.v1.HostAgentService.Terminate:input_type -> hostagent.v1.TerminateRequest - 1, // 20: hostagent.v1.HostAgentService.CreateSandbox:output_type -> hostagent.v1.CreateSandboxResponse - 3, // 21: hostagent.v1.HostAgentService.DestroySandbox:output_type -> hostagent.v1.DestroySandboxResponse - 5, // 22: hostagent.v1.HostAgentService.PauseSandbox:output_type -> hostagent.v1.PauseSandboxResponse - 7, // 23: hostagent.v1.HostAgentService.ResumeSandbox:output_type -> hostagent.v1.ResumeSandboxResponse - 13, // 24: hostagent.v1.HostAgentService.Exec:output_type -> hostagent.v1.ExecResponse - 15, // 25: hostagent.v1.HostAgentService.ListSandboxes:output_type -> hostagent.v1.ListSandboxesResponse - 18, // 26: hostagent.v1.HostAgentService.WriteFile:output_type -> hostagent.v1.WriteFileResponse - 20, // 27: hostagent.v1.HostAgentService.ReadFile:output_type -> hostagent.v1.ReadFileResponse - 9, // 28: hostagent.v1.HostAgentService.CreateSnapshot:output_type -> hostagent.v1.CreateSnapshotResponse - 11, // 29: hostagent.v1.HostAgentService.DeleteSnapshot:output_type -> hostagent.v1.DeleteSnapshotResponse - 22, // 30: hostagent.v1.HostAgentService.ExecStream:output_type -> hostagent.v1.ExecStreamResponse - 28, // 31: 
hostagent.v1.HostAgentService.WriteFileStream:output_type -> hostagent.v1.WriteFileStreamResponse - 30, // 32: hostagent.v1.HostAgentService.ReadFileStream:output_type -> hostagent.v1.ReadFileStreamResponse - 32, // 33: hostagent.v1.HostAgentService.PingSandbox:output_type -> hostagent.v1.PingSandboxResponse - 34, // 34: hostagent.v1.HostAgentService.Terminate:output_type -> hostagent.v1.TerminateResponse - 20, // [20:35] is the sub-list for method output_type - 5, // [5:20] is the sub-list for method input_type - 5, // [5:5] is the sub-list for extension type_name - 5, // [5:5] is the sub-list for extension extendee - 0, // [0:5] is the sub-list for field type_name + 35, // 5: hostagent.v1.GetSandboxMetricsResponse.points:type_name -> hostagent.v1.MetricPoint + 35, // 6: hostagent.v1.FlushSandboxMetricsResponse.points_10m:type_name -> hostagent.v1.MetricPoint + 35, // 7: hostagent.v1.FlushSandboxMetricsResponse.points_2h:type_name -> hostagent.v1.MetricPoint + 35, // 8: hostagent.v1.FlushSandboxMetricsResponse.points_24h:type_name -> hostagent.v1.MetricPoint + 0, // 9: hostagent.v1.HostAgentService.CreateSandbox:input_type -> hostagent.v1.CreateSandboxRequest + 2, // 10: hostagent.v1.HostAgentService.DestroySandbox:input_type -> hostagent.v1.DestroySandboxRequest + 4, // 11: hostagent.v1.HostAgentService.PauseSandbox:input_type -> hostagent.v1.PauseSandboxRequest + 6, // 12: hostagent.v1.HostAgentService.ResumeSandbox:input_type -> hostagent.v1.ResumeSandboxRequest + 12, // 13: hostagent.v1.HostAgentService.Exec:input_type -> hostagent.v1.ExecRequest + 14, // 14: hostagent.v1.HostAgentService.ListSandboxes:input_type -> hostagent.v1.ListSandboxesRequest + 17, // 15: hostagent.v1.HostAgentService.WriteFile:input_type -> hostagent.v1.WriteFileRequest + 19, // 16: hostagent.v1.HostAgentService.ReadFile:input_type -> hostagent.v1.ReadFileRequest + 8, // 17: hostagent.v1.HostAgentService.CreateSnapshot:input_type -> hostagent.v1.CreateSnapshotRequest + 10, // 18: 
hostagent.v1.HostAgentService.DeleteSnapshot:input_type -> hostagent.v1.DeleteSnapshotRequest + 21, // 19: hostagent.v1.HostAgentService.ExecStream:input_type -> hostagent.v1.ExecStreamRequest + 26, // 20: hostagent.v1.HostAgentService.WriteFileStream:input_type -> hostagent.v1.WriteFileStreamRequest + 29, // 21: hostagent.v1.HostAgentService.ReadFileStream:input_type -> hostagent.v1.ReadFileStreamRequest + 31, // 22: hostagent.v1.HostAgentService.PingSandbox:input_type -> hostagent.v1.PingSandboxRequest + 33, // 23: hostagent.v1.HostAgentService.Terminate:input_type -> hostagent.v1.TerminateRequest + 36, // 24: hostagent.v1.HostAgentService.GetSandboxMetrics:input_type -> hostagent.v1.GetSandboxMetricsRequest + 38, // 25: hostagent.v1.HostAgentService.FlushSandboxMetrics:input_type -> hostagent.v1.FlushSandboxMetricsRequest + 1, // 26: hostagent.v1.HostAgentService.CreateSandbox:output_type -> hostagent.v1.CreateSandboxResponse + 3, // 27: hostagent.v1.HostAgentService.DestroySandbox:output_type -> hostagent.v1.DestroySandboxResponse + 5, // 28: hostagent.v1.HostAgentService.PauseSandbox:output_type -> hostagent.v1.PauseSandboxResponse + 7, // 29: hostagent.v1.HostAgentService.ResumeSandbox:output_type -> hostagent.v1.ResumeSandboxResponse + 13, // 30: hostagent.v1.HostAgentService.Exec:output_type -> hostagent.v1.ExecResponse + 15, // 31: hostagent.v1.HostAgentService.ListSandboxes:output_type -> hostagent.v1.ListSandboxesResponse + 18, // 32: hostagent.v1.HostAgentService.WriteFile:output_type -> hostagent.v1.WriteFileResponse + 20, // 33: hostagent.v1.HostAgentService.ReadFile:output_type -> hostagent.v1.ReadFileResponse + 9, // 34: hostagent.v1.HostAgentService.CreateSnapshot:output_type -> hostagent.v1.CreateSnapshotResponse + 11, // 35: hostagent.v1.HostAgentService.DeleteSnapshot:output_type -> hostagent.v1.DeleteSnapshotResponse + 22, // 36: hostagent.v1.HostAgentService.ExecStream:output_type -> hostagent.v1.ExecStreamResponse + 28, // 37: 
hostagent.v1.HostAgentService.WriteFileStream:output_type -> hostagent.v1.WriteFileStreamResponse + 30, // 38: hostagent.v1.HostAgentService.ReadFileStream:output_type -> hostagent.v1.ReadFileStreamResponse + 32, // 39: hostagent.v1.HostAgentService.PingSandbox:output_type -> hostagent.v1.PingSandboxResponse + 34, // 40: hostagent.v1.HostAgentService.Terminate:output_type -> hostagent.v1.TerminateResponse + 37, // 41: hostagent.v1.HostAgentService.GetSandboxMetrics:output_type -> hostagent.v1.GetSandboxMetricsResponse + 39, // 42: hostagent.v1.HostAgentService.FlushSandboxMetrics:output_type -> hostagent.v1.FlushSandboxMetricsResponse + 26, // [26:43] is the sub-list for method output_type + 9, // [9:26] is the sub-list for method input_type + 9, // [9:9] is the sub-list for extension type_name + 9, // [9:9] is the sub-list for extension extendee + 0, // [0:9] is the sub-list for field type_name } func init() { file_hostagent_proto_init() } @@ -2167,7 +2471,7 @@ func file_hostagent_proto_init() { GoPackagePath: reflect.TypeOf(x{}).PkgPath(), RawDescriptor: unsafe.Slice(unsafe.StringData(file_hostagent_proto_rawDesc), len(file_hostagent_proto_rawDesc)), NumEnums: 0, - NumMessages: 35, + NumMessages: 40, NumExtensions: 0, NumServices: 1, }, diff --git a/proto/hostagent/gen/hostagentv1connect/hostagent.connect.go b/proto/hostagent/gen/hostagentv1connect/hostagent.connect.go index d144451..7f0fa70 100644 --- a/proto/hostagent/gen/hostagentv1connect/hostagent.connect.go +++ b/proto/hostagent/gen/hostagentv1connect/hostagent.connect.go @@ -77,6 +77,12 @@ const ( // HostAgentServiceTerminateProcedure is the fully-qualified name of the HostAgentService's // Terminate RPC. HostAgentServiceTerminateProcedure = "/hostagent.v1.HostAgentService/Terminate" + // HostAgentServiceGetSandboxMetricsProcedure is the fully-qualified name of the HostAgentService's + // GetSandboxMetrics RPC. 
+ HostAgentServiceGetSandboxMetricsProcedure = "/hostagent.v1.HostAgentService/GetSandboxMetrics" + // HostAgentServiceFlushSandboxMetricsProcedure is the fully-qualified name of the + // HostAgentService's FlushSandboxMetrics RPC. + HostAgentServiceFlushSandboxMetricsProcedure = "/hostagent.v1.HostAgentService/FlushSandboxMetrics" ) // HostAgentServiceClient is a client for the hostagent.v1.HostAgentService service. @@ -115,6 +121,11 @@ type HostAgentServiceClient interface { // Called by the control plane immediately when a host is deleted so the // agent shuts down without waiting for the next heartbeat cycle. Terminate(context.Context, *connect.Request[gen.TerminateRequest]) (*connect.Response[gen.TerminateResponse], error) + // GetSandboxMetrics returns ring buffer metrics for a running sandbox. + GetSandboxMetrics(context.Context, *connect.Request[gen.GetSandboxMetricsRequest]) (*connect.Response[gen.GetSandboxMetricsResponse], error) + // FlushSandboxMetrics returns all ring buffer tiers and clears them. + // Called by the control plane before pause/destroy to persist metrics to DB. + FlushSandboxMetrics(context.Context, *connect.Request[gen.FlushSandboxMetricsRequest]) (*connect.Response[gen.FlushSandboxMetricsResponse], error) } // NewHostAgentServiceClient constructs a client for the hostagent.v1.HostAgentService service. 
By @@ -218,26 +229,40 @@ func NewHostAgentServiceClient(httpClient connect.HTTPClient, baseURL string, op connect.WithSchema(hostAgentServiceMethods.ByName("Terminate")), connect.WithClientOptions(opts...), ), + getSandboxMetrics: connect.NewClient[gen.GetSandboxMetricsRequest, gen.GetSandboxMetricsResponse]( + httpClient, + baseURL+HostAgentServiceGetSandboxMetricsProcedure, + connect.WithSchema(hostAgentServiceMethods.ByName("GetSandboxMetrics")), + connect.WithClientOptions(opts...), + ), + flushSandboxMetrics: connect.NewClient[gen.FlushSandboxMetricsRequest, gen.FlushSandboxMetricsResponse]( + httpClient, + baseURL+HostAgentServiceFlushSandboxMetricsProcedure, + connect.WithSchema(hostAgentServiceMethods.ByName("FlushSandboxMetrics")), + connect.WithClientOptions(opts...), + ), } } // hostAgentServiceClient implements HostAgentServiceClient. type hostAgentServiceClient struct { - createSandbox *connect.Client[gen.CreateSandboxRequest, gen.CreateSandboxResponse] - destroySandbox *connect.Client[gen.DestroySandboxRequest, gen.DestroySandboxResponse] - pauseSandbox *connect.Client[gen.PauseSandboxRequest, gen.PauseSandboxResponse] - resumeSandbox *connect.Client[gen.ResumeSandboxRequest, gen.ResumeSandboxResponse] - exec *connect.Client[gen.ExecRequest, gen.ExecResponse] - listSandboxes *connect.Client[gen.ListSandboxesRequest, gen.ListSandboxesResponse] - writeFile *connect.Client[gen.WriteFileRequest, gen.WriteFileResponse] - readFile *connect.Client[gen.ReadFileRequest, gen.ReadFileResponse] - createSnapshot *connect.Client[gen.CreateSnapshotRequest, gen.CreateSnapshotResponse] - deleteSnapshot *connect.Client[gen.DeleteSnapshotRequest, gen.DeleteSnapshotResponse] - execStream *connect.Client[gen.ExecStreamRequest, gen.ExecStreamResponse] - writeFileStream *connect.Client[gen.WriteFileStreamRequest, gen.WriteFileStreamResponse] - readFileStream *connect.Client[gen.ReadFileStreamRequest, gen.ReadFileStreamResponse] - pingSandbox 
*connect.Client[gen.PingSandboxRequest, gen.PingSandboxResponse] - terminate *connect.Client[gen.TerminateRequest, gen.TerminateResponse] + createSandbox *connect.Client[gen.CreateSandboxRequest, gen.CreateSandboxResponse] + destroySandbox *connect.Client[gen.DestroySandboxRequest, gen.DestroySandboxResponse] + pauseSandbox *connect.Client[gen.PauseSandboxRequest, gen.PauseSandboxResponse] + resumeSandbox *connect.Client[gen.ResumeSandboxRequest, gen.ResumeSandboxResponse] + exec *connect.Client[gen.ExecRequest, gen.ExecResponse] + listSandboxes *connect.Client[gen.ListSandboxesRequest, gen.ListSandboxesResponse] + writeFile *connect.Client[gen.WriteFileRequest, gen.WriteFileResponse] + readFile *connect.Client[gen.ReadFileRequest, gen.ReadFileResponse] + createSnapshot *connect.Client[gen.CreateSnapshotRequest, gen.CreateSnapshotResponse] + deleteSnapshot *connect.Client[gen.DeleteSnapshotRequest, gen.DeleteSnapshotResponse] + execStream *connect.Client[gen.ExecStreamRequest, gen.ExecStreamResponse] + writeFileStream *connect.Client[gen.WriteFileStreamRequest, gen.WriteFileStreamResponse] + readFileStream *connect.Client[gen.ReadFileStreamRequest, gen.ReadFileStreamResponse] + pingSandbox *connect.Client[gen.PingSandboxRequest, gen.PingSandboxResponse] + terminate *connect.Client[gen.TerminateRequest, gen.TerminateResponse] + getSandboxMetrics *connect.Client[gen.GetSandboxMetricsRequest, gen.GetSandboxMetricsResponse] + flushSandboxMetrics *connect.Client[gen.FlushSandboxMetricsRequest, gen.FlushSandboxMetricsResponse] } // CreateSandbox calls hostagent.v1.HostAgentService.CreateSandbox. @@ -315,6 +340,16 @@ func (c *hostAgentServiceClient) Terminate(ctx context.Context, req *connect.Req return c.terminate.CallUnary(ctx, req) } +// GetSandboxMetrics calls hostagent.v1.HostAgentService.GetSandboxMetrics. 
+func (c *hostAgentServiceClient) GetSandboxMetrics(ctx context.Context, req *connect.Request[gen.GetSandboxMetricsRequest]) (*connect.Response[gen.GetSandboxMetricsResponse], error) { + return c.getSandboxMetrics.CallUnary(ctx, req) +} + +// FlushSandboxMetrics calls hostagent.v1.HostAgentService.FlushSandboxMetrics. +func (c *hostAgentServiceClient) FlushSandboxMetrics(ctx context.Context, req *connect.Request[gen.FlushSandboxMetricsRequest]) (*connect.Response[gen.FlushSandboxMetricsResponse], error) { + return c.flushSandboxMetrics.CallUnary(ctx, req) +} + // HostAgentServiceHandler is an implementation of the hostagent.v1.HostAgentService service. type HostAgentServiceHandler interface { // CreateSandbox boots a new microVM with the given configuration. @@ -351,6 +386,11 @@ type HostAgentServiceHandler interface { // Called by the control plane immediately when a host is deleted so the // agent shuts down without waiting for the next heartbeat cycle. Terminate(context.Context, *connect.Request[gen.TerminateRequest]) (*connect.Response[gen.TerminateResponse], error) + // GetSandboxMetrics returns ring buffer metrics for a running sandbox. + GetSandboxMetrics(context.Context, *connect.Request[gen.GetSandboxMetricsRequest]) (*connect.Response[gen.GetSandboxMetricsResponse], error) + // FlushSandboxMetrics returns all ring buffer tiers and clears them. + // Called by the control plane before pause/destroy to persist metrics to DB. + FlushSandboxMetrics(context.Context, *connect.Request[gen.FlushSandboxMetricsRequest]) (*connect.Response[gen.FlushSandboxMetricsResponse], error) } // NewHostAgentServiceHandler builds an HTTP handler from the service implementation. 
It returns the @@ -450,6 +490,18 @@ func NewHostAgentServiceHandler(svc HostAgentServiceHandler, opts ...connect.Han connect.WithSchema(hostAgentServiceMethods.ByName("Terminate")), connect.WithHandlerOptions(opts...), ) + hostAgentServiceGetSandboxMetricsHandler := connect.NewUnaryHandler( + HostAgentServiceGetSandboxMetricsProcedure, + svc.GetSandboxMetrics, + connect.WithSchema(hostAgentServiceMethods.ByName("GetSandboxMetrics")), + connect.WithHandlerOptions(opts...), + ) + hostAgentServiceFlushSandboxMetricsHandler := connect.NewUnaryHandler( + HostAgentServiceFlushSandboxMetricsProcedure, + svc.FlushSandboxMetrics, + connect.WithSchema(hostAgentServiceMethods.ByName("FlushSandboxMetrics")), + connect.WithHandlerOptions(opts...), + ) return "/hostagent.v1.HostAgentService/", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { switch r.URL.Path { case HostAgentServiceCreateSandboxProcedure: @@ -482,6 +534,10 @@ func NewHostAgentServiceHandler(svc HostAgentServiceHandler, opts ...connect.Han hostAgentServicePingSandboxHandler.ServeHTTP(w, r) case HostAgentServiceTerminateProcedure: hostAgentServiceTerminateHandler.ServeHTTP(w, r) + case HostAgentServiceGetSandboxMetricsProcedure: + hostAgentServiceGetSandboxMetricsHandler.ServeHTTP(w, r) + case HostAgentServiceFlushSandboxMetricsProcedure: + hostAgentServiceFlushSandboxMetricsHandler.ServeHTTP(w, r) default: http.NotFound(w, r) } @@ -550,3 +606,11 @@ func (UnimplementedHostAgentServiceHandler) PingSandbox(context.Context, *connec func (UnimplementedHostAgentServiceHandler) Terminate(context.Context, *connect.Request[gen.TerminateRequest]) (*connect.Response[gen.TerminateResponse], error) { return nil, connect.NewError(connect.CodeUnimplemented, errors.New("hostagent.v1.HostAgentService.Terminate is not implemented")) } + +func (UnimplementedHostAgentServiceHandler) GetSandboxMetrics(context.Context, *connect.Request[gen.GetSandboxMetricsRequest]) (*connect.Response[gen.GetSandboxMetricsResponse], 
error) { + return nil, connect.NewError(connect.CodeUnimplemented, errors.New("hostagent.v1.HostAgentService.GetSandboxMetrics is not implemented")) +} + +func (UnimplementedHostAgentServiceHandler) FlushSandboxMetrics(context.Context, *connect.Request[gen.FlushSandboxMetricsRequest]) (*connect.Response[gen.FlushSandboxMetricsResponse], error) { + return nil, connect.NewError(connect.CodeUnimplemented, errors.New("hostagent.v1.HostAgentService.FlushSandboxMetrics is not implemented")) +} diff --git a/proto/hostagent/hostagent.proto b/proto/hostagent/hostagent.proto index c9cfffa..214a84e 100644 --- a/proto/hostagent/hostagent.proto +++ b/proto/hostagent/hostagent.proto @@ -54,6 +54,13 @@ service HostAgentService { // agent shuts down without waiting for the next heartbeat cycle. rpc Terminate(TerminateRequest) returns (TerminateResponse); + // GetSandboxMetrics returns ring buffer metrics for a running sandbox. + rpc GetSandboxMetrics(GetSandboxMetricsRequest) returns (GetSandboxMetricsResponse); + + // FlushSandboxMetrics returns all ring buffer tiers and clears them. + // Called by the control plane before pause/destroy to persist metrics to DB. + rpc FlushSandboxMetrics(FlushSandboxMetricsRequest) returns (FlushSandboxMetricsResponse); + } message CreateSandboxRequest { @@ -248,3 +255,32 @@ message PingSandboxResponse {} message TerminateRequest {} message TerminateResponse {} + +// ── Metrics ────────────────────────────────────────────────────────── + +message MetricPoint { + int64 timestamp_unix = 1; + double cpu_pct = 2; + int64 mem_bytes = 3; + int64 disk_bytes = 4; +} + +message GetSandboxMetricsRequest { + string sandbox_id = 1; + // Range tier: "10m", "2h", or "24h". 
+ string range = 2; +} + +message GetSandboxMetricsResponse { + repeated MetricPoint points = 1; +} + +message FlushSandboxMetricsRequest { + string sandbox_id = 1; +} + +message FlushSandboxMetricsResponse { + repeated MetricPoint points_10m = 1; + repeated MetricPoint points_2h = 2; + repeated MetricPoint points_24h = 3; +}