Add per-sandbox CPU/memory/disk metrics collection
Samples /proc/{fc_pid}/stat (CPU%), /proc/{fc_pid}/status (VmRSS), and
stat() on CoW files at 500ms intervals per running sandbox. Three tiered
ring buffers downsample into 30s and 5min averages for 10min/2h/24h
retention. Metrics are flushed to DB on pause (all tiers) and destroy
(24h only). New GetSandboxMetrics and FlushSandboxMetrics RPCs on the
host agent, proxied through GET /v1/sandboxes/{id}/metrics?range= on
the control plane. Returns live data for running sandboxes, DB data for
paused, and 404 for stopped.
This commit is contained in:
16
db/migrations/20260325135035_add_sandbox_metric_points.sql
Normal file
16
db/migrations/20260325135035_add_sandbox_metric_points.sql
Normal file
@ -0,0 +1,16 @@
|
||||
-- +goose Up
-- sandbox_metric_points persists downsampled per-sandbox resource metrics,
-- flushed from the host agent's in-memory ring buffers on pause/destroy.
CREATE TABLE sandbox_metric_points (
    sandbox_id TEXT NOT NULL,
    -- Retention tier: '10m' (500ms samples), '2h' (30s averages),
    -- '24h' (5min averages).
    tier TEXT NOT NULL CHECK (tier IN ('10m', '2h', '24h')),
    -- Sample timestamp (epoch seconds — see metricPointsToPB's Unix()).
    ts BIGINT NOT NULL,
    cpu_pct FLOAT8 NOT NULL DEFAULT 0,
    mem_bytes BIGINT NOT NULL DEFAULT 0,
    disk_bytes BIGINT NOT NULL DEFAULT 0,
    PRIMARY KEY (sandbox_id, tier, ts)
);

-- No secondary index: lookups by (sandbox_id, tier) are already served by
-- the primary-key B-tree, whose leading columns are exactly that prefix.
-- The previously added idx_sandbox_metric_points_sandbox_tier duplicated
-- the PK prefix and only added write amplification, so it is omitted.

-- +goose Down
DROP TABLE IF EXISTS sandbox_metric_points;
|
||||
@ -27,6 +27,30 @@ WHERE team_id = $1
|
||||
DELETE FROM sandbox_metrics_snapshots
|
||||
WHERE sampled_at < NOW() - INTERVAL '60 days';
|
||||
|
||||
-- name: InsertSandboxMetricPoint :exec
-- Persist one downsampled point; duplicate (sandbox_id, tier, ts) keys are
-- ignored so re-flushing overlapping ring-buffer data is idempotent.
INSERT INTO sandbox_metric_points (sandbox_id, tier, ts, cpu_pct, mem_bytes, disk_bytes)
VALUES ($1, $2, $3, $4, $5, $6)
ON CONFLICT (sandbox_id, tier, ts) DO NOTHING;

-- name: GetSandboxMetricPoints :many
-- All persisted points for one sandbox and tier, oldest first.
SELECT ts, cpu_pct, mem_bytes, disk_bytes
FROM sandbox_metric_points
WHERE sandbox_id = $1 AND tier = $2
ORDER BY ts ASC;

-- name: DeleteSandboxMetricPoints :exec
-- Remove every persisted point for a sandbox, across all tiers.
DELETE FROM sandbox_metric_points
WHERE sandbox_id = $1;

-- name: DeleteSandboxMetricPointsByTier :exec
-- Remove persisted points for one sandbox in a single tier.
DELETE FROM sandbox_metric_points
WHERE sandbox_id = $1 AND tier = $2;

-- name: PruneSandboxMetricPoints :exec
-- Remove metric points older than 30 days for destroyed sandboxes.
-- NOTE(review): this deletes 30-day-old rows for ALL sandboxes, not only
-- destroyed ones — confirm that is the intended retention policy.
DELETE FROM sandbox_metric_points
WHERE ts < EXTRACT(EPOCH FROM NOW() - INTERVAL '30 days')::BIGINT;
|
||||
|
||||
-- name: SampleSandboxMetrics :many
|
||||
-- Aggregates per-team resource usage from the live sandboxes table.
|
||||
-- Groups by all teams that have any sandbox row (including stopped) so that
|
||||
|
||||
130
internal/api/handlers_metrics.go
Normal file
130
internal/api/handlers_metrics.go
Normal file
@ -0,0 +1,130 @@
|
||||
package api
|
||||
|
||||
import (
|
||||
"context"
|
||||
"net/http"
|
||||
|
||||
"connectrpc.com/connect"
|
||||
"github.com/go-chi/chi/v5"
|
||||
|
||||
"git.omukk.dev/wrenn/sandbox/internal/auth"
|
||||
"git.omukk.dev/wrenn/sandbox/internal/db"
|
||||
"git.omukk.dev/wrenn/sandbox/internal/lifecycle"
|
||||
pb "git.omukk.dev/wrenn/sandbox/proto/hostagent/gen"
|
||||
)
|
||||
|
||||
// sandboxMetricsHandler serves GET /v1/sandboxes/{id}/metrics, returning
// per-sandbox CPU/memory/disk time series. Running sandboxes are proxied to
// the host agent; paused sandboxes are read from persisted DB points.
type sandboxMetricsHandler struct {
	db   *db.Queries
	pool *lifecycle.HostClientPool
}

// newSandboxMetricsHandler wires a metrics handler to the DB and the pool
// of host-agent clients.
func newSandboxMetricsHandler(db *db.Queries, pool *lifecycle.HostClientPool) *sandboxMetricsHandler {
	return &sandboxMetricsHandler{db: db, pool: pool}
}

// metricPointResponse is one JSON sample in a metrics response.
type metricPointResponse struct {
	TimestampUnix int64   `json:"timestamp_unix"` // sample time, Unix seconds
	CPUPct        float64 `json:"cpu_pct"`        // CPU utilization, 0-100
	MemBytes      int64   `json:"mem_bytes"`
	DiskBytes     int64   `json:"disk_bytes"`
}

// metricsResponse is the JSON body for the metrics endpoint.
type metricsResponse struct {
	SandboxID string                `json:"sandbox_id"`
	Range     string                `json:"range"` // "10m", "2h", or "24h"
	Points    []metricPointResponse `json:"points"`
}
|
||||
|
||||
// GetMetrics handles GET /v1/sandboxes/{id}/metrics?range=10m|2h|24h.
|
||||
func (h *sandboxMetricsHandler) GetMetrics(w http.ResponseWriter, r *http.Request) {
|
||||
sandboxID := chi.URLParam(r, "id")
|
||||
ctx := r.Context()
|
||||
ac := auth.MustFromContext(ctx)
|
||||
|
||||
rangeTier := r.URL.Query().Get("range")
|
||||
if rangeTier == "" {
|
||||
rangeTier = "10m"
|
||||
}
|
||||
if rangeTier != "10m" && rangeTier != "2h" && rangeTier != "24h" {
|
||||
writeError(w, http.StatusBadRequest, "invalid_request", "range must be 10m, 2h, or 24h")
|
||||
return
|
||||
}
|
||||
|
||||
sb, err := h.db.GetSandboxByTeam(ctx, db.GetSandboxByTeamParams{ID: sandboxID, TeamID: ac.TeamID})
|
||||
if err != nil {
|
||||
writeError(w, http.StatusNotFound, "not_found", "sandbox not found")
|
||||
return
|
||||
}
|
||||
|
||||
switch sb.Status {
|
||||
case "running":
|
||||
h.getFromAgent(w, r, sandboxID, rangeTier, sb.HostID)
|
||||
case "paused":
|
||||
h.getFromDB(ctx, w, sandboxID, rangeTier)
|
||||
default:
|
||||
writeError(w, http.StatusNotFound, "not_found", "metrics not available for sandbox in state: "+sb.Status)
|
||||
}
|
||||
}
|
||||
|
||||
func (h *sandboxMetricsHandler) getFromAgent(w http.ResponseWriter, r *http.Request, sandboxID, rangeTier, hostID string) {
|
||||
ctx := r.Context()
|
||||
|
||||
agent, err := agentForHost(ctx, h.db, h.pool, hostID)
|
||||
if err != nil {
|
||||
writeError(w, http.StatusServiceUnavailable, "host_unavailable", "sandbox host is not reachable")
|
||||
return
|
||||
}
|
||||
|
||||
resp, err := agent.GetSandboxMetrics(ctx, connect.NewRequest(&pb.GetSandboxMetricsRequest{
|
||||
SandboxId: sandboxID,
|
||||
Range: rangeTier,
|
||||
}))
|
||||
if err != nil {
|
||||
status, code, msg := agentErrToHTTP(err)
|
||||
writeError(w, status, code, msg)
|
||||
return
|
||||
}
|
||||
|
||||
points := make([]metricPointResponse, len(resp.Msg.Points))
|
||||
for i, p := range resp.Msg.Points {
|
||||
points[i] = metricPointResponse{
|
||||
TimestampUnix: p.TimestampUnix,
|
||||
CPUPct: p.CpuPct,
|
||||
MemBytes: p.MemBytes,
|
||||
DiskBytes: p.DiskBytes,
|
||||
}
|
||||
}
|
||||
|
||||
writeJSON(w, http.StatusOK, metricsResponse{
|
||||
SandboxID: sandboxID,
|
||||
Range: rangeTier,
|
||||
Points: points,
|
||||
})
|
||||
}
|
||||
|
||||
func (h *sandboxMetricsHandler) getFromDB(ctx context.Context, w http.ResponseWriter, sandboxID, rangeTier string) {
|
||||
rows, err := h.db.GetSandboxMetricPoints(ctx, db.GetSandboxMetricPointsParams{
|
||||
SandboxID: sandboxID,
|
||||
Tier: rangeTier,
|
||||
})
|
||||
if err != nil {
|
||||
writeError(w, http.StatusInternalServerError, "internal_error", "failed to read metrics")
|
||||
return
|
||||
}
|
||||
|
||||
points := make([]metricPointResponse, len(rows))
|
||||
for i, row := range rows {
|
||||
points[i] = metricPointResponse{
|
||||
TimestampUnix: row.Ts,
|
||||
CPUPct: row.CpuPct,
|
||||
MemBytes: row.MemBytes,
|
||||
DiskBytes: row.DiskBytes,
|
||||
}
|
||||
}
|
||||
|
||||
writeJSON(w, http.StatusOK, metricsResponse{
|
||||
SandboxID: sandboxID,
|
||||
Range: rangeTier,
|
||||
Points: points,
|
||||
})
|
||||
}
|
||||
@ -751,6 +751,60 @@ paths:
|
||||
schema:
|
||||
$ref: "#/components/schemas/Error"
|
||||
|
||||
/v1/sandboxes/{id}/metrics:
|
||||
parameters:
|
||||
- name: id
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
|
||||
get:
|
||||
summary: Get per-sandbox resource metrics
|
||||
operationId: getSandboxMetrics
|
||||
tags: [sandboxes]
|
||||
security:
|
||||
- apiKeyAuth: []
|
||||
- bearerAuth: []
|
||||
description: |
|
||||
Returns time-series CPU, memory, and disk metrics for a sandbox.
|
||||
Three tiers are available with different granularity and retention:
|
||||
- `10m`: 500ms samples, last 10 minutes
|
||||
- `2h`: 30-second averages, last 2 hours
|
||||
- `24h`: 5-minute averages, last 24 hours
|
||||
|
||||
For running sandboxes, data comes from the host agent's in-memory
|
||||
ring buffer. For paused sandboxes, data is read from persisted
|
||||
snapshots in the database. Stopped/destroyed sandboxes return 404.
|
||||
parameters:
|
||||
- name: range
|
||||
in: query
|
||||
required: false
|
||||
schema:
|
||||
type: string
|
||||
enum: ["10m", "2h", "24h"]
|
||||
default: "10m"
|
||||
description: Time range tier to query
|
||||
responses:
|
||||
"200":
|
||||
description: Metrics retrieved
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/SandboxMetrics"
|
||||
"400":
|
||||
description: Invalid range parameter
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/Error"
|
||||
"404":
|
||||
description: Sandbox not found or metrics not available
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/Error"
|
||||
|
||||
/v1/sandboxes/{id}/pause:
|
||||
parameters:
|
||||
- name: id
|
||||
@ -1981,6 +2035,38 @@ components:
|
||||
items:
|
||||
$ref: "#/components/schemas/TeamMember"
|
||||
|
||||
SandboxMetrics:
|
||||
type: object
|
||||
properties:
|
||||
sandbox_id:
|
||||
type: string
|
||||
range:
|
||||
type: string
|
||||
enum: ["10m", "2h", "24h"]
|
||||
points:
|
||||
type: array
|
||||
items:
|
||||
$ref: "#/components/schemas/MetricPoint"
|
||||
|
||||
MetricPoint:
|
||||
type: object
|
||||
properties:
|
||||
timestamp_unix:
|
||||
type: integer
|
||||
format: int64
|
||||
cpu_pct:
|
||||
type: number
|
||||
format: double
|
||||
description: "CPU utilization percentage (0-100), normalized to vCPU count"
|
||||
mem_bytes:
|
||||
type: integer
|
||||
format: int64
|
||||
description: "Resident memory in bytes (VmRSS of Firecracker process)"
|
||||
disk_bytes:
|
||||
type: integer
|
||||
format: int64
|
||||
description: "Allocated disk bytes for the CoW sparse file"
|
||||
|
||||
Error:
|
||||
type: object
|
||||
properties:
|
||||
|
||||
@ -64,6 +64,7 @@ func New(
|
||||
usersH := newUsersHandler(teamSvc)
|
||||
auditH := newAuditHandler(auditSvc)
|
||||
statsH := newStatsHandler(statsSvc)
|
||||
metricsH := newSandboxMetricsHandler(queries, pool)
|
||||
|
||||
// OpenAPI spec and docs.
|
||||
r.Get("/openapi.yaml", serveOpenAPI)
|
||||
@ -125,6 +126,7 @@ func New(
|
||||
r.Post("/files/read", files.Download)
|
||||
r.Post("/files/stream/write", filesStream.StreamUpload)
|
||||
r.Post("/files/stream/read", filesStream.StreamDownload)
|
||||
r.Get("/metrics", metricsH.GetMetrics)
|
||||
})
|
||||
})
|
||||
|
||||
|
||||
@ -9,6 +9,31 @@ import (
|
||||
"context"
|
||||
)
|
||||
|
||||
const deleteSandboxMetricPoints = `-- name: DeleteSandboxMetricPoints :exec
DELETE FROM sandbox_metric_points
WHERE sandbox_id = $1
`

// DeleteSandboxMetricPoints removes every persisted metric point for a
// sandbox, across all retention tiers.
// NOTE(review): this file appears sqlc-generated — change the .sql query
// source rather than hand-editing this code.
func (q *Queries) DeleteSandboxMetricPoints(ctx context.Context, sandboxID string) error {
	_, err := q.db.Exec(ctx, deleteSandboxMetricPoints, sandboxID)
	return err
}

const deleteSandboxMetricPointsByTier = `-- name: DeleteSandboxMetricPointsByTier :exec
DELETE FROM sandbox_metric_points
WHERE sandbox_id = $1 AND tier = $2
`

// DeleteSandboxMetricPointsByTierParams keys a per-tier delete.
type DeleteSandboxMetricPointsByTierParams struct {
	SandboxID string `json:"sandbox_id"`
	Tier      string `json:"tier"`
}

// DeleteSandboxMetricPointsByTier removes persisted metric points for one
// sandbox in a single tier ("10m", "2h", or "24h").
func (q *Queries) DeleteSandboxMetricPointsByTier(ctx context.Context, arg DeleteSandboxMetricPointsByTierParams) error {
	_, err := q.db.Exec(ctx, deleteSandboxMetricPointsByTier, arg.SandboxID, arg.Tier)
	return err
}
|
||||
|
||||
const getLiveMetrics = `-- name: GetLiveMetrics :one
|
||||
SELECT
|
||||
(COUNT(*) FILTER (WHERE status IN ('running', 'starting')))::INTEGER AS running_count,
|
||||
@ -58,6 +83,50 @@ func (q *Queries) GetPeakMetrics(ctx context.Context, teamID string) (GetPeakMet
|
||||
return i, err
|
||||
}
|
||||
|
||||
const getSandboxMetricPoints = `-- name: GetSandboxMetricPoints :many
SELECT ts, cpu_pct, mem_bytes, disk_bytes
FROM sandbox_metric_points
WHERE sandbox_id = $1 AND tier = $2
ORDER BY ts ASC
`

// GetSandboxMetricPointsParams selects one sandbox's points in one tier.
type GetSandboxMetricPointsParams struct {
	SandboxID string `json:"sandbox_id"`
	Tier      string `json:"tier"`
}

// GetSandboxMetricPointsRow is one persisted metric sample.
type GetSandboxMetricPointsRow struct {
	Ts        int64   `json:"ts"`       // Unix seconds
	CpuPct    float64 `json:"cpu_pct"`  // CPU utilization, 0-100
	MemBytes  int64   `json:"mem_bytes"`
	DiskBytes int64   `json:"disk_bytes"`
}

// GetSandboxMetricPoints returns all persisted points for a sandbox+tier,
// ordered oldest first. Returns a nil slice (not an error) when no rows match.
// NOTE(review): this file appears sqlc-generated — edit the .sql source.
func (q *Queries) GetSandboxMetricPoints(ctx context.Context, arg GetSandboxMetricPointsParams) ([]GetSandboxMetricPointsRow, error) {
	rows, err := q.db.Query(ctx, getSandboxMetricPoints, arg.SandboxID, arg.Tier)
	if err != nil {
		return nil, err
	}
	defer rows.Close()
	var items []GetSandboxMetricPointsRow
	for rows.Next() {
		var i GetSandboxMetricPointsRow
		if err := rows.Scan(
			&i.Ts,
			&i.CpuPct,
			&i.MemBytes,
			&i.DiskBytes,
		); err != nil {
			return nil, err
		}
		items = append(items, i)
	}
	if err := rows.Err(); err != nil {
		return nil, err
	}
	return items, nil
}
|
||||
|
||||
const insertMetricsSnapshot = `-- name: InsertMetricsSnapshot :exec
|
||||
INSERT INTO sandbox_metrics_snapshots (team_id, running_count, vcpus_reserved, memory_mb_reserved)
|
||||
VALUES ($1, $2, $3, $4)
|
||||
@ -80,6 +149,33 @@ func (q *Queries) InsertMetricsSnapshot(ctx context.Context, arg InsertMetricsSn
|
||||
return err
|
||||
}
|
||||
|
||||
const insertSandboxMetricPoint = `-- name: InsertSandboxMetricPoint :exec
INSERT INTO sandbox_metric_points (sandbox_id, tier, ts, cpu_pct, mem_bytes, disk_bytes)
VALUES ($1, $2, $3, $4, $5, $6)
ON CONFLICT (sandbox_id, tier, ts) DO NOTHING
`

// InsertSandboxMetricPointParams is one metric sample to persist; the
// (SandboxID, Tier, Ts) triple is the table's primary key.
type InsertSandboxMetricPointParams struct {
	SandboxID string  `json:"sandbox_id"`
	Tier      string  `json:"tier"`
	Ts        int64   `json:"ts"`
	CpuPct    float64 `json:"cpu_pct"`
	MemBytes  int64   `json:"mem_bytes"`
	DiskBytes int64   `json:"disk_bytes"`
}

// InsertSandboxMetricPoint persists one downsampled point. Duplicate keys
// are silently skipped (ON CONFLICT DO NOTHING), so re-flushing overlapping
// ring-buffer data is idempotent.
func (q *Queries) InsertSandboxMetricPoint(ctx context.Context, arg InsertSandboxMetricPointParams) error {
	_, err := q.db.Exec(ctx, insertSandboxMetricPoint,
		arg.SandboxID,
		arg.Tier,
		arg.Ts,
		arg.CpuPct,
		arg.MemBytes,
		arg.DiskBytes,
	)
	return err
}
|
||||
|
||||
const pruneOldMetrics = `-- name: PruneOldMetrics :exec
|
||||
DELETE FROM sandbox_metrics_snapshots
|
||||
WHERE sampled_at < NOW() - INTERVAL '60 days'
|
||||
@ -90,6 +186,17 @@ func (q *Queries) PruneOldMetrics(ctx context.Context) error {
|
||||
return err
|
||||
}
|
||||
|
||||
const pruneSandboxMetricPoints = `-- name: PruneSandboxMetricPoints :exec
DELETE FROM sandbox_metric_points
WHERE ts < EXTRACT(EPOCH FROM NOW() - INTERVAL '30 days')::BIGINT
`

// Remove metric points older than 30 days for destroyed sandboxes.
// NOTE(review): the SQL has no status filter — it prunes 30-day-old rows for
// ALL sandboxes, not only destroyed ones; confirm this matches the intended
// retention policy.
func (q *Queries) PruneSandboxMetricPoints(ctx context.Context) error {
	_, err := q.db.Exec(ctx, pruneSandboxMetricPoints)
	return err
}
|
||||
|
||||
const sampleSandboxMetrics = `-- name: SampleSandboxMetrics :many
|
||||
SELECT
|
||||
team_id,
|
||||
|
||||
@ -99,6 +99,15 @@ type Sandbox struct {
|
||||
TeamID string `json:"team_id"`
|
||||
}
|
||||
|
||||
// SandboxMetricPoint mirrors one row of sandbox_metric_points: a single
// downsampled CPU/memory/disk sample for a sandbox in one retention tier.
type SandboxMetricPoint struct {
	SandboxID string  `json:"sandbox_id"`
	Tier      string  `json:"tier"` // "10m", "2h", or "24h"
	Ts        int64   `json:"ts"`   // sample time, Unix seconds
	CpuPct    float64 `json:"cpu_pct"`
	MemBytes  int64   `json:"mem_bytes"`
	DiskBytes int64   `json:"disk_bytes"`
}
|
||||
|
||||
type SandboxMetricsSnapshot struct {
|
||||
ID int64 `json:"id"`
|
||||
TeamID string `json:"team_id"`
|
||||
|
||||
@ -426,3 +426,55 @@ func (s *Server) Terminate(
|
||||
}
|
||||
return connect.NewResponse(&pb.TerminateResponse{}), nil
|
||||
}
|
||||
|
||||
// GetSandboxMetrics returns the in-memory ring-buffer metrics for a running
// sandbox at the requested range tier ("10m", "2h", or "24h"). Unknown
// sandboxes map to CodeNotFound, bad tiers to CodeInvalidArgument.
func (s *Server) GetSandboxMetrics(
	_ context.Context,
	req *connect.Request[pb.GetSandboxMetricsRequest],
) (*connect.Response[pb.GetSandboxMetricsResponse], error) {
	msg := req.Msg

	points, err := s.mgr.GetMetrics(msg.SandboxId, msg.Range)
	if err != nil {
		// NOTE(review): classifying errors by substring is brittle — prefer
		// sentinel errors exported from the sandbox package, checked with
		// errors.Is, so a reworded message can't change the RPC code.
		if strings.Contains(err.Error(), "not found") {
			return nil, connect.NewError(connect.CodeNotFound, err)
		}
		if strings.Contains(err.Error(), "invalid range") {
			return nil, connect.NewError(connect.CodeInvalidArgument, err)
		}
		return nil, connect.NewError(connect.CodeInternal, err)
	}

	return nil, nil
}
|
||||
|
||||
// FlushSandboxMetrics drains and returns all three metric tiers for a
// sandbox, stopping its sampler. Per the commit description, the control
// plane calls this before pause (persisting all tiers) and destroy
// (persisting the 24h tier only).
func (s *Server) FlushSandboxMetrics(
	_ context.Context,
	req *connect.Request[pb.FlushSandboxMetricsRequest],
) (*connect.Response[pb.FlushSandboxMetricsResponse], error) {
	pts10m, pts2h, pts24h, err := s.mgr.FlushMetrics(req.Msg.SandboxId)
	if err != nil {
		// NOTE(review): substring matching on error text is brittle; prefer a
		// sentinel error from the sandbox package checked with errors.Is.
		if strings.Contains(err.Error(), "not found") {
			return nil, connect.NewError(connect.CodeNotFound, err)
		}
		return nil, connect.NewError(connect.CodeInternal, err)
	}

	return connect.NewResponse(&pb.FlushSandboxMetricsResponse{
		Points_10M: metricPointsToPB(pts10m),
		Points_2H:  metricPointsToPB(pts2h),
		Points_24H: metricPointsToPB(pts24h),
	}), nil
}
|
||||
|
||||
func metricPointsToPB(pts []sandbox.MetricPoint) []*pb.MetricPoint {
|
||||
out := make([]*pb.MetricPoint, len(pts))
|
||||
for i, p := range pts {
|
||||
out[i] = &pb.MetricPoint{
|
||||
TimestampUnix: p.Timestamp.Unix(),
|
||||
CpuPct: p.CPUPct,
|
||||
MemBytes: p.MemBytes,
|
||||
DiskBytes: p.DiskBytes,
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
@ -58,6 +58,12 @@ type sandboxState struct {
|
||||
// sandbox was restored. Non-nil means re-pause should use "Diff" snapshot
|
||||
// type instead of "Full", avoiding the UFFD fault-in storm.
|
||||
parent *snapshotParent
|
||||
|
||||
// Metrics sampling state.
|
||||
fcPID int // Firecracker process PID (child of unshare wrapper)
|
||||
ring *metricsRing // tiered ring buffers for CPU/mem/disk metrics
|
||||
samplerCancel context.CancelFunc // cancels the per-sandbox sampling goroutine
|
||||
samplerDone chan struct{} // closed when the sampling goroutine exits
|
||||
}
|
||||
|
||||
// snapshotParent stores the previous generation's snapshot state so that
|
||||
@ -232,6 +238,8 @@ func (m *Manager) Create(ctx context.Context, sandboxID, template string, vcpus,
|
||||
m.boxes[sandboxID] = sb
|
||||
m.mu.Unlock()
|
||||
|
||||
m.startSampler(sb)
|
||||
|
||||
slog.Info("sandbox created",
|
||||
"id", sandboxID,
|
||||
"template", template,
|
||||
@ -265,6 +273,7 @@ func (m *Manager) Destroy(ctx context.Context, sandboxID string) error {
|
||||
|
||||
// cleanup tears down all resources for a sandbox.
|
||||
func (m *Manager) cleanup(ctx context.Context, sb *sandboxState) {
|
||||
m.stopSampler(sb)
|
||||
if err := m.vm.Destroy(ctx, sb.ID); err != nil {
|
||||
slog.Warn("vm destroy error", "id", sb.ID, "error", err)
|
||||
}
|
||||
@ -668,6 +677,8 @@ func (m *Manager) Resume(ctx context.Context, sandboxID string, timeoutSec int)
|
||||
m.boxes[sandboxID] = sb
|
||||
m.mu.Unlock()
|
||||
|
||||
m.startSampler(sb)
|
||||
|
||||
// Don't delete snapshot dir — diff files are needed for re-pause.
|
||||
// The CoW file was already moved out. The dir will be cleaned up
|
||||
// on destroy or overwritten on re-pause.
|
||||
@ -987,6 +998,8 @@ func (m *Manager) createFromSnapshot(ctx context.Context, sandboxID, snapshotNam
|
||||
m.boxes[sandboxID] = sb
|
||||
m.mu.Unlock()
|
||||
|
||||
m.startSampler(sb)
|
||||
|
||||
slog.Info("sandbox created from snapshot",
|
||||
"id", sandboxID,
|
||||
"snapshot", snapshotName,
|
||||
@ -1213,6 +1226,158 @@ func warnErr(msg string, id string, err error) {
|
||||
}
|
||||
}
|
||||
|
||||
// startSampler resolves the Firecracker child PID and starts a background
// goroutine that samples CPU/mem/disk at 500ms intervals into the ring buffer.
// Must be called after the sandbox is registered in m.boxes.
// Metrics are best-effort: on any failure here the sandbox keeps running
// without a sampler (sb.ring stays nil).
func (m *Manager) startSampler(sb *sandboxState) {
	// Resolve the Firecracker PID (child of unshare wrapper).
	v, ok := m.vm.Get(sb.ID)
	if !ok {
		slog.Warn("metrics: VM not found, skipping sampler", "id", sb.ID)
		return
	}
	unshPID := v.PID()

	// Retry briefly: immediately after spawn the wrapper may not have forked
	// its child yet, so /proc/<pid>/task/<pid>/children can still be empty.
	var fcPID int
	for attempt := 0; attempt < 5; attempt++ {
		var err error
		fcPID, err = findChildPID(unshPID)
		if err == nil {
			break
		}
		if attempt == 4 {
			slog.Warn("metrics: could not resolve FC PID, skipping sampler", "id", sb.ID, "error", err)
			return
		}
		time.Sleep(50 * time.Millisecond)
	}

	sb.fcPID = fcPID
	sb.ring = newMetricsRing()

	ctx, cancel := context.WithCancel(context.Background())
	sb.samplerCancel = cancel
	sb.samplerDone = make(chan struct{})

	// Read initial CPU counters for delta calculation.
	// Passed to goroutine as local state — no shared mutation.
	// On failure the zero cpuStat is passed; samplerLoop treats the zero
	// value as "uninitialized" and skips the first CPU delta.
	initialCPU, err := readCPUStat(fcPID)
	if err != nil {
		slog.Warn("metrics: could not read initial CPU stat", "id", sb.ID, "error", err)
	}

	go m.samplerLoop(ctx, sb, fcPID, sb.VCPUs, initialCPU)
}
|
||||
|
||||
// samplerLoop samples /proc metrics at 500ms intervals until ctx is
// cancelled, pushing each sample into sb.ring. Closes sb.samplerDone on exit
// so stopSampler can wait for it.
// lastCPU is goroutine-local to avoid shared-state races.
func (m *Manager) samplerLoop(ctx context.Context, sb *sandboxState, fcPID, vcpus int, lastCPU cpuStat) {
	defer close(sb.samplerDone)

	ticker := time.NewTicker(500 * time.Millisecond)
	defer ticker.Stop()

	// NOTE(review): hard-coded clock-tick rate. sysconf(_SC_CLK_TCK) is
	// almost always 100 on Linux, but if a target kernel differs, CPU% will
	// be scaled wrong — confirm for the deployment targets.
	clkTck := 100.0 // sysconf(_SC_CLK_TCK), almost always 100 on Linux
	lastTime := time.Now()
	// A genuinely idle new process reports utime=stime=0, which is
	// indistinguishable from the zero "uninitialized" cpuStat; the only cost
	// is skipping one extra first delta.
	cpuInitialized := lastCPU != (cpuStat{})

	for {
		select {
		case <-ctx.Done():
			return
		case now := <-ticker.C:
			elapsed := now.Sub(lastTime).Seconds()
			lastTime = now

			// CPU: delta jiffies / (elapsed * CLK_TCK * vcpus) * 100
			var cpuPct float64
			cur, err := readCPUStat(fcPID)
			if err == nil {
				if cpuInitialized && elapsed > 0 && vcpus > 0 {
					// utime/stime are monotonic for a live process, so the
					// unsigned subtraction cannot wrap while fcPID is alive.
					deltaJiffies := float64((cur.utime + cur.stime) - (lastCPU.utime + lastCPU.stime))
					cpuPct = (deltaJiffies / (elapsed * clkTck * float64(vcpus))) * 100.0
					// Clamp to [0, 100] against rounding/scheduling jitter.
					if cpuPct > 100.0 {
						cpuPct = 100.0
					}
					if cpuPct < 0 {
						cpuPct = 0
					}
				}
				lastCPU = cur
				cpuInitialized = true
			}

			// Memory: VmRSS of the Firecracker process.
			// Errors are deliberately ignored — a failed read records 0.
			memBytes, _ := readMemRSS(fcPID)

			// Disk: allocated bytes of the CoW sparse file.
			// NOTE(review): sb.dmDevice is read here without a lock — confirm
			// it is immutable for the lifetime of the sampler goroutine.
			var diskBytes int64
			if sb.dmDevice != nil {
				diskBytes, _ = readDiskAllocated(sb.dmDevice.CowPath)
			}

			sb.ring.Push(MetricPoint{
				Timestamp: now,
				CPUPct:    cpuPct,
				MemBytes:  memBytes,
				DiskBytes: diskBytes,
			})
		}
	}
}
|
||||
|
||||
// stopSampler stops the metrics sampling goroutine and waits for it to exit.
|
||||
func (m *Manager) stopSampler(sb *sandboxState) {
|
||||
if sb.samplerCancel != nil {
|
||||
sb.samplerCancel()
|
||||
<-sb.samplerDone
|
||||
sb.samplerCancel = nil
|
||||
}
|
||||
}
|
||||
|
||||
// GetMetrics returns the ring buffer data for the given range tier.
|
||||
// Valid ranges: "10m", "2h", "24h".
|
||||
func (m *Manager) GetMetrics(sandboxID, rangeTier string) ([]MetricPoint, error) {
|
||||
m.mu.RLock()
|
||||
sb, ok := m.boxes[sandboxID]
|
||||
m.mu.RUnlock()
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("sandbox not found: %s", sandboxID)
|
||||
}
|
||||
if sb.ring == nil {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
switch rangeTier {
|
||||
case "10m":
|
||||
return sb.ring.Get10m(), nil
|
||||
case "2h":
|
||||
return sb.ring.Get2h(), nil
|
||||
case "24h":
|
||||
return sb.ring.Get24h(), nil
|
||||
default:
|
||||
return nil, fmt.Errorf("invalid range: %s (valid: 10m, 2h, 24h)", rangeTier)
|
||||
}
|
||||
}
|
||||
|
||||
// FlushMetrics returns all three tier ring buffers, clears the ring, and
// stops the sampler goroutine. Called by the control plane before pause/destroy.
// After a flush the sampler is gone; resuming restarts it via startSampler.
func (m *Manager) FlushMetrics(sandboxID string) (pts10m, pts2h, pts24h []MetricPoint, err error) {
	m.mu.RLock()
	sb, ok := m.boxes[sandboxID]
	m.mu.RUnlock()
	if !ok {
		return nil, nil, nil, fmt.Errorf("sandbox not found: %s", sandboxID)
	}

	// NOTE(review): stopSampler mutates sb.samplerCancel without holding a
	// lock; concurrent FlushMetrics/cleanup for the same sandbox would race.
	// Confirm callers serialize per-sandbox lifecycle operations.
	m.stopSampler(sb)
	// Sampler never started (e.g. PID resolution failed) — nothing to flush.
	if sb.ring == nil {
		return nil, nil, nil, nil
	}
	pts10m, pts2h, pts24h = sb.ring.Flush()
	return pts10m, pts2h, pts24h, nil
}
|
||||
|
||||
// copyFile copies a regular file from src to dst using streaming I/O.
|
||||
func copyFile(src, dst string) error {
|
||||
sf, err := os.Open(src)
|
||||
|
||||
178
internal/sandbox/metrics.go
Normal file
178
internal/sandbox/metrics.go
Normal file
@ -0,0 +1,178 @@
|
||||
package sandbox
|
||||
|
||||
import (
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
// MetricPoint holds one metrics sample: CPU utilization, resident memory,
// and allocated disk at a given instant.
type MetricPoint struct {
	Timestamp time.Time
	CPUPct    float64
	MemBytes  int64
	DiskBytes int64
}

// Ring buffer capacity constants.
const (
	ring10mCap = 1200 // 500ms × 1200 = 10 min
	ring2hCap  = 240  // 30s × 240 = 2 h
	ring24hCap = 288  // 5min × 288 = 24 h

	downsample2hEvery  = 60 // 60 × 500ms = 30s
	downsample24hEvery = 10 // 10 × 30s = 5min
)

// metricsRing holds three tiered ring buffers with automatic downsampling
// from the finest tier into coarser tiers: every 60 raw 500ms samples are
// averaged into one 2h-tier point, and every 10 of those into one 24h-tier
// point. All methods take the internal mutex and are safe for concurrent
// use; the struct must not be copied once in use.
type metricsRing struct {
	mu sync.Mutex

	// 10-minute tier: raw 500ms samples.
	buf10m   [ring10mCap]MetricPoint
	idx10m   int // next write slot
	count10m int // valid entries, ≤ ring10mCap

	// 2-hour tier: 30s averages.
	buf2h   [ring2hCap]MetricPoint
	idx2h   int
	count2h int

	// 24-hour tier: 5min averages.
	buf24h   [ring24hCap]MetricPoint
	idx24h   int
	count24h int

	// Accumulators for downsampling.
	acc500ms  [downsample2hEvery]MetricPoint
	acc500msN int

	acc30s  [downsample24hEvery]MetricPoint
	acc30sN int
}

// newMetricsRing creates an empty metrics ring buffer; the zero value of
// every field is a valid starting state.
func newMetricsRing() *metricsRing {
	return &metricsRing{}
}

// Push adds a 500ms sample to the finest tier and triggers downsampling
// into coarser tiers when enough samples have accumulated.
func (r *metricsRing) Push(p MetricPoint) {
	r.mu.Lock()
	defer r.mu.Unlock()

	// Write to 10m ring.
	r.buf10m[r.idx10m] = p
	r.idx10m = (r.idx10m + 1) % ring10mCap
	if r.count10m < ring10mCap {
		r.count10m++
	}

	// Accumulate for 2h downsample: every 60th raw sample emits one average.
	r.acc500ms[r.acc500msN] = p
	r.acc500msN++
	if r.acc500msN == downsample2hEvery {
		r.push2h(averagePoints(r.acc500ms[:downsample2hEvery]))
		r.acc500msN = 0
	}
}

// push2h appends a 30s average to the 2h tier and feeds the 24h
// accumulator. Caller must hold r.mu.
func (r *metricsRing) push2h(p MetricPoint) {
	r.buf2h[r.idx2h] = p
	r.idx2h = (r.idx2h + 1) % ring2hCap
	if r.count2h < ring2hCap {
		r.count2h++
	}

	// Accumulate for 24h downsample: every 10th 30s average emits one 5min point.
	r.acc30s[r.acc30sN] = p
	r.acc30sN++
	if r.acc30sN == downsample24hEvery {
		r.push24h(averagePoints(r.acc30s[:downsample24hEvery]))
		r.acc30sN = 0
	}
}

// push24h appends a 5min average to the coarsest tier. Caller must hold r.mu.
func (r *metricsRing) push24h(p MetricPoint) {
	r.buf24h[r.idx24h] = p
	r.idx24h = (r.idx24h + 1) % ring24hCap
	if r.count24h < ring24hCap {
		r.count24h++
	}
}

// Get10m returns the 10-minute tier points in chronological order.
func (r *metricsRing) Get10m() []MetricPoint {
	r.mu.Lock()
	defer r.mu.Unlock()
	return r.readRing(r.buf10m[:], r.idx10m, r.count10m)
}

// Get2h returns the 2-hour tier points in chronological order.
func (r *metricsRing) Get2h() []MetricPoint {
	r.mu.Lock()
	defer r.mu.Unlock()
	return r.readRing(r.buf2h[:], r.idx2h, r.count2h)
}

// Get24h returns the 24-hour tier points in chronological order.
func (r *metricsRing) Get24h() []MetricPoint {
	r.mu.Lock()
	defer r.mu.Unlock()
	return r.readRing(r.buf24h[:], r.idx24h, r.count24h)
}

// Flush returns all three tiers and resets the ring buffer. Partially
// filled downsampling accumulators are discarded, not emitted.
func (r *metricsRing) Flush() (pts10m, pts2h, pts24h []MetricPoint) {
	r.mu.Lock()
	defer r.mu.Unlock()

	pts10m = r.readRing(r.buf10m[:], r.idx10m, r.count10m)
	pts2h = r.readRing(r.buf2h[:], r.idx2h, r.count2h)
	pts24h = r.readRing(r.buf24h[:], r.idx24h, r.count24h)

	// Reset counters and indices; stale buffer contents become unreachable.
	r.idx10m, r.count10m = 0, 0
	r.idx2h, r.count2h = 0, 0
	r.idx24h, r.count24h = 0, 0
	r.acc500msN = 0
	r.acc30sN = 0

	return pts10m, pts2h, pts24h
}

// readRing copies the count most recent elements (those written just before
// nextIdx) out of a circular buffer in chronological order.
// Caller must hold r.mu.
func (r *metricsRing) readRing(buf []MetricPoint, nextIdx, count int) []MetricPoint {
	if count == 0 {
		return nil
	}
	result := make([]MetricPoint, count)
	bufLen := len(buf)
	start := (nextIdx - count + bufLen) % bufLen
	for i := 0; i < count; i++ {
		result[i] = buf[(start+i)%bufLen]
	}
	return result
}

// averagePoints computes the arithmetic mean of a slice of MetricPoints.
// The timestamp is set to the last point's timestamp. An empty slice yields
// the zero MetricPoint (fix: previously this indexed pts[len(pts)-1]
// unconditionally and would panic on empty input).
func averagePoints(pts []MetricPoint) MetricPoint {
	if len(pts) == 0 {
		return MetricPoint{}
	}
	n := float64(len(pts))
	var cpu float64
	var mem, disk int64
	for _, p := range pts {
		cpu += p.CPUPct
		mem += p.MemBytes
		disk += p.DiskBytes
	}
	return MetricPoint{
		Timestamp: pts[len(pts)-1].Timestamp,
		CPUPct:    cpu / n,
		MemBytes:  int64(float64(mem) / n),
		DiskBytes: int64(float64(disk) / n),
	}
}
|
||||
105
internal/sandbox/proc.go
Normal file
105
internal/sandbox/proc.go
Normal file
@ -0,0 +1,105 @@
|
||||
package sandbox
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
"syscall"
|
||||
)
|
||||
|
||||
// findChildPID returns the first direct child PID of parentPID, read from
// /proc/<pid>/task/<pid>/children. The Firecracker process is a direct child
// of the unshare wrapper because the init script uses
// `exec ip netns exec ... firecracker`, which replaces bash with
// ip-netns-exec, which in turn execs firecracker — same PID, direct child
// of unshare.
func findChildPID(parentPID int) (int, error) {
	childrenPath := fmt.Sprintf("/proc/%d/task/%d/children", parentPID, parentPID)
	raw, err := os.ReadFile(childrenPath)
	if err != nil {
		return 0, fmt.Errorf("read children: %w", err)
	}

	kids := strings.Fields(string(raw))
	if len(kids) == 0 {
		return 0, fmt.Errorf("no child processes found for PID %d", parentPID)
	}

	child, err := strconv.Atoi(kids[0])
	if err != nil {
		return 0, fmt.Errorf("parse child PID %q: %w", kids[0], err)
	}
	return child, nil
}
|
||||
|
||||
// cpuStat holds raw CPU jiffies read from /proc/{pid}/stat.
type cpuStat struct {
	utime uint64
	stime uint64
}

// readCPUStat returns user and system CPU jiffies for pid from
// /proc/{pid}/stat. Per proc(5), utime and stime are fields 14 and 15; the
// comm field (field 2) may itself contain spaces and parentheses, so parsing
// starts after the LAST ')' — from there, state is index 0 and utime/stime
// land at indices 11 and 12.
func readCPUStat(pid int) (cpuStat, error) {
	raw, err := os.ReadFile(fmt.Sprintf("/proc/%d/stat", pid))
	if err != nil {
		return cpuStat{}, fmt.Errorf("read stat: %w", err)
	}

	content := string(raw)
	end := strings.LastIndex(content, ")")
	if end < 0 {
		return cpuStat{}, fmt.Errorf("malformed /proc/%d/stat: no closing paren", pid)
	}

	fields := strings.Fields(content[end+2:])
	if len(fields) < 13 {
		return cpuStat{}, fmt.Errorf("malformed /proc/%d/stat: too few fields (%d)", pid, len(fields))
	}

	var st cpuStat
	if st.utime, err = strconv.ParseUint(fields[11], 10, 64); err != nil {
		return cpuStat{}, fmt.Errorf("parse utime: %w", err)
	}
	if st.stime, err = strconv.ParseUint(fields[12], 10, 64); err != nil {
		return cpuStat{}, fmt.Errorf("parse stime: %w", err)
	}
	return st, nil
}
|
||||
|
||||
// readMemRSS reads the VmRSS line from /proc/{pid}/status and returns
// the resident set size in bytes (the kernel reports it in kB).
func readMemRSS(pid int) (int64, error) {
	raw, err := os.ReadFile(fmt.Sprintf("/proc/%d/status", pid))
	if err != nil {
		return 0, fmt.Errorf("read status: %w", err)
	}
	for _, line := range strings.Split(string(raw), "\n") {
		if !strings.HasPrefix(line, "VmRSS:") {
			continue
		}
		parts := strings.Fields(line)
		if len(parts) < 2 {
			return 0, fmt.Errorf("malformed VmRSS line")
		}
		kb, perr := strconv.ParseInt(parts[1], 10, 64)
		if perr != nil {
			return 0, fmt.Errorf("parse VmRSS: %w", perr)
		}
		return kb * 1024, nil
	}
	return 0, fmt.Errorf("VmRSS not found in /proc/%d/status", pid)
}
|
||||
|
||||
// readDiskAllocated returns the actual allocated bytes (not apparent size)
|
||||
// of the file at path. This uses stat's block count × 512.
|
||||
func readDiskAllocated(path string) (int64, error) {
|
||||
var stat syscall.Stat_t
|
||||
if err := syscall.Stat(path, &stat); err != nil {
|
||||
return 0, fmt.Errorf("stat %s: %w", path, err)
|
||||
}
|
||||
return stat.Blocks * 512, nil
|
||||
}
|
||||
@ -58,6 +58,8 @@ type hostagentClient = interface {
|
||||
PauseSandbox(ctx context.Context, req *connect.Request[pb.PauseSandboxRequest]) (*connect.Response[pb.PauseSandboxResponse], error)
|
||||
ResumeSandbox(ctx context.Context, req *connect.Request[pb.ResumeSandboxRequest]) (*connect.Response[pb.ResumeSandboxResponse], error)
|
||||
PingSandbox(ctx context.Context, req *connect.Request[pb.PingSandboxRequest]) (*connect.Response[pb.PingSandboxResponse], error)
|
||||
GetSandboxMetrics(ctx context.Context, req *connect.Request[pb.GetSandboxMetricsRequest]) (*connect.Response[pb.GetSandboxMetricsResponse], error)
|
||||
FlushSandboxMetrics(ctx context.Context, req *connect.Request[pb.FlushSandboxMetricsRequest]) (*connect.Response[pb.FlushSandboxMetricsResponse], error)
|
||||
}
|
||||
|
||||
// Create creates a new sandbox: picks a host via the scheduler, inserts a pending
|
||||
@ -180,6 +182,9 @@ func (s *SandboxService) Pause(ctx context.Context, sandboxID, teamID string) (d
|
||||
return db.Sandbox{}, err
|
||||
}
|
||||
|
||||
// Flush all metrics tiers before pausing so data survives in DB.
|
||||
s.flushAndPersistMetrics(ctx, agent, sandboxID, true)
|
||||
|
||||
if _, err := agent.PauseSandbox(ctx, connect.NewRequest(&pb.PauseSandboxRequest{
|
||||
SandboxId: sandboxID,
|
||||
})); err != nil {
|
||||
@ -236,7 +241,8 @@ func (s *SandboxService) Resume(ctx context.Context, sandboxID, teamID string) (
|
||||
|
||||
// Destroy stops a sandbox and marks it as stopped.
|
||||
func (s *SandboxService) Destroy(ctx context.Context, sandboxID, teamID string) error {
|
||||
if _, err := s.DB.GetSandboxByTeam(ctx, db.GetSandboxByTeamParams{ID: sandboxID, TeamID: teamID}); err != nil {
|
||||
sb, err := s.DB.GetSandboxByTeam(ctx, db.GetSandboxByTeamParams{ID: sandboxID, TeamID: teamID})
|
||||
if err != nil {
|
||||
return fmt.Errorf("sandbox not found: %w", err)
|
||||
}
|
||||
|
||||
@ -245,6 +251,11 @@ func (s *SandboxService) Destroy(ctx context.Context, sandboxID, teamID string)
|
||||
return err
|
||||
}
|
||||
|
||||
// If running, flush 24h tier metrics for analytics before destroying.
|
||||
if sb.Status == "running" {
|
||||
s.flushAndPersistMetrics(ctx, agent, sandboxID, false)
|
||||
}
|
||||
|
||||
// Destroy on host agent. A not-found response is fine — sandbox is already gone.
|
||||
if _, err := agent.DestroySandbox(ctx, connect.NewRequest(&pb.DestroySandboxRequest{
|
||||
SandboxId: sandboxID,
|
||||
@ -252,6 +263,16 @@ func (s *SandboxService) Destroy(ctx context.Context, sandboxID, teamID string)
|
||||
return fmt.Errorf("agent destroy: %w", err)
|
||||
}
|
||||
|
||||
// For a paused sandbox, only keep 24h tier; remove the finer-grained tiers.
|
||||
if sb.Status == "paused" {
|
||||
_ = s.DB.DeleteSandboxMetricPointsByTier(ctx, db.DeleteSandboxMetricPointsByTierParams{
|
||||
SandboxID: sandboxID, Tier: "10m",
|
||||
})
|
||||
_ = s.DB.DeleteSandboxMetricPointsByTier(ctx, db.DeleteSandboxMetricPointsByTierParams{
|
||||
SandboxID: sandboxID, Tier: "2h",
|
||||
})
|
||||
}
|
||||
|
||||
if _, err := s.DB.UpdateSandboxStatus(ctx, db.UpdateSandboxStatusParams{
|
||||
ID: sandboxID, Status: "stopped",
|
||||
}); err != nil {
|
||||
@ -260,6 +281,41 @@ func (s *SandboxService) Destroy(ctx context.Context, sandboxID, teamID string)
|
||||
return nil
|
||||
}
|
||||
|
||||
// flushAndPersistMetrics calls FlushSandboxMetrics on the agent and stores
|
||||
// the returned data to DB. If allTiers is true, all three tiers are saved;
|
||||
// otherwise only the 24h tier (for post-destroy analytics).
|
||||
func (s *SandboxService) flushAndPersistMetrics(ctx context.Context, agent hostagentClient, sandboxID string, allTiers bool) {
|
||||
resp, err := agent.FlushSandboxMetrics(ctx, connect.NewRequest(&pb.FlushSandboxMetricsRequest{
|
||||
SandboxId: sandboxID,
|
||||
}))
|
||||
if err != nil {
|
||||
slog.Warn("flush metrics failed (best-effort)", "sandbox_id", sandboxID, "error", err)
|
||||
return
|
||||
}
|
||||
msg := resp.Msg
|
||||
|
||||
if allTiers {
|
||||
s.persistMetricPoints(ctx, sandboxID, "10m", msg.Points_10M)
|
||||
s.persistMetricPoints(ctx, sandboxID, "2h", msg.Points_2H)
|
||||
}
|
||||
s.persistMetricPoints(ctx, sandboxID, "24h", msg.Points_24H)
|
||||
}
|
||||
|
||||
func (s *SandboxService) persistMetricPoints(ctx context.Context, sandboxID, tier string, points []*pb.MetricPoint) {
|
||||
for _, p := range points {
|
||||
if err := s.DB.InsertSandboxMetricPoint(ctx, db.InsertSandboxMetricPointParams{
|
||||
SandboxID: sandboxID,
|
||||
Tier: tier,
|
||||
Ts: p.TimestampUnix,
|
||||
CpuPct: p.CpuPct,
|
||||
MemBytes: p.MemBytes,
|
||||
DiskBytes: p.DiskBytes,
|
||||
}); err != nil {
|
||||
slog.Warn("persist metric point failed", "sandbox_id", sandboxID, "tier", tier, "error", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Ping resets the inactivity timer for a running sandbox.
|
||||
func (s *SandboxService) Ping(ctx context.Context, sandboxID, teamID string) error {
|
||||
sb, err := s.DB.GetSandboxByTeam(ctx, db.GetSandboxByTeamParams{ID: sandboxID, TeamID: teamID})
|
||||
|
||||
@ -250,6 +250,12 @@ func (m *Manager) CreateFromSnapshot(ctx context.Context, cfg VMConfig, snapPath
|
||||
return vm, nil
|
||||
}
|
||||
|
||||
// PID returns the process ID of the unshare wrapper process.
// The actual Firecracker process is a direct child of this PID, so
// /proc sampling must first resolve the child (one more hop down the
// process tree) rather than reading this PID's stat directly.
func (v *VM) PID() int {
	return v.process.cmd.Process.Pid
}
|
||||
|
||||
// Get returns a running VM by sandbox ID.
|
||||
func (m *Manager) Get(sandboxID string) (*VM, bool) {
|
||||
vm, ok := m.vms[sandboxID]
|
||||
|
||||
@ -1902,6 +1902,275 @@ func (*TerminateResponse) Descriptor() ([]byte, []int) {
|
||||
return file_hostagent_proto_rawDescGZIP(), []int{34}
|
||||
}
|
||||
|
||||
// MetricPoint is one downsampled resource sample for a sandbox: a Unix
// timestamp plus CPU percentage, resident memory bytes, and allocated
// disk bytes.
//
// NOTE(review): this is protoc-gen-go output; change hostagent.proto and
// regenerate rather than editing this struct by hand.
type MetricPoint struct {
	state         protoimpl.MessageState `protogen:"open.v1"`
	TimestampUnix int64                  `protobuf:"varint,1,opt,name=timestamp_unix,json=timestampUnix,proto3" json:"timestamp_unix,omitempty"`
	CpuPct        float64                `protobuf:"fixed64,2,opt,name=cpu_pct,json=cpuPct,proto3" json:"cpu_pct,omitempty"`
	MemBytes      int64                  `protobuf:"varint,3,opt,name=mem_bytes,json=memBytes,proto3" json:"mem_bytes,omitempty"`
	DiskBytes     int64                  `protobuf:"varint,4,opt,name=disk_bytes,json=diskBytes,proto3" json:"disk_bytes,omitempty"`
	unknownFields protoimpl.UnknownFields
	sizeCache     protoimpl.SizeCache
}
|
||||
|
||||
func (x *MetricPoint) Reset() {
|
||||
*x = MetricPoint{}
|
||||
mi := &file_hostagent_proto_msgTypes[35]
|
||||
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
|
||||
ms.StoreMessageInfo(mi)
|
||||
}
|
||||
|
||||
func (x *MetricPoint) String() string {
|
||||
return protoimpl.X.MessageStringOf(x)
|
||||
}
|
||||
|
||||
func (*MetricPoint) ProtoMessage() {}
|
||||
|
||||
func (x *MetricPoint) ProtoReflect() protoreflect.Message {
|
||||
mi := &file_hostagent_proto_msgTypes[35]
|
||||
if x != nil {
|
||||
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
|
||||
if ms.LoadMessageInfo() == nil {
|
||||
ms.StoreMessageInfo(mi)
|
||||
}
|
||||
return ms
|
||||
}
|
||||
return mi.MessageOf(x)
|
||||
}
|
||||
|
||||
// Deprecated: Use MetricPoint.ProtoReflect.Descriptor instead.
|
||||
func (*MetricPoint) Descriptor() ([]byte, []int) {
|
||||
return file_hostagent_proto_rawDescGZIP(), []int{35}
|
||||
}
|
||||
|
||||
func (x *MetricPoint) GetTimestampUnix() int64 {
|
||||
if x != nil {
|
||||
return x.TimestampUnix
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (x *MetricPoint) GetCpuPct() float64 {
|
||||
if x != nil {
|
||||
return x.CpuPct
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (x *MetricPoint) GetMemBytes() int64 {
|
||||
if x != nil {
|
||||
return x.MemBytes
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (x *MetricPoint) GetDiskBytes() int64 {
|
||||
if x != nil {
|
||||
return x.DiskBytes
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
type GetSandboxMetricsRequest struct {
|
||||
state protoimpl.MessageState `protogen:"open.v1"`
|
||||
SandboxId string `protobuf:"bytes,1,opt,name=sandbox_id,json=sandboxId,proto3" json:"sandbox_id,omitempty"`
|
||||
// Range tier: "10m", "2h", or "24h".
|
||||
Range string `protobuf:"bytes,2,opt,name=range,proto3" json:"range,omitempty"`
|
||||
unknownFields protoimpl.UnknownFields
|
||||
sizeCache protoimpl.SizeCache
|
||||
}
|
||||
|
||||
func (x *GetSandboxMetricsRequest) Reset() {
|
||||
*x = GetSandboxMetricsRequest{}
|
||||
mi := &file_hostagent_proto_msgTypes[36]
|
||||
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
|
||||
ms.StoreMessageInfo(mi)
|
||||
}
|
||||
|
||||
func (x *GetSandboxMetricsRequest) String() string {
|
||||
return protoimpl.X.MessageStringOf(x)
|
||||
}
|
||||
|
||||
func (*GetSandboxMetricsRequest) ProtoMessage() {}
|
||||
|
||||
func (x *GetSandboxMetricsRequest) ProtoReflect() protoreflect.Message {
|
||||
mi := &file_hostagent_proto_msgTypes[36]
|
||||
if x != nil {
|
||||
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
|
||||
if ms.LoadMessageInfo() == nil {
|
||||
ms.StoreMessageInfo(mi)
|
||||
}
|
||||
return ms
|
||||
}
|
||||
return mi.MessageOf(x)
|
||||
}
|
||||
|
||||
// Deprecated: Use GetSandboxMetricsRequest.ProtoReflect.Descriptor instead.
|
||||
func (*GetSandboxMetricsRequest) Descriptor() ([]byte, []int) {
|
||||
return file_hostagent_proto_rawDescGZIP(), []int{36}
|
||||
}
|
||||
|
||||
func (x *GetSandboxMetricsRequest) GetSandboxId() string {
|
||||
if x != nil {
|
||||
return x.SandboxId
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func (x *GetSandboxMetricsRequest) GetRange() string {
|
||||
if x != nil {
|
||||
return x.Range
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
type GetSandboxMetricsResponse struct {
|
||||
state protoimpl.MessageState `protogen:"open.v1"`
|
||||
Points []*MetricPoint `protobuf:"bytes,1,rep,name=points,proto3" json:"points,omitempty"`
|
||||
unknownFields protoimpl.UnknownFields
|
||||
sizeCache protoimpl.SizeCache
|
||||
}
|
||||
|
||||
func (x *GetSandboxMetricsResponse) Reset() {
|
||||
*x = GetSandboxMetricsResponse{}
|
||||
mi := &file_hostagent_proto_msgTypes[37]
|
||||
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
|
||||
ms.StoreMessageInfo(mi)
|
||||
}
|
||||
|
||||
func (x *GetSandboxMetricsResponse) String() string {
|
||||
return protoimpl.X.MessageStringOf(x)
|
||||
}
|
||||
|
||||
func (*GetSandboxMetricsResponse) ProtoMessage() {}
|
||||
|
||||
func (x *GetSandboxMetricsResponse) ProtoReflect() protoreflect.Message {
|
||||
mi := &file_hostagent_proto_msgTypes[37]
|
||||
if x != nil {
|
||||
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
|
||||
if ms.LoadMessageInfo() == nil {
|
||||
ms.StoreMessageInfo(mi)
|
||||
}
|
||||
return ms
|
||||
}
|
||||
return mi.MessageOf(x)
|
||||
}
|
||||
|
||||
// Deprecated: Use GetSandboxMetricsResponse.ProtoReflect.Descriptor instead.
|
||||
func (*GetSandboxMetricsResponse) Descriptor() ([]byte, []int) {
|
||||
return file_hostagent_proto_rawDescGZIP(), []int{37}
|
||||
}
|
||||
|
||||
func (x *GetSandboxMetricsResponse) GetPoints() []*MetricPoint {
|
||||
if x != nil {
|
||||
return x.Points
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
type FlushSandboxMetricsRequest struct {
|
||||
state protoimpl.MessageState `protogen:"open.v1"`
|
||||
SandboxId string `protobuf:"bytes,1,opt,name=sandbox_id,json=sandboxId,proto3" json:"sandbox_id,omitempty"`
|
||||
unknownFields protoimpl.UnknownFields
|
||||
sizeCache protoimpl.SizeCache
|
||||
}
|
||||
|
||||
func (x *FlushSandboxMetricsRequest) Reset() {
|
||||
*x = FlushSandboxMetricsRequest{}
|
||||
mi := &file_hostagent_proto_msgTypes[38]
|
||||
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
|
||||
ms.StoreMessageInfo(mi)
|
||||
}
|
||||
|
||||
func (x *FlushSandboxMetricsRequest) String() string {
|
||||
return protoimpl.X.MessageStringOf(x)
|
||||
}
|
||||
|
||||
func (*FlushSandboxMetricsRequest) ProtoMessage() {}
|
||||
|
||||
func (x *FlushSandboxMetricsRequest) ProtoReflect() protoreflect.Message {
|
||||
mi := &file_hostagent_proto_msgTypes[38]
|
||||
if x != nil {
|
||||
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
|
||||
if ms.LoadMessageInfo() == nil {
|
||||
ms.StoreMessageInfo(mi)
|
||||
}
|
||||
return ms
|
||||
}
|
||||
return mi.MessageOf(x)
|
||||
}
|
||||
|
||||
// Deprecated: Use FlushSandboxMetricsRequest.ProtoReflect.Descriptor instead.
|
||||
func (*FlushSandboxMetricsRequest) Descriptor() ([]byte, []int) {
|
||||
return file_hostagent_proto_rawDescGZIP(), []int{38}
|
||||
}
|
||||
|
||||
func (x *FlushSandboxMetricsRequest) GetSandboxId() string {
|
||||
if x != nil {
|
||||
return x.SandboxId
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
type FlushSandboxMetricsResponse struct {
|
||||
state protoimpl.MessageState `protogen:"open.v1"`
|
||||
Points_10M []*MetricPoint `protobuf:"bytes,1,rep,name=points_10m,json=points10m,proto3" json:"points_10m,omitempty"`
|
||||
Points_2H []*MetricPoint `protobuf:"bytes,2,rep,name=points_2h,json=points2h,proto3" json:"points_2h,omitempty"`
|
||||
Points_24H []*MetricPoint `protobuf:"bytes,3,rep,name=points_24h,json=points24h,proto3" json:"points_24h,omitempty"`
|
||||
unknownFields protoimpl.UnknownFields
|
||||
sizeCache protoimpl.SizeCache
|
||||
}
|
||||
|
||||
func (x *FlushSandboxMetricsResponse) Reset() {
|
||||
*x = FlushSandboxMetricsResponse{}
|
||||
mi := &file_hostagent_proto_msgTypes[39]
|
||||
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
|
||||
ms.StoreMessageInfo(mi)
|
||||
}
|
||||
|
||||
func (x *FlushSandboxMetricsResponse) String() string {
|
||||
return protoimpl.X.MessageStringOf(x)
|
||||
}
|
||||
|
||||
func (*FlushSandboxMetricsResponse) ProtoMessage() {}
|
||||
|
||||
func (x *FlushSandboxMetricsResponse) ProtoReflect() protoreflect.Message {
|
||||
mi := &file_hostagent_proto_msgTypes[39]
|
||||
if x != nil {
|
||||
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
|
||||
if ms.LoadMessageInfo() == nil {
|
||||
ms.StoreMessageInfo(mi)
|
||||
}
|
||||
return ms
|
||||
}
|
||||
return mi.MessageOf(x)
|
||||
}
|
||||
|
||||
// Deprecated: Use FlushSandboxMetricsResponse.ProtoReflect.Descriptor instead.
|
||||
func (*FlushSandboxMetricsResponse) Descriptor() ([]byte, []int) {
|
||||
return file_hostagent_proto_rawDescGZIP(), []int{39}
|
||||
}
|
||||
|
||||
func (x *FlushSandboxMetricsResponse) GetPoints_10M() []*MetricPoint {
|
||||
if x != nil {
|
||||
return x.Points_10M
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (x *FlushSandboxMetricsResponse) GetPoints_2H() []*MetricPoint {
|
||||
if x != nil {
|
||||
return x.Points_2H
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (x *FlushSandboxMetricsResponse) GetPoints_24H() []*MetricPoint {
|
||||
if x != nil {
|
||||
return x.Points_24H
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
var File_hostagent_proto protoreflect.FileDescriptor
|
||||
|
||||
const file_hostagent_proto_rawDesc = "" +
|
||||
@ -2029,8 +2298,28 @@ const file_hostagent_proto_rawDesc = "" +
|
||||
"sandbox_id\x18\x01 \x01(\tR\tsandboxId\"\x15\n" +
|
||||
"\x13PingSandboxResponse\"\x12\n" +
|
||||
"\x10TerminateRequest\"\x13\n" +
|
||||
"\x11TerminateResponse2\x9c\n" +
|
||||
"\x11TerminateResponse\"\x89\x01\n" +
|
||||
"\vMetricPoint\x12%\n" +
|
||||
"\x0etimestamp_unix\x18\x01 \x01(\x03R\rtimestampUnix\x12\x17\n" +
|
||||
"\acpu_pct\x18\x02 \x01(\x01R\x06cpuPct\x12\x1b\n" +
|
||||
"\tmem_bytes\x18\x03 \x01(\x03R\bmemBytes\x12\x1d\n" +
|
||||
"\n" +
|
||||
"disk_bytes\x18\x04 \x01(\x03R\tdiskBytes\"O\n" +
|
||||
"\x18GetSandboxMetricsRequest\x12\x1d\n" +
|
||||
"\n" +
|
||||
"sandbox_id\x18\x01 \x01(\tR\tsandboxId\x12\x14\n" +
|
||||
"\x05range\x18\x02 \x01(\tR\x05range\"N\n" +
|
||||
"\x19GetSandboxMetricsResponse\x121\n" +
|
||||
"\x06points\x18\x01 \x03(\v2\x19.hostagent.v1.MetricPointR\x06points\";\n" +
|
||||
"\x1aFlushSandboxMetricsRequest\x12\x1d\n" +
|
||||
"\n" +
|
||||
"sandbox_id\x18\x01 \x01(\tR\tsandboxId\"\xc9\x01\n" +
|
||||
"\x1bFlushSandboxMetricsResponse\x128\n" +
|
||||
"\n" +
|
||||
"points_10m\x18\x01 \x03(\v2\x19.hostagent.v1.MetricPointR\tpoints10m\x126\n" +
|
||||
"\tpoints_2h\x18\x02 \x03(\v2\x19.hostagent.v1.MetricPointR\bpoints2h\x128\n" +
|
||||
"\n" +
|
||||
"points_24h\x18\x03 \x03(\v2\x19.hostagent.v1.MetricPointR\tpoints24h2\xee\v\n" +
|
||||
"\x10HostAgentService\x12X\n" +
|
||||
"\rCreateSandbox\x12\".hostagent.v1.CreateSandboxRequest\x1a#.hostagent.v1.CreateSandboxResponse\x12[\n" +
|
||||
"\x0eDestroySandbox\x12#.hostagent.v1.DestroySandboxRequest\x1a$.hostagent.v1.DestroySandboxResponse\x12U\n" +
|
||||
@ -2047,7 +2336,9 @@ const file_hostagent_proto_rawDesc = "" +
|
||||
"\x0fWriteFileStream\x12$.hostagent.v1.WriteFileStreamRequest\x1a%.hostagent.v1.WriteFileStreamResponse(\x01\x12]\n" +
|
||||
"\x0eReadFileStream\x12#.hostagent.v1.ReadFileStreamRequest\x1a$.hostagent.v1.ReadFileStreamResponse0\x01\x12R\n" +
|
||||
"\vPingSandbox\x12 .hostagent.v1.PingSandboxRequest\x1a!.hostagent.v1.PingSandboxResponse\x12L\n" +
|
||||
"\tTerminate\x12\x1e.hostagent.v1.TerminateRequest\x1a\x1f.hostagent.v1.TerminateResponseB\xb0\x01\n" +
|
||||
"\tTerminate\x12\x1e.hostagent.v1.TerminateRequest\x1a\x1f.hostagent.v1.TerminateResponse\x12d\n" +
|
||||
"\x11GetSandboxMetrics\x12&.hostagent.v1.GetSandboxMetricsRequest\x1a'.hostagent.v1.GetSandboxMetricsResponse\x12j\n" +
|
||||
"\x13FlushSandboxMetrics\x12(.hostagent.v1.FlushSandboxMetricsRequest\x1a).hostagent.v1.FlushSandboxMetricsResponseB\xb0\x01\n" +
|
||||
"\x10com.hostagent.v1B\x0eHostagentProtoP\x01Z;git.omukk.dev/wrenn/sandbox/proto/hostagent/gen;hostagentv1\xa2\x02\x03HXX\xaa\x02\fHostagent.V1\xca\x02\fHostagent\\V1\xe2\x02\x18Hostagent\\V1\\GPBMetadata\xea\x02\rHostagent::V1b\x06proto3"
|
||||
|
||||
var (
|
||||
@ -2062,43 +2353,48 @@ func file_hostagent_proto_rawDescGZIP() []byte {
|
||||
return file_hostagent_proto_rawDescData
|
||||
}
|
||||
|
||||
var file_hostagent_proto_msgTypes = make([]protoimpl.MessageInfo, 35)
|
||||
var file_hostagent_proto_msgTypes = make([]protoimpl.MessageInfo, 40)
|
||||
var file_hostagent_proto_goTypes = []any{
|
||||
(*CreateSandboxRequest)(nil), // 0: hostagent.v1.CreateSandboxRequest
|
||||
(*CreateSandboxResponse)(nil), // 1: hostagent.v1.CreateSandboxResponse
|
||||
(*DestroySandboxRequest)(nil), // 2: hostagent.v1.DestroySandboxRequest
|
||||
(*DestroySandboxResponse)(nil), // 3: hostagent.v1.DestroySandboxResponse
|
||||
(*PauseSandboxRequest)(nil), // 4: hostagent.v1.PauseSandboxRequest
|
||||
(*PauseSandboxResponse)(nil), // 5: hostagent.v1.PauseSandboxResponse
|
||||
(*ResumeSandboxRequest)(nil), // 6: hostagent.v1.ResumeSandboxRequest
|
||||
(*ResumeSandboxResponse)(nil), // 7: hostagent.v1.ResumeSandboxResponse
|
||||
(*CreateSnapshotRequest)(nil), // 8: hostagent.v1.CreateSnapshotRequest
|
||||
(*CreateSnapshotResponse)(nil), // 9: hostagent.v1.CreateSnapshotResponse
|
||||
(*DeleteSnapshotRequest)(nil), // 10: hostagent.v1.DeleteSnapshotRequest
|
||||
(*DeleteSnapshotResponse)(nil), // 11: hostagent.v1.DeleteSnapshotResponse
|
||||
(*ExecRequest)(nil), // 12: hostagent.v1.ExecRequest
|
||||
(*ExecResponse)(nil), // 13: hostagent.v1.ExecResponse
|
||||
(*ListSandboxesRequest)(nil), // 14: hostagent.v1.ListSandboxesRequest
|
||||
(*ListSandboxesResponse)(nil), // 15: hostagent.v1.ListSandboxesResponse
|
||||
(*SandboxInfo)(nil), // 16: hostagent.v1.SandboxInfo
|
||||
(*WriteFileRequest)(nil), // 17: hostagent.v1.WriteFileRequest
|
||||
(*WriteFileResponse)(nil), // 18: hostagent.v1.WriteFileResponse
|
||||
(*ReadFileRequest)(nil), // 19: hostagent.v1.ReadFileRequest
|
||||
(*ReadFileResponse)(nil), // 20: hostagent.v1.ReadFileResponse
|
||||
(*ExecStreamRequest)(nil), // 21: hostagent.v1.ExecStreamRequest
|
||||
(*ExecStreamResponse)(nil), // 22: hostagent.v1.ExecStreamResponse
|
||||
(*ExecStreamStart)(nil), // 23: hostagent.v1.ExecStreamStart
|
||||
(*ExecStreamData)(nil), // 24: hostagent.v1.ExecStreamData
|
||||
(*ExecStreamEnd)(nil), // 25: hostagent.v1.ExecStreamEnd
|
||||
(*WriteFileStreamRequest)(nil), // 26: hostagent.v1.WriteFileStreamRequest
|
||||
(*WriteFileStreamMeta)(nil), // 27: hostagent.v1.WriteFileStreamMeta
|
||||
(*WriteFileStreamResponse)(nil), // 28: hostagent.v1.WriteFileStreamResponse
|
||||
(*ReadFileStreamRequest)(nil), // 29: hostagent.v1.ReadFileStreamRequest
|
||||
(*ReadFileStreamResponse)(nil), // 30: hostagent.v1.ReadFileStreamResponse
|
||||
(*PingSandboxRequest)(nil), // 31: hostagent.v1.PingSandboxRequest
|
||||
(*PingSandboxResponse)(nil), // 32: hostagent.v1.PingSandboxResponse
|
||||
(*TerminateRequest)(nil), // 33: hostagent.v1.TerminateRequest
|
||||
(*TerminateResponse)(nil), // 34: hostagent.v1.TerminateResponse
|
||||
(*CreateSandboxRequest)(nil), // 0: hostagent.v1.CreateSandboxRequest
|
||||
(*CreateSandboxResponse)(nil), // 1: hostagent.v1.CreateSandboxResponse
|
||||
(*DestroySandboxRequest)(nil), // 2: hostagent.v1.DestroySandboxRequest
|
||||
(*DestroySandboxResponse)(nil), // 3: hostagent.v1.DestroySandboxResponse
|
||||
(*PauseSandboxRequest)(nil), // 4: hostagent.v1.PauseSandboxRequest
|
||||
(*PauseSandboxResponse)(nil), // 5: hostagent.v1.PauseSandboxResponse
|
||||
(*ResumeSandboxRequest)(nil), // 6: hostagent.v1.ResumeSandboxRequest
|
||||
(*ResumeSandboxResponse)(nil), // 7: hostagent.v1.ResumeSandboxResponse
|
||||
(*CreateSnapshotRequest)(nil), // 8: hostagent.v1.CreateSnapshotRequest
|
||||
(*CreateSnapshotResponse)(nil), // 9: hostagent.v1.CreateSnapshotResponse
|
||||
(*DeleteSnapshotRequest)(nil), // 10: hostagent.v1.DeleteSnapshotRequest
|
||||
(*DeleteSnapshotResponse)(nil), // 11: hostagent.v1.DeleteSnapshotResponse
|
||||
(*ExecRequest)(nil), // 12: hostagent.v1.ExecRequest
|
||||
(*ExecResponse)(nil), // 13: hostagent.v1.ExecResponse
|
||||
(*ListSandboxesRequest)(nil), // 14: hostagent.v1.ListSandboxesRequest
|
||||
(*ListSandboxesResponse)(nil), // 15: hostagent.v1.ListSandboxesResponse
|
||||
(*SandboxInfo)(nil), // 16: hostagent.v1.SandboxInfo
|
||||
(*WriteFileRequest)(nil), // 17: hostagent.v1.WriteFileRequest
|
||||
(*WriteFileResponse)(nil), // 18: hostagent.v1.WriteFileResponse
|
||||
(*ReadFileRequest)(nil), // 19: hostagent.v1.ReadFileRequest
|
||||
(*ReadFileResponse)(nil), // 20: hostagent.v1.ReadFileResponse
|
||||
(*ExecStreamRequest)(nil), // 21: hostagent.v1.ExecStreamRequest
|
||||
(*ExecStreamResponse)(nil), // 22: hostagent.v1.ExecStreamResponse
|
||||
(*ExecStreamStart)(nil), // 23: hostagent.v1.ExecStreamStart
|
||||
(*ExecStreamData)(nil), // 24: hostagent.v1.ExecStreamData
|
||||
(*ExecStreamEnd)(nil), // 25: hostagent.v1.ExecStreamEnd
|
||||
(*WriteFileStreamRequest)(nil), // 26: hostagent.v1.WriteFileStreamRequest
|
||||
(*WriteFileStreamMeta)(nil), // 27: hostagent.v1.WriteFileStreamMeta
|
||||
(*WriteFileStreamResponse)(nil), // 28: hostagent.v1.WriteFileStreamResponse
|
||||
(*ReadFileStreamRequest)(nil), // 29: hostagent.v1.ReadFileStreamRequest
|
||||
(*ReadFileStreamResponse)(nil), // 30: hostagent.v1.ReadFileStreamResponse
|
||||
(*PingSandboxRequest)(nil), // 31: hostagent.v1.PingSandboxRequest
|
||||
(*PingSandboxResponse)(nil), // 32: hostagent.v1.PingSandboxResponse
|
||||
(*TerminateRequest)(nil), // 33: hostagent.v1.TerminateRequest
|
||||
(*TerminateResponse)(nil), // 34: hostagent.v1.TerminateResponse
|
||||
(*MetricPoint)(nil), // 35: hostagent.v1.MetricPoint
|
||||
(*GetSandboxMetricsRequest)(nil), // 36: hostagent.v1.GetSandboxMetricsRequest
|
||||
(*GetSandboxMetricsResponse)(nil), // 37: hostagent.v1.GetSandboxMetricsResponse
|
||||
(*FlushSandboxMetricsRequest)(nil), // 38: hostagent.v1.FlushSandboxMetricsRequest
|
||||
(*FlushSandboxMetricsResponse)(nil), // 39: hostagent.v1.FlushSandboxMetricsResponse
|
||||
}
|
||||
var file_hostagent_proto_depIdxs = []int32{
|
||||
16, // 0: hostagent.v1.ListSandboxesResponse.sandboxes:type_name -> hostagent.v1.SandboxInfo
|
||||
@ -2106,41 +2402,49 @@ var file_hostagent_proto_depIdxs = []int32{
|
||||
24, // 2: hostagent.v1.ExecStreamResponse.data:type_name -> hostagent.v1.ExecStreamData
|
||||
25, // 3: hostagent.v1.ExecStreamResponse.end:type_name -> hostagent.v1.ExecStreamEnd
|
||||
27, // 4: hostagent.v1.WriteFileStreamRequest.meta:type_name -> hostagent.v1.WriteFileStreamMeta
|
||||
0, // 5: hostagent.v1.HostAgentService.CreateSandbox:input_type -> hostagent.v1.CreateSandboxRequest
|
||||
2, // 6: hostagent.v1.HostAgentService.DestroySandbox:input_type -> hostagent.v1.DestroySandboxRequest
|
||||
4, // 7: hostagent.v1.HostAgentService.PauseSandbox:input_type -> hostagent.v1.PauseSandboxRequest
|
||||
6, // 8: hostagent.v1.HostAgentService.ResumeSandbox:input_type -> hostagent.v1.ResumeSandboxRequest
|
||||
12, // 9: hostagent.v1.HostAgentService.Exec:input_type -> hostagent.v1.ExecRequest
|
||||
14, // 10: hostagent.v1.HostAgentService.ListSandboxes:input_type -> hostagent.v1.ListSandboxesRequest
|
||||
17, // 11: hostagent.v1.HostAgentService.WriteFile:input_type -> hostagent.v1.WriteFileRequest
|
||||
19, // 12: hostagent.v1.HostAgentService.ReadFile:input_type -> hostagent.v1.ReadFileRequest
|
||||
8, // 13: hostagent.v1.HostAgentService.CreateSnapshot:input_type -> hostagent.v1.CreateSnapshotRequest
|
||||
10, // 14: hostagent.v1.HostAgentService.DeleteSnapshot:input_type -> hostagent.v1.DeleteSnapshotRequest
|
||||
21, // 15: hostagent.v1.HostAgentService.ExecStream:input_type -> hostagent.v1.ExecStreamRequest
|
||||
26, // 16: hostagent.v1.HostAgentService.WriteFileStream:input_type -> hostagent.v1.WriteFileStreamRequest
|
||||
29, // 17: hostagent.v1.HostAgentService.ReadFileStream:input_type -> hostagent.v1.ReadFileStreamRequest
|
||||
31, // 18: hostagent.v1.HostAgentService.PingSandbox:input_type -> hostagent.v1.PingSandboxRequest
|
||||
33, // 19: hostagent.v1.HostAgentService.Terminate:input_type -> hostagent.v1.TerminateRequest
|
||||
1, // 20: hostagent.v1.HostAgentService.CreateSandbox:output_type -> hostagent.v1.CreateSandboxResponse
|
||||
3, // 21: hostagent.v1.HostAgentService.DestroySandbox:output_type -> hostagent.v1.DestroySandboxResponse
|
||||
5, // 22: hostagent.v1.HostAgentService.PauseSandbox:output_type -> hostagent.v1.PauseSandboxResponse
|
||||
7, // 23: hostagent.v1.HostAgentService.ResumeSandbox:output_type -> hostagent.v1.ResumeSandboxResponse
|
||||
13, // 24: hostagent.v1.HostAgentService.Exec:output_type -> hostagent.v1.ExecResponse
|
||||
15, // 25: hostagent.v1.HostAgentService.ListSandboxes:output_type -> hostagent.v1.ListSandboxesResponse
|
||||
18, // 26: hostagent.v1.HostAgentService.WriteFile:output_type -> hostagent.v1.WriteFileResponse
|
||||
20, // 27: hostagent.v1.HostAgentService.ReadFile:output_type -> hostagent.v1.ReadFileResponse
|
||||
9, // 28: hostagent.v1.HostAgentService.CreateSnapshot:output_type -> hostagent.v1.CreateSnapshotResponse
|
||||
11, // 29: hostagent.v1.HostAgentService.DeleteSnapshot:output_type -> hostagent.v1.DeleteSnapshotResponse
|
||||
22, // 30: hostagent.v1.HostAgentService.ExecStream:output_type -> hostagent.v1.ExecStreamResponse
|
||||
28, // 31: hostagent.v1.HostAgentService.WriteFileStream:output_type -> hostagent.v1.WriteFileStreamResponse
|
||||
30, // 32: hostagent.v1.HostAgentService.ReadFileStream:output_type -> hostagent.v1.ReadFileStreamResponse
|
||||
32, // 33: hostagent.v1.HostAgentService.PingSandbox:output_type -> hostagent.v1.PingSandboxResponse
|
||||
34, // 34: hostagent.v1.HostAgentService.Terminate:output_type -> hostagent.v1.TerminateResponse
|
||||
20, // [20:35] is the sub-list for method output_type
|
||||
5, // [5:20] is the sub-list for method input_type
|
||||
5, // [5:5] is the sub-list for extension type_name
|
||||
5, // [5:5] is the sub-list for extension extendee
|
||||
0, // [0:5] is the sub-list for field type_name
|
||||
35, // 5: hostagent.v1.GetSandboxMetricsResponse.points:type_name -> hostagent.v1.MetricPoint
|
||||
35, // 6: hostagent.v1.FlushSandboxMetricsResponse.points_10m:type_name -> hostagent.v1.MetricPoint
|
||||
35, // 7: hostagent.v1.FlushSandboxMetricsResponse.points_2h:type_name -> hostagent.v1.MetricPoint
|
||||
35, // 8: hostagent.v1.FlushSandboxMetricsResponse.points_24h:type_name -> hostagent.v1.MetricPoint
|
||||
0, // 9: hostagent.v1.HostAgentService.CreateSandbox:input_type -> hostagent.v1.CreateSandboxRequest
|
||||
2, // 10: hostagent.v1.HostAgentService.DestroySandbox:input_type -> hostagent.v1.DestroySandboxRequest
|
||||
4, // 11: hostagent.v1.HostAgentService.PauseSandbox:input_type -> hostagent.v1.PauseSandboxRequest
|
||||
6, // 12: hostagent.v1.HostAgentService.ResumeSandbox:input_type -> hostagent.v1.ResumeSandboxRequest
|
||||
12, // 13: hostagent.v1.HostAgentService.Exec:input_type -> hostagent.v1.ExecRequest
|
||||
14, // 14: hostagent.v1.HostAgentService.ListSandboxes:input_type -> hostagent.v1.ListSandboxesRequest
|
||||
17, // 15: hostagent.v1.HostAgentService.WriteFile:input_type -> hostagent.v1.WriteFileRequest
|
||||
19, // 16: hostagent.v1.HostAgentService.ReadFile:input_type -> hostagent.v1.ReadFileRequest
|
||||
8, // 17: hostagent.v1.HostAgentService.CreateSnapshot:input_type -> hostagent.v1.CreateSnapshotRequest
|
||||
10, // 18: hostagent.v1.HostAgentService.DeleteSnapshot:input_type -> hostagent.v1.DeleteSnapshotRequest
|
||||
21, // 19: hostagent.v1.HostAgentService.ExecStream:input_type -> hostagent.v1.ExecStreamRequest
|
||||
26, // 20: hostagent.v1.HostAgentService.WriteFileStream:input_type -> hostagent.v1.WriteFileStreamRequest
|
||||
29, // 21: hostagent.v1.HostAgentService.ReadFileStream:input_type -> hostagent.v1.ReadFileStreamRequest
|
||||
31, // 22: hostagent.v1.HostAgentService.PingSandbox:input_type -> hostagent.v1.PingSandboxRequest
|
||||
33, // 23: hostagent.v1.HostAgentService.Terminate:input_type -> hostagent.v1.TerminateRequest
|
||||
36, // 24: hostagent.v1.HostAgentService.GetSandboxMetrics:input_type -> hostagent.v1.GetSandboxMetricsRequest
|
||||
38, // 25: hostagent.v1.HostAgentService.FlushSandboxMetrics:input_type -> hostagent.v1.FlushSandboxMetricsRequest
|
||||
1, // 26: hostagent.v1.HostAgentService.CreateSandbox:output_type -> hostagent.v1.CreateSandboxResponse
|
||||
3, // 27: hostagent.v1.HostAgentService.DestroySandbox:output_type -> hostagent.v1.DestroySandboxResponse
|
||||
5, // 28: hostagent.v1.HostAgentService.PauseSandbox:output_type -> hostagent.v1.PauseSandboxResponse
|
||||
7, // 29: hostagent.v1.HostAgentService.ResumeSandbox:output_type -> hostagent.v1.ResumeSandboxResponse
|
||||
13, // 30: hostagent.v1.HostAgentService.Exec:output_type -> hostagent.v1.ExecResponse
|
||||
15, // 31: hostagent.v1.HostAgentService.ListSandboxes:output_type -> hostagent.v1.ListSandboxesResponse
|
||||
18, // 32: hostagent.v1.HostAgentService.WriteFile:output_type -> hostagent.v1.WriteFileResponse
|
||||
20, // 33: hostagent.v1.HostAgentService.ReadFile:output_type -> hostagent.v1.ReadFileResponse
|
||||
9, // 34: hostagent.v1.HostAgentService.CreateSnapshot:output_type -> hostagent.v1.CreateSnapshotResponse
|
||||
11, // 35: hostagent.v1.HostAgentService.DeleteSnapshot:output_type -> hostagent.v1.DeleteSnapshotResponse
|
||||
22, // 36: hostagent.v1.HostAgentService.ExecStream:output_type -> hostagent.v1.ExecStreamResponse
|
||||
28, // 37: hostagent.v1.HostAgentService.WriteFileStream:output_type -> hostagent.v1.WriteFileStreamResponse
|
||||
30, // 38: hostagent.v1.HostAgentService.ReadFileStream:output_type -> hostagent.v1.ReadFileStreamResponse
|
||||
32, // 39: hostagent.v1.HostAgentService.PingSandbox:output_type -> hostagent.v1.PingSandboxResponse
|
||||
34, // 40: hostagent.v1.HostAgentService.Terminate:output_type -> hostagent.v1.TerminateResponse
|
||||
37, // 41: hostagent.v1.HostAgentService.GetSandboxMetrics:output_type -> hostagent.v1.GetSandboxMetricsResponse
|
||||
39, // 42: hostagent.v1.HostAgentService.FlushSandboxMetrics:output_type -> hostagent.v1.FlushSandboxMetricsResponse
|
||||
26, // [26:43] is the sub-list for method output_type
|
||||
9, // [9:26] is the sub-list for method input_type
|
||||
9, // [9:9] is the sub-list for extension type_name
|
||||
9, // [9:9] is the sub-list for extension extendee
|
||||
0, // [0:9] is the sub-list for field type_name
|
||||
}
|
||||
|
||||
func init() { file_hostagent_proto_init() }
|
||||
@ -2167,7 +2471,7 @@ func file_hostagent_proto_init() {
|
||||
GoPackagePath: reflect.TypeOf(x{}).PkgPath(),
|
||||
RawDescriptor: unsafe.Slice(unsafe.StringData(file_hostagent_proto_rawDesc), len(file_hostagent_proto_rawDesc)),
|
||||
NumEnums: 0,
|
||||
NumMessages: 35,
|
||||
NumMessages: 40,
|
||||
NumExtensions: 0,
|
||||
NumServices: 1,
|
||||
},
|
||||
|
||||
@ -77,6 +77,12 @@ const (
|
||||
// HostAgentServiceTerminateProcedure is the fully-qualified name of the HostAgentService's
|
||||
// Terminate RPC.
|
||||
HostAgentServiceTerminateProcedure = "/hostagent.v1.HostAgentService/Terminate"
|
||||
// HostAgentServiceGetSandboxMetricsProcedure is the fully-qualified name of the HostAgentService's
|
||||
// GetSandboxMetrics RPC.
|
||||
HostAgentServiceGetSandboxMetricsProcedure = "/hostagent.v1.HostAgentService/GetSandboxMetrics"
|
||||
// HostAgentServiceFlushSandboxMetricsProcedure is the fully-qualified name of the
|
||||
// HostAgentService's FlushSandboxMetrics RPC.
|
||||
HostAgentServiceFlushSandboxMetricsProcedure = "/hostagent.v1.HostAgentService/FlushSandboxMetrics"
|
||||
)
|
||||
|
||||
// HostAgentServiceClient is a client for the hostagent.v1.HostAgentService service.
|
||||
@ -115,6 +121,11 @@ type HostAgentServiceClient interface {
|
||||
// Called by the control plane immediately when a host is deleted so the
|
||||
// agent shuts down without waiting for the next heartbeat cycle.
|
||||
Terminate(context.Context, *connect.Request[gen.TerminateRequest]) (*connect.Response[gen.TerminateResponse], error)
|
||||
// GetSandboxMetrics returns ring buffer metrics for a running sandbox.
|
||||
GetSandboxMetrics(context.Context, *connect.Request[gen.GetSandboxMetricsRequest]) (*connect.Response[gen.GetSandboxMetricsResponse], error)
|
||||
// FlushSandboxMetrics returns all ring buffer tiers and clears them.
|
||||
// Called by the control plane before pause/destroy to persist metrics to DB.
|
||||
FlushSandboxMetrics(context.Context, *connect.Request[gen.FlushSandboxMetricsRequest]) (*connect.Response[gen.FlushSandboxMetricsResponse], error)
|
||||
}
|
||||
|
||||
// NewHostAgentServiceClient constructs a client for the hostagent.v1.HostAgentService service. By
|
||||
@ -218,26 +229,40 @@ func NewHostAgentServiceClient(httpClient connect.HTTPClient, baseURL string, op
|
||||
connect.WithSchema(hostAgentServiceMethods.ByName("Terminate")),
|
||||
connect.WithClientOptions(opts...),
|
||||
),
|
||||
getSandboxMetrics: connect.NewClient[gen.GetSandboxMetricsRequest, gen.GetSandboxMetricsResponse](
|
||||
httpClient,
|
||||
baseURL+HostAgentServiceGetSandboxMetricsProcedure,
|
||||
connect.WithSchema(hostAgentServiceMethods.ByName("GetSandboxMetrics")),
|
||||
connect.WithClientOptions(opts...),
|
||||
),
|
||||
flushSandboxMetrics: connect.NewClient[gen.FlushSandboxMetricsRequest, gen.FlushSandboxMetricsResponse](
|
||||
httpClient,
|
||||
baseURL+HostAgentServiceFlushSandboxMetricsProcedure,
|
||||
connect.WithSchema(hostAgentServiceMethods.ByName("FlushSandboxMetrics")),
|
||||
connect.WithClientOptions(opts...),
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
// hostAgentServiceClient implements HostAgentServiceClient.
|
||||
type hostAgentServiceClient struct {
|
||||
createSandbox *connect.Client[gen.CreateSandboxRequest, gen.CreateSandboxResponse]
|
||||
destroySandbox *connect.Client[gen.DestroySandboxRequest, gen.DestroySandboxResponse]
|
||||
pauseSandbox *connect.Client[gen.PauseSandboxRequest, gen.PauseSandboxResponse]
|
||||
resumeSandbox *connect.Client[gen.ResumeSandboxRequest, gen.ResumeSandboxResponse]
|
||||
exec *connect.Client[gen.ExecRequest, gen.ExecResponse]
|
||||
listSandboxes *connect.Client[gen.ListSandboxesRequest, gen.ListSandboxesResponse]
|
||||
writeFile *connect.Client[gen.WriteFileRequest, gen.WriteFileResponse]
|
||||
readFile *connect.Client[gen.ReadFileRequest, gen.ReadFileResponse]
|
||||
createSnapshot *connect.Client[gen.CreateSnapshotRequest, gen.CreateSnapshotResponse]
|
||||
deleteSnapshot *connect.Client[gen.DeleteSnapshotRequest, gen.DeleteSnapshotResponse]
|
||||
execStream *connect.Client[gen.ExecStreamRequest, gen.ExecStreamResponse]
|
||||
writeFileStream *connect.Client[gen.WriteFileStreamRequest, gen.WriteFileStreamResponse]
|
||||
readFileStream *connect.Client[gen.ReadFileStreamRequest, gen.ReadFileStreamResponse]
|
||||
pingSandbox *connect.Client[gen.PingSandboxRequest, gen.PingSandboxResponse]
|
||||
terminate *connect.Client[gen.TerminateRequest, gen.TerminateResponse]
|
||||
createSandbox *connect.Client[gen.CreateSandboxRequest, gen.CreateSandboxResponse]
|
||||
destroySandbox *connect.Client[gen.DestroySandboxRequest, gen.DestroySandboxResponse]
|
||||
pauseSandbox *connect.Client[gen.PauseSandboxRequest, gen.PauseSandboxResponse]
|
||||
resumeSandbox *connect.Client[gen.ResumeSandboxRequest, gen.ResumeSandboxResponse]
|
||||
exec *connect.Client[gen.ExecRequest, gen.ExecResponse]
|
||||
listSandboxes *connect.Client[gen.ListSandboxesRequest, gen.ListSandboxesResponse]
|
||||
writeFile *connect.Client[gen.WriteFileRequest, gen.WriteFileResponse]
|
||||
readFile *connect.Client[gen.ReadFileRequest, gen.ReadFileResponse]
|
||||
createSnapshot *connect.Client[gen.CreateSnapshotRequest, gen.CreateSnapshotResponse]
|
||||
deleteSnapshot *connect.Client[gen.DeleteSnapshotRequest, gen.DeleteSnapshotResponse]
|
||||
execStream *connect.Client[gen.ExecStreamRequest, gen.ExecStreamResponse]
|
||||
writeFileStream *connect.Client[gen.WriteFileStreamRequest, gen.WriteFileStreamResponse]
|
||||
readFileStream *connect.Client[gen.ReadFileStreamRequest, gen.ReadFileStreamResponse]
|
||||
pingSandbox *connect.Client[gen.PingSandboxRequest, gen.PingSandboxResponse]
|
||||
terminate *connect.Client[gen.TerminateRequest, gen.TerminateResponse]
|
||||
getSandboxMetrics *connect.Client[gen.GetSandboxMetricsRequest, gen.GetSandboxMetricsResponse]
|
||||
flushSandboxMetrics *connect.Client[gen.FlushSandboxMetricsRequest, gen.FlushSandboxMetricsResponse]
|
||||
}
|
||||
|
||||
// CreateSandbox calls hostagent.v1.HostAgentService.CreateSandbox.
|
||||
@ -315,6 +340,16 @@ func (c *hostAgentServiceClient) Terminate(ctx context.Context, req *connect.Req
|
||||
return c.terminate.CallUnary(ctx, req)
|
||||
}
|
||||
|
||||
// GetSandboxMetrics calls hostagent.v1.HostAgentService.GetSandboxMetrics.
|
||||
func (c *hostAgentServiceClient) GetSandboxMetrics(ctx context.Context, req *connect.Request[gen.GetSandboxMetricsRequest]) (*connect.Response[gen.GetSandboxMetricsResponse], error) {
|
||||
return c.getSandboxMetrics.CallUnary(ctx, req)
|
||||
}
|
||||
|
||||
// FlushSandboxMetrics calls hostagent.v1.HostAgentService.FlushSandboxMetrics.
|
||||
func (c *hostAgentServiceClient) FlushSandboxMetrics(ctx context.Context, req *connect.Request[gen.FlushSandboxMetricsRequest]) (*connect.Response[gen.FlushSandboxMetricsResponse], error) {
|
||||
return c.flushSandboxMetrics.CallUnary(ctx, req)
|
||||
}
|
||||
|
||||
// HostAgentServiceHandler is an implementation of the hostagent.v1.HostAgentService service.
|
||||
type HostAgentServiceHandler interface {
|
||||
// CreateSandbox boots a new microVM with the given configuration.
|
||||
@ -351,6 +386,11 @@ type HostAgentServiceHandler interface {
|
||||
// Called by the control plane immediately when a host is deleted so the
|
||||
// agent shuts down without waiting for the next heartbeat cycle.
|
||||
Terminate(context.Context, *connect.Request[gen.TerminateRequest]) (*connect.Response[gen.TerminateResponse], error)
|
||||
// GetSandboxMetrics returns ring buffer metrics for a running sandbox.
|
||||
GetSandboxMetrics(context.Context, *connect.Request[gen.GetSandboxMetricsRequest]) (*connect.Response[gen.GetSandboxMetricsResponse], error)
|
||||
// FlushSandboxMetrics returns all ring buffer tiers and clears them.
|
||||
// Called by the control plane before pause/destroy to persist metrics to DB.
|
||||
FlushSandboxMetrics(context.Context, *connect.Request[gen.FlushSandboxMetricsRequest]) (*connect.Response[gen.FlushSandboxMetricsResponse], error)
|
||||
}
|
||||
|
||||
// NewHostAgentServiceHandler builds an HTTP handler from the service implementation. It returns the
|
||||
@ -450,6 +490,18 @@ func NewHostAgentServiceHandler(svc HostAgentServiceHandler, opts ...connect.Han
|
||||
connect.WithSchema(hostAgentServiceMethods.ByName("Terminate")),
|
||||
connect.WithHandlerOptions(opts...),
|
||||
)
|
||||
hostAgentServiceGetSandboxMetricsHandler := connect.NewUnaryHandler(
|
||||
HostAgentServiceGetSandboxMetricsProcedure,
|
||||
svc.GetSandboxMetrics,
|
||||
connect.WithSchema(hostAgentServiceMethods.ByName("GetSandboxMetrics")),
|
||||
connect.WithHandlerOptions(opts...),
|
||||
)
|
||||
hostAgentServiceFlushSandboxMetricsHandler := connect.NewUnaryHandler(
|
||||
HostAgentServiceFlushSandboxMetricsProcedure,
|
||||
svc.FlushSandboxMetrics,
|
||||
connect.WithSchema(hostAgentServiceMethods.ByName("FlushSandboxMetrics")),
|
||||
connect.WithHandlerOptions(opts...),
|
||||
)
|
||||
return "/hostagent.v1.HostAgentService/", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
switch r.URL.Path {
|
||||
case HostAgentServiceCreateSandboxProcedure:
|
||||
@ -482,6 +534,10 @@ func NewHostAgentServiceHandler(svc HostAgentServiceHandler, opts ...connect.Han
|
||||
hostAgentServicePingSandboxHandler.ServeHTTP(w, r)
|
||||
case HostAgentServiceTerminateProcedure:
|
||||
hostAgentServiceTerminateHandler.ServeHTTP(w, r)
|
||||
case HostAgentServiceGetSandboxMetricsProcedure:
|
||||
hostAgentServiceGetSandboxMetricsHandler.ServeHTTP(w, r)
|
||||
case HostAgentServiceFlushSandboxMetricsProcedure:
|
||||
hostAgentServiceFlushSandboxMetricsHandler.ServeHTTP(w, r)
|
||||
default:
|
||||
http.NotFound(w, r)
|
||||
}
|
||||
@ -550,3 +606,11 @@ func (UnimplementedHostAgentServiceHandler) PingSandbox(context.Context, *connec
|
||||
func (UnimplementedHostAgentServiceHandler) Terminate(context.Context, *connect.Request[gen.TerminateRequest]) (*connect.Response[gen.TerminateResponse], error) {
|
||||
return nil, connect.NewError(connect.CodeUnimplemented, errors.New("hostagent.v1.HostAgentService.Terminate is not implemented"))
|
||||
}
|
||||
|
||||
func (UnimplementedHostAgentServiceHandler) GetSandboxMetrics(context.Context, *connect.Request[gen.GetSandboxMetricsRequest]) (*connect.Response[gen.GetSandboxMetricsResponse], error) {
|
||||
return nil, connect.NewError(connect.CodeUnimplemented, errors.New("hostagent.v1.HostAgentService.GetSandboxMetrics is not implemented"))
|
||||
}
|
||||
|
||||
func (UnimplementedHostAgentServiceHandler) FlushSandboxMetrics(context.Context, *connect.Request[gen.FlushSandboxMetricsRequest]) (*connect.Response[gen.FlushSandboxMetricsResponse], error) {
|
||||
return nil, connect.NewError(connect.CodeUnimplemented, errors.New("hostagent.v1.HostAgentService.FlushSandboxMetrics is not implemented"))
|
||||
}
|
||||
|
||||
@ -54,6 +54,13 @@ service HostAgentService {
|
||||
// agent shuts down without waiting for the next heartbeat cycle.
|
||||
rpc Terminate(TerminateRequest) returns (TerminateResponse);
|
||||
|
||||
// GetSandboxMetrics returns ring buffer metrics for a running sandbox.
|
||||
rpc GetSandboxMetrics(GetSandboxMetricsRequest) returns (GetSandboxMetricsResponse);
|
||||
|
||||
// FlushSandboxMetrics returns all ring buffer tiers and clears them.
|
||||
// Called by the control plane before pause/destroy to persist metrics to DB.
|
||||
rpc FlushSandboxMetrics(FlushSandboxMetricsRequest) returns (FlushSandboxMetricsResponse);
|
||||
|
||||
}
|
||||
|
||||
message CreateSandboxRequest {
|
||||
@ -248,3 +255,32 @@ message PingSandboxResponse {}
|
||||
message TerminateRequest {}
|
||||
|
||||
message TerminateResponse {}
|
||||
|
||||
// ── Metrics ──────────────────────────────────────────────────────────
|
||||
|
||||
message MetricPoint {
|
||||
int64 timestamp_unix = 1;
|
||||
double cpu_pct = 2;
|
||||
int64 mem_bytes = 3;
|
||||
int64 disk_bytes = 4;
|
||||
}
|
||||
|
||||
message GetSandboxMetricsRequest {
|
||||
string sandbox_id = 1;
|
||||
// Range tier: "10m", "2h", or "24h".
|
||||
string range = 2;
|
||||
}
|
||||
|
||||
message GetSandboxMetricsResponse {
|
||||
repeated MetricPoint points = 1;
|
||||
}
|
||||
|
||||
message FlushSandboxMetricsRequest {
|
||||
string sandbox_id = 1;
|
||||
}
|
||||
|
||||
message FlushSandboxMetricsResponse {
|
||||
repeated MetricPoint points_10m = 1;
|
||||
repeated MetricPoint points_2h = 2;
|
||||
repeated MetricPoint points_24h = 3;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user