1
0
forked from wrenn/wrenn

Add per-sandbox CPU/memory/disk metrics collection

Samples /proc/{fc_pid}/stat (CPU%), /proc/{fc_pid}/status (VmRSS), and
stat() on CoW files at 500ms intervals per running sandbox. Three tiered
ring buffers downsample into 30s and 5min averages for 10min/2h/24h
retention. Metrics are flushed to DB on pause (all tiers) and destroy
(24h only). New GetSandboxMetrics and FlushSandboxMetrics RPCs on the
host agent, proxied through GET /v1/sandboxes/{id}/metrics?range= on
the control plane. Returns live data for running sandboxes, DB data for
paused, and 404 for stopped.
This commit is contained in:
2026-03-25 20:10:33 +06:00
parent 7473c15f52
commit 9acdbb5ae9
16 changed files with 1430 additions and 90 deletions

View File

@@ -426,3 +426,55 @@ func (s *Server) Terminate(
}
return connect.NewResponse(&pb.TerminateResponse{}), nil
}
// GetSandboxMetrics handles the GetSandboxMetrics RPC. It asks the sandbox
// manager for the metric points matching the request's sandbox ID and range,
// and returns them converted to their protobuf representation.
//
// Manager errors are translated to Connect codes by inspecting the error
// text: a message containing "not found" yields CodeNotFound, one containing
// "invalid range" yields CodeInvalidArgument, and everything else yields
// CodeInternal. The request context is not used.
//
// NOTE(review): matching on error text is brittle; if the manager exposes
// sentinel errors, errors.Is would be safer — confirm against the manager.
func (s *Server) GetSandboxMetrics(
	_ context.Context,
	req *connect.Request[pb.GetSandboxMetricsRequest],
) (*connect.Response[pb.GetSandboxMetricsResponse], error) {
	points, err := s.mgr.GetMetrics(req.Msg.SandboxId, req.Msg.Range)
	if err != nil {
		// Default to internal; "not found" is tested first so it takes
		// precedence when both substrings are present.
		code := connect.CodeInternal
		switch text := err.Error(); {
		case strings.Contains(text, "not found"):
			code = connect.CodeNotFound
		case strings.Contains(text, "invalid range"):
			code = connect.CodeInvalidArgument
		}
		return nil, connect.NewError(code, err)
	}

	resp := &pb.GetSandboxMetricsResponse{Points: metricPointsToPB(points)}
	return connect.NewResponse(resp), nil
}
// FlushSandboxMetrics handles the FlushSandboxMetrics RPC. It calls the
// sandbox manager's FlushMetrics for the requested sandbox ID and returns the
// three resulting point series in the response's Points_10M, Points_2H and
// Points_24H fields, each converted to protobuf form.
//
// A manager error whose text contains "not found" is reported as
// CodeNotFound; any other error is reported as CodeInternal. The request
// context is not used.
func (s *Server) FlushSandboxMetrics(
	_ context.Context,
	req *connect.Request[pb.FlushSandboxMetricsRequest],
) (*connect.Response[pb.FlushSandboxMetricsResponse], error) {
	tier10m, tier2h, tier24h, err := s.mgr.FlushMetrics(req.Msg.SandboxId)
	if err != nil {
		code := connect.CodeInternal
		if strings.Contains(err.Error(), "not found") {
			code = connect.CodeNotFound
		}
		return nil, connect.NewError(code, err)
	}

	resp := &pb.FlushSandboxMetricsResponse{
		Points_10M: metricPointsToPB(tier10m),
		Points_2H:  metricPointsToPB(tier2h),
		Points_24H: metricPointsToPB(tier24h),
	}
	return connect.NewResponse(resp), nil
}
// metricPointsToPB converts a slice of sandbox metric points into a slice of
// protobuf MetricPoint messages, one per input element and in the same order.
// Each timestamp is emitted as Unix seconds; the CPU, memory and disk values
// are copied through unchanged. The result is always a non-nil slice, even
// for empty input.
func metricPointsToPB(pts []sandbox.MetricPoint) []*pb.MetricPoint {
	converted := make([]*pb.MetricPoint, 0, len(pts))
	for idx := range pts {
		src := &pts[idx]
		converted = append(converted, &pb.MetricPoint{
			TimestampUnix: src.Timestamp.Unix(),
			CpuPct:        src.CPUPct,
			MemBytes:      src.MemBytes,
			DiskBytes:     src.DiskBytes,
		})
	}
	return converted
}