1
0
forked from wrenn/wrenn

Add live stats page with metrics sampling and route split

- New sandbox_metrics_snapshots table sampled every 10s (60-day retention)
- Background MetricsSampler goroutine wired into control plane startup
- GET /v1/sandboxes/stats?range=5m|1h|6h|24h|30d endpoint with adaptive
  polling intervals; reserved CPU/RAM uses ceil(paused/2) formula
- StatsPanel component: 4 stat cards + 2 Chart.js line charts (straight
  lines, integer y-axis for running count, dual-axis for CPU/RAM)
- Range filter persisted in URL query param; polls update data silently
  (no blink — loading state only shown on initial mount)
- Split /dashboard/capsules into /list and /stats sub-routes with shared
  layout; capsuleRunningCount store syncs badge across routes
- CreateCapsuleDialog extracted as reusable component
This commit is contained in:
2026-03-25 14:41:05 +06:00
parent 2349f585ae
commit fee66bda50
21 changed files with 2059 additions and 1023 deletions

View File

@ -0,0 +1,100 @@
package api
import (
"log/slog"
"net/http"
"time"
"git.omukk.dev/wrenn/sandbox/internal/auth"
"git.omukk.dev/wrenn/sandbox/internal/service"
)
// statsHandler holds the dependency needed by the sandbox stats HTTP
// endpoint (see GetStats).
type statsHandler struct {
// svc is the stats service that GetStats delegates to.
svc *service.StatsService
}
// newStatsHandler returns a statsHandler that answers requests using svc.
func newStatsHandler(svc *service.StatsService) *statsHandler {
	h := new(statsHandler)
	h.svc = svc
	return h
}
// statsCurrentResponse is the "current" object of the stats response: the
// running count and reserved resources copied from the service's current
// values by GetStats.
type statsCurrentResponse struct {
RunningCount int32 `json:"running_count"`
VCPUsReserved int32 `json:"vcpus_reserved"`
MemoryMBReserved int32 `json:"memory_mb_reserved"`
// SampledAt is an RFC 3339 UTC timestamp. GetStats leaves it empty when the
// service reports a zero time, and omitempty then drops the key from the JSON.
SampledAt string `json:"sampled_at,omitempty"`
}
// statsPeaksResponse is the "peaks" object of the stats response, filled by
// GetStats from the peaks value returned by the stats service.
// NOTE(review): the OpenAPI schema describes these as maxima over the last
// 30 days; the window is decided by the service, not by this type.
type statsPeaksResponse struct {
RunningCount int32 `json:"running_count"`
VCPUs int32 `json:"vcpus"`
MemoryMB int32 `json:"memory_mb"`
}
// statsSeriesResponse is the "series" object of the stats response. The four
// slices are parallel: GetStats writes index i of each one from the same
// series point, so they always have equal length.
type statsSeriesResponse struct {
// Labels holds each point's bucket time formatted as RFC 3339 in UTC.
Labels []string `json:"labels"`
Running []int32 `json:"running"`
VCPUs []int32 `json:"vcpus"`
MemoryMB []int32 `json:"memory_mb"`
}
// statsResponse is the JSON body written by GetStats on success.
type statsResponse struct {
// Range echoes the range value the request was served with (the default
// when the query parameter was absent).
Range string `json:"range"`
Current statsCurrentResponse `json:"current"`
Peaks statsPeaksResponse `json:"peaks"`
Series statsSeriesResponse `json:"series"`
}
// GetStats serves GET /v1/sandboxes/stats?range=5m|1h|6h|24h|30d.
//
// The range query parameter falls back to service.Range1h when absent; a
// value rejected by service.ValidRange produces 400 invalid_request. Stats are
// requested for the team ID carried by the request's auth context, and a
// service failure is logged and answered with 500 internal_error. On success
// the range is echoed back together with the current values, the peaks and
// the series flattened into parallel arrays; timestamps are RFC 3339 in UTC.
func (h *statsHandler) GetStats(w http.ResponseWriter, r *http.Request) {
	ctx := r.Context()
	ac := auth.MustFromContext(ctx)

	rangeParam := r.URL.Query().Get("range")
	if rangeParam == "" {
		rangeParam = string(service.Range1h)
	}

	tr := service.TimeRange(rangeParam)
	if !service.ValidRange(tr) {
		writeError(w, http.StatusBadRequest, "invalid_request", "range must be one of: 5m, 1h, 6h, 24h, 30d")
		return
	}

	current, peaks, series, err := h.svc.GetStats(ctx, ac.TeamID, tr)
	if err != nil {
		slog.Error("stats handler: get stats failed", "team_id", ac.TeamID, "error", err)
		writeError(w, http.StatusInternalServerError, "internal_error", "failed to retrieve stats")
		return
	}

	now := statsCurrentResponse{
		RunningCount:     current.RunningCount,
		VCPUsReserved:    current.VCPUsReserved,
		MemoryMBReserved: current.MemoryMBReserved,
	}
	// A zero sample time leaves SampledAt empty so the key is omitted.
	if !current.SampledAt.IsZero() {
		now.SampledAt = current.SampledAt.UTC().Format(time.RFC3339)
	}

	// Allocate non-nil slices up front so an empty series still encodes as
	// [] rather than null.
	n := len(series)
	points := statsSeriesResponse{
		Labels:   make([]string, 0, n),
		Running:  make([]int32, 0, n),
		VCPUs:    make([]int32, 0, n),
		MemoryMB: make([]int32, 0, n),
	}
	for _, pt := range series {
		points.Labels = append(points.Labels, pt.Bucket.UTC().Format(time.RFC3339))
		points.Running = append(points.Running, pt.RunningCount)
		points.VCPUs = append(points.VCPUs, pt.VCPUsReserved)
		points.MemoryMB = append(points.MemoryMB, pt.MemoryMBReserved)
	}

	writeJSON(w, http.StatusOK, statsResponse{
		Range:   rangeParam,
		Current: now,
		Peaks: statsPeaksResponse{
			RunningCount: peaks.RunningCount,
			VCPUs:        peaks.VCPUs,
			MemoryMB:     peaks.MemoryMB,
		},
		Series: points,
	})
}

View File

@ -0,0 +1,68 @@
package api
import (
"context"
"log/slog"
"time"
"git.omukk.dev/wrenn/sandbox/internal/db"
)
// MetricsSampler records per-team sandbox resource usage to
// sandbox_metrics_snapshots every interval. It also prunes rows older than
// 60 days on each tick to keep the table bounded.
type MetricsSampler struct {
// db is the query handle used to read the sample, insert snapshots and prune.
db *db.Queries
// interval is the period of the ticker created in Start.
interval time.Duration
}
// NewMetricsSampler builds a MetricsSampler that uses queries for database
// access and ticks every interval once Start is called. The value is stored
// as given; no validation happens here.
func NewMetricsSampler(queries *db.Queries, interval time.Duration) *MetricsSampler {
	sampler := new(MetricsSampler)
	sampler.db = queries
	sampler.interval = interval
	return sampler
}
// Start launches the sampler in a background goroutine and returns at once.
// The goroutine performs one run immediately, then one per tick of
// s.interval, and exits when ctx is cancelled.
func (s *MetricsSampler) Start(ctx context.Context) {
	loop := func() {
		tick := time.NewTicker(s.interval)
		defer tick.Stop()

		// First sample right away instead of waiting a full interval.
		s.run(ctx)

		for {
			select {
			case <-tick.C:
				s.run(ctx)
			case <-ctx.Done():
				return
			}
		}
	}
	go loop()
}
// run performs one sampler cycle: prune first, then take a sample. A sample
// error is logged as a warning and not propagated.
func (s *MetricsSampler) run(ctx context.Context) {
	s.prune(ctx)

	err := s.sample(ctx)
	if err == nil {
		return
	}
	slog.Warn("metrics sampler: sample failed", "error", err)
}
// sample reads the current usage rows via SampleSandboxMetrics and stores one
// snapshot per row via InsertMetricsSnapshot.
//
// An error from the read query is returned to the caller. A failed insert is
// logged and skipped so that one bad row does not discard the rest of the
// sample. If ctx is cancelled part-way through, the remaining inserts are
// abandoned and ctx.Err() is returned, rather than attempting an insert (and
// possibly logging a warning) for every remaining row against a dead context.
func (s *MetricsSampler) sample(ctx context.Context) error {
	rows, err := s.db.SampleSandboxMetrics(ctx)
	if err != nil {
		return err
	}
	for _, row := range rows {
		// Stop promptly on shutdown instead of working through the backlog.
		if err := ctx.Err(); err != nil {
			return err
		}
		if err := s.db.InsertMetricsSnapshot(ctx, db.InsertMetricsSnapshotParams(row)); err != nil {
			slog.Warn("metrics sampler: insert snapshot failed", "team_id", row.TeamID, "error", err)
		}
	}
	return nil
}
// prune calls PruneOldMetrics once. Failure is logged as a warning and
// otherwise ignored, so a prune error never blocks the sample that follows.
func (s *MetricsSampler) prune(ctx context.Context) {
	err := s.db.PruneOldMetrics(ctx)
	if err == nil {
		return
	}
	slog.Warn("metrics sampler: prune failed", "error", err)
}

View File

@ -613,6 +613,32 @@ paths:
items:
$ref: "#/components/schemas/Sandbox"
/v1/sandboxes/stats:
get:
summary: Get sandbox usage stats for your team
operationId: getSandboxStats
tags: [sandboxes]
security:
- apiKeyAuth: []
parameters:
- name: range
in: query
required: false
schema:
type: string
enum: [5m, 1h, 6h, 24h, 30d]
default: 1h
description: Time window for the time-series data.
responses:
"200":
description: Sandbox stats for the team
content:
application/json:
schema:
$ref: "#/components/schemas/SandboxStats"
"400":
$ref: "#/components/responses/BadRequest"
/v1/sandboxes/{id}:
parameters:
- name: id
@ -1578,6 +1604,57 @@ components:
after this duration of inactivity (no exec or ping). 0 means
no auto-pause.
SandboxStats:
type: object
properties:
range:
type: string
enum: [5m, 1h, 6h, 24h, 30d]
current:
type: object
properties:
running_count:
type: integer
vcpus_reserved:
type: integer
memory_mb_reserved:
type: integer
sampled_at:
# The server omits this field (it is not sent as null) when no sample
# timestamp is available.
type: string
format: date-time
nullable: true
peaks:
type: object
description: Maximum values over the last 30 days.
properties:
running_count:
type: integer
vcpus:
type: integer
memory_mb:
type: integer
series:
type: object
description: Parallel arrays for chart rendering.
properties:
labels:
type: array
items:
type: string
format: date-time
running:
type: array
items:
type: integer
vcpus:
type: array
items:
type: integer
memory_mb:
type: array
items:
type: integer
Sandbox:
type: object
properties:

View File

@ -46,6 +46,7 @@ func New(
hostSvc := &service.HostService{DB: queries, Redis: rdb, JWT: jwtSecret, Pool: pool}
teamSvc := &service.TeamService{DB: queries, Pool: pgPool, HostPool: pool}
auditSvc := &service.AuditService{DB: queries}
statsSvc := &service.StatsService{DB: queries, Pool: pgPool}
al := audit.New(queries)
@ -62,6 +63,7 @@ func New(
teamH := newTeamHandler(teamSvc, al)
usersH := newUsersHandler(teamSvc)
auditH := newAuditHandler(auditSvc)
statsH := newStatsHandler(statsSvc)
// OpenAPI spec and docs.
r.Get("/openapi.yaml", serveOpenAPI)
@ -109,6 +111,7 @@ func New(
r.Use(requireAPIKeyOrJWT(queries, jwtSecret))
r.Post("/", sandbox.Create)
r.Get("/", sandbox.List)
r.Get("/stats", statsH.GetStats)
r.Route("/{id}", func(r chi.Router) {
r.Get("/", sandbox.Get)