1
0
forked from wrenn/wrenn

Fix metrics correctness, redesign stats page

- Replace stale snapshot read (GetCurrentMetrics) with live query
  (GetLiveMetrics) against sandboxes table — always returns correct
  zeros when no capsules are running
- Fix CPU reserved formula: count running + starting only; paused VMs no
  longer contribute vCPUs (RAM reservation for paused VMs is unchanged:
  ceil(paused/2) is still held for resume)
- Merge top cards into 3 paired Now/Peak cards with colored accent
  borders (green/blue/amber matching chart colors)
- Move Live badge from Running Capsules card to page-level header
- Add colored category dots to card and chart headers
- Stack charts vertically and apply flex-1 so they fill the remaining page height
- vCPUs chart color changed to blue (#5a9fd4), RAM stays amber
This commit is contained in:
2026-03-25 15:11:46 +06:00
parent fee66bda50
commit 47b0ed5b52
5 changed files with 185 additions and 167 deletions

View File

@ -18,10 +18,9 @@ func newStatsHandler(svc *service.StatsService) *statsHandler {
}
type statsCurrentResponse struct {
RunningCount int32 `json:"running_count"`
VCPUsReserved int32 `json:"vcpus_reserved"`
MemoryMBReserved int32 `json:"memory_mb_reserved"`
SampledAt string `json:"sampled_at,omitempty"`
RunningCount int32 `json:"running_count"`
VCPUsReserved int32 `json:"vcpus_reserved"`
MemoryMBReserved int32 `json:"memory_mb_reserved"`
}
type statsPeaksResponse struct {
@ -85,10 +84,6 @@ func (h *statsHandler) GetStats(w http.ResponseWriter, r *http.Request) {
},
}
if !current.SampledAt.IsZero() {
resp.Current.SampledAt = current.SampledAt.UTC().Format(time.RFC3339)
}
for i, pt := range series {
resp.Series.Labels[i] = pt.Bucket.UTC().Format(time.RFC3339)
resp.Series.Running[i] = pt.RunningCount

View File

@ -7,34 +7,31 @@ package db
import (
"context"
"github.com/jackc/pgx/v5/pgtype"
)
const getCurrentMetrics = `-- name: GetCurrentMetrics :one
SELECT running_count, vcpus_reserved, memory_mb_reserved, sampled_at
FROM sandbox_metrics_snapshots
const getLiveMetrics = `-- name: GetLiveMetrics :one
SELECT
(COUNT(*) FILTER (WHERE status IN ('running', 'starting')))::INTEGER AS running_count,
(COALESCE(SUM(vcpus) FILTER (WHERE status IN ('running', 'starting')), 0))::INTEGER AS vcpus_reserved,
(COALESCE(SUM(memory_mb) FILTER (WHERE status IN ('running', 'starting')), 0)
+ CEIL(COALESCE(SUM(memory_mb) FILTER (WHERE status = 'paused'), 0)::NUMERIC / 2))::INTEGER AS memory_mb_reserved
FROM sandboxes
WHERE team_id = $1
ORDER BY sampled_at DESC
LIMIT 1
`
type GetCurrentMetricsRow struct {
RunningCount int32 `json:"running_count"`
VcpusReserved int32 `json:"vcpus_reserved"`
MemoryMbReserved int32 `json:"memory_mb_reserved"`
SampledAt pgtype.Timestamptz `json:"sampled_at"`
type GetLiveMetricsRow struct {
RunningCount int32 `json:"running_count"`
VcpusReserved int32 `json:"vcpus_reserved"`
MemoryMbReserved int32 `json:"memory_mb_reserved"`
}
func (q *Queries) GetCurrentMetrics(ctx context.Context, teamID string) (GetCurrentMetricsRow, error) {
row := q.db.QueryRow(ctx, getCurrentMetrics, teamID)
var i GetCurrentMetricsRow
err := row.Scan(
&i.RunningCount,
&i.VcpusReserved,
&i.MemoryMbReserved,
&i.SampledAt,
)
// Reads directly from sandboxes for accurate real-time current values.
// CPU reserved = running + starting only (paused VMs release CPU).
// RAM reserved = running + starting + ceil(paused/2) (capacity held for resume).
func (q *Queries) GetLiveMetrics(ctx context.Context, teamID string) (GetLiveMetricsRow, error) {
row := q.db.QueryRow(ctx, getLiveMetrics, teamID)
var i GetLiveMetricsRow
err := row.Scan(&i.RunningCount, &i.VcpusReserved, &i.MemoryMbReserved)
return i, err
}
@ -97,8 +94,7 @@ const sampleSandboxMetrics = `-- name: SampleSandboxMetrics :many
SELECT
team_id,
(COUNT(*) FILTER (WHERE status IN ('running', 'starting')))::INTEGER AS running_count,
(COALESCE(SUM(vcpus) FILTER (WHERE status IN ('running', 'starting')), 0)
+ CEIL(COALESCE(SUM(vcpus) FILTER (WHERE status = 'paused'), 0)::NUMERIC / 2))::INTEGER AS vcpus_reserved,
(COALESCE(SUM(vcpus) FILTER (WHERE status IN ('running', 'starting')), 0))::INTEGER AS vcpus_reserved,
(COALESCE(SUM(memory_mb) FILTER (WHERE status IN ('running', 'starting')), 0)
+ CEIL(COALESCE(SUM(memory_mb) FILTER (WHERE status = 'paused'), 0)::NUMERIC / 2))::INTEGER AS memory_mb_reserved
FROM sandboxes
@ -114,7 +110,8 @@ type SampleSandboxMetricsRow struct {
}
// Aggregates per-team resource usage from the live sandboxes table.
// paused sandboxes count at 50% (ceil) for capacity reservation.
// CPU reserved = running + starting only (paused VMs release CPU).
// RAM reserved = running + starting + ceil(paused/2) (capacity held for resume).
func (q *Queries) SampleSandboxMetrics(ctx context.Context) ([]SampleSandboxMetricsRow, error) {
rows, err := q.db.Query(ctx, sampleSandboxMetrics)
if err != nil {

View File

@ -50,12 +50,11 @@ type StatPoint struct {
MemoryMBReserved int32
}
// CurrentStats holds the most recent sampled values for a team.
// CurrentStats holds the live values for a team, read directly from sandboxes.
type CurrentStats struct {
RunningCount int32
VCPUsReserved int32
MemoryMBReserved int32
SampledAt time.Time
}
// PeakStats holds the 30-day maximum values for a team.
@ -79,19 +78,16 @@ func (s *StatsService) GetStats(ctx context.Context, teamID string, r TimeRange)
return CurrentStats{}, PeakStats{}, nil, fmt.Errorf("unknown range: %s", r)
}
// Current snapshot.
var current CurrentStats
cur, err := s.DB.GetCurrentMetrics(ctx, teamID)
if err != nil && !errors.Is(err, pgx.ErrNoRows) {
return CurrentStats{}, PeakStats{}, nil, fmt.Errorf("get current metrics: %w", err)
// Current live values — read directly from sandboxes so we always reflect
// the true state even when no capsules are running.
cur, err := s.DB.GetLiveMetrics(ctx, teamID)
if err != nil {
return CurrentStats{}, PeakStats{}, nil, fmt.Errorf("get live metrics: %w", err)
}
if err == nil {
current = CurrentStats{
RunningCount: cur.RunningCount,
VCPUsReserved: cur.VcpusReserved,
MemoryMBReserved: cur.MemoryMbReserved,
SampledAt: cur.SampledAt.Time,
}
current := CurrentStats{
RunningCount: cur.RunningCount,
VCPUsReserved: cur.VcpusReserved,
MemoryMBReserved: cur.MemoryMbReserved,
}
// 30-day peaks.