forked from wrenn/wrenn
Fix metrics correctness, redesign stats page
- Replace the stale snapshot read (GetCurrentMetrics) with a live query (GetLiveMetrics) against the sandboxes table — it always returns correct zeros when no capsules are running
- Fix the CPU reserved formula: running + starting only; paused VMs no longer contribute vCPUs (RAM reservation for paused VMs is unchanged)
- Merge the top cards into 3 paired Now/Peak cards with colored accent borders (green/blue/amber, matching the chart colors)
- Move the Live badge from the Running Capsules card to the page-level header
- Add colored category dots to card and chart headers
- Stack the charts vertically, with flex-1 so they fill the remaining page height
- Change the vCPUs chart color to blue (#5a9fd4); RAM stays amber
This commit is contained in:
@ -18,10 +18,9 @@ func newStatsHandler(svc *service.StatsService) *statsHandler {
|
||||
}
|
||||
|
||||
type statsCurrentResponse struct {
|
||||
RunningCount int32 `json:"running_count"`
|
||||
VCPUsReserved int32 `json:"vcpus_reserved"`
|
||||
MemoryMBReserved int32 `json:"memory_mb_reserved"`
|
||||
SampledAt string `json:"sampled_at,omitempty"`
|
||||
RunningCount int32 `json:"running_count"`
|
||||
VCPUsReserved int32 `json:"vcpus_reserved"`
|
||||
MemoryMBReserved int32 `json:"memory_mb_reserved"`
|
||||
}
|
||||
|
||||
type statsPeaksResponse struct {
|
||||
@ -85,10 +84,6 @@ func (h *statsHandler) GetStats(w http.ResponseWriter, r *http.Request) {
|
||||
},
|
||||
}
|
||||
|
||||
if !current.SampledAt.IsZero() {
|
||||
resp.Current.SampledAt = current.SampledAt.UTC().Format(time.RFC3339)
|
||||
}
|
||||
|
||||
for i, pt := range series {
|
||||
resp.Series.Labels[i] = pt.Bucket.UTC().Format(time.RFC3339)
|
||||
resp.Series.Running[i] = pt.RunningCount
|
||||
|
||||
@ -7,34 +7,31 @@ package db
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"github.com/jackc/pgx/v5/pgtype"
|
||||
)
|
||||
|
||||
const getCurrentMetrics = `-- name: GetCurrentMetrics :one
|
||||
SELECT running_count, vcpus_reserved, memory_mb_reserved, sampled_at
|
||||
FROM sandbox_metrics_snapshots
|
||||
const getLiveMetrics = `-- name: GetLiveMetrics :one
|
||||
SELECT
|
||||
(COUNT(*) FILTER (WHERE status IN ('running', 'starting')))::INTEGER AS running_count,
|
||||
(COALESCE(SUM(vcpus) FILTER (WHERE status IN ('running', 'starting')), 0))::INTEGER AS vcpus_reserved,
|
||||
(COALESCE(SUM(memory_mb) FILTER (WHERE status IN ('running', 'starting')), 0)
|
||||
+ CEIL(COALESCE(SUM(memory_mb) FILTER (WHERE status = 'paused'), 0)::NUMERIC / 2))::INTEGER AS memory_mb_reserved
|
||||
FROM sandboxes
|
||||
WHERE team_id = $1
|
||||
ORDER BY sampled_at DESC
|
||||
LIMIT 1
|
||||
`
|
||||
|
||||
type GetCurrentMetricsRow struct {
|
||||
RunningCount int32 `json:"running_count"`
|
||||
VcpusReserved int32 `json:"vcpus_reserved"`
|
||||
MemoryMbReserved int32 `json:"memory_mb_reserved"`
|
||||
SampledAt pgtype.Timestamptz `json:"sampled_at"`
|
||||
// GetLiveMetricsRow is the single row produced by the GetLiveMetrics query.
// Its fields correspond, in order, to the running_count, vcpus_reserved and
// memory_mb_reserved columns selected by that query.
type GetLiveMetricsRow struct {
|
||||
RunningCount int32 `json:"running_count"`
|
||||
VcpusReserved int32 `json:"vcpus_reserved"`
|
||||
MemoryMbReserved int32 `json:"memory_mb_reserved"`
|
||||
}
|
||||
|
||||
func (q *Queries) GetCurrentMetrics(ctx context.Context, teamID string) (GetCurrentMetricsRow, error) {
|
||||
row := q.db.QueryRow(ctx, getCurrentMetrics, teamID)
|
||||
var i GetCurrentMetricsRow
|
||||
err := row.Scan(
|
||||
&i.RunningCount,
|
||||
&i.VcpusReserved,
|
||||
&i.MemoryMbReserved,
|
||||
&i.SampledAt,
|
||||
)
|
||||
// GetLiveMetrics runs the getLiveMetrics query for teamID and returns the
// resulting row together with any error reported by Scan.
//
// Reads directly from sandboxes for accurate real-time current values.
|
||||
// CPU reserved = running + starting only (paused VMs release CPU).
|
||||
// RAM reserved = running + starting + ceil(paused/2) (capacity held for resume).
|
||||
func (q *Queries) GetLiveMetrics(ctx context.Context, teamID string) (GetLiveMetricsRow, error) {
|
||||
row := q.db.QueryRow(ctx, getLiveMetrics, teamID)
|
||||
var i GetLiveMetricsRow
|
||||
// Scan targets must stay in the same order as the SELECT column list
|
||||
// (running_count, vcpus_reserved, memory_mb_reserved).
err := row.Scan(&i.RunningCount, &i.VcpusReserved, &i.MemoryMbReserved)
|
||||
// The error is returned unwrapped; callers add their own context.
// NOTE(review): the query is an aggregate without GROUP BY, so it should
// yield one row (with zeros) even when the team has no sandboxes — confirm.
return i, err
|
||||
}
|
||||
|
||||
@ -97,8 +94,7 @@ const sampleSandboxMetrics = `-- name: SampleSandboxMetrics :many
|
||||
SELECT
|
||||
team_id,
|
||||
(COUNT(*) FILTER (WHERE status IN ('running', 'starting')))::INTEGER AS running_count,
|
||||
(COALESCE(SUM(vcpus) FILTER (WHERE status IN ('running', 'starting')), 0)
|
||||
+ CEIL(COALESCE(SUM(vcpus) FILTER (WHERE status = 'paused'), 0)::NUMERIC / 2))::INTEGER AS vcpus_reserved,
|
||||
(COALESCE(SUM(vcpus) FILTER (WHERE status IN ('running', 'starting')), 0))::INTEGER AS vcpus_reserved,
|
||||
(COALESCE(SUM(memory_mb) FILTER (WHERE status IN ('running', 'starting')), 0)
|
||||
+ CEIL(COALESCE(SUM(memory_mb) FILTER (WHERE status = 'paused'), 0)::NUMERIC / 2))::INTEGER AS memory_mb_reserved
|
||||
FROM sandboxes
|
||||
@ -114,7 +110,8 @@ type SampleSandboxMetricsRow struct {
|
||||
}
|
||||
|
||||
// Aggregates per-team resource usage from the live sandboxes table.
|
||||
// paused sandboxes count at 50% (ceil) for capacity reservation.
|
||||
// CPU reserved = running + starting only (paused VMs release CPU).
|
||||
// RAM reserved = running + starting + ceil(paused/2) (capacity held for resume).
|
||||
func (q *Queries) SampleSandboxMetrics(ctx context.Context) ([]SampleSandboxMetricsRow, error) {
|
||||
rows, err := q.db.Query(ctx, sampleSandboxMetrics)
|
||||
if err != nil {
|
||||
|
||||
@ -50,12 +50,11 @@ type StatPoint struct {
|
||||
MemoryMBReserved int32
|
||||
}
|
||||
|
||||
// CurrentStats holds the most recent sampled values for a team.
|
||||
// CurrentStats holds the live values for a team, read directly from sandboxes.
|
||||
type CurrentStats struct {
|
||||
RunningCount int32
|
||||
VCPUsReserved int32
|
||||
MemoryMBReserved int32
|
||||
SampledAt time.Time
|
||||
}
|
||||
|
||||
// PeakStats holds the 30-day maximum values for a team.
|
||||
@ -79,19 +78,16 @@ func (s *StatsService) GetStats(ctx context.Context, teamID string, r TimeRange)
|
||||
return CurrentStats{}, PeakStats{}, nil, fmt.Errorf("unknown range: %s", r)
|
||||
}
|
||||
|
||||
// Current snapshot.
|
||||
var current CurrentStats
|
||||
cur, err := s.DB.GetCurrentMetrics(ctx, teamID)
|
||||
if err != nil && !errors.Is(err, pgx.ErrNoRows) {
|
||||
return CurrentStats{}, PeakStats{}, nil, fmt.Errorf("get current metrics: %w", err)
|
||||
// Current live values — read directly from sandboxes so we always reflect
|
||||
// the true state even when no capsules are running.
|
||||
cur, err := s.DB.GetLiveMetrics(ctx, teamID)
|
||||
if err != nil {
|
||||
return CurrentStats{}, PeakStats{}, nil, fmt.Errorf("get live metrics: %w", err)
|
||||
}
|
||||
if err == nil {
|
||||
current = CurrentStats{
|
||||
RunningCount: cur.RunningCount,
|
||||
VCPUsReserved: cur.VcpusReserved,
|
||||
MemoryMBReserved: cur.MemoryMbReserved,
|
||||
SampledAt: cur.SampledAt.Time,
|
||||
}
|
||||
current := CurrentStats{
|
||||
RunningCount: cur.RunningCount,
|
||||
VCPUsReserved: cur.VcpusReserved,
|
||||
MemoryMBReserved: cur.MemoryMbReserved,
|
||||
}
|
||||
|
||||
// 30-day peaks.
|
||||
|
||||
Reference in New Issue
Block a user