From e3750f79f9ab6d658e885f46d69bb94c10237d15 Mon Sep 17 00:00:00 2001 From: pptx704 Date: Wed, 25 Mar 2026 15:50:19 +0600 Subject: [PATCH] Fix metrics sampler to record zero-value snapshots when idle SampleSandboxMetrics previously filtered WHERE status IN ('running', 'starting', 'paused'), which returned no rows when all capsules were stopped. This caused zero-value snapshots to be skipped, leaving the time-series charts with no trailing data points instead of showing the expected zero values. Remove the WHERE filter so the query groups by all teams that have any sandbox row. The per-status FILTER clauses on the aggregates already produce correct zero counts for stopped capsules. Also includes the per-VM RAM ceiling formula change (sum(ceil(each/2)) instead of ceil(sum/2)). --- db/queries/metrics.sql | 12 +++++++----- internal/db/metrics.sql.go | 12 +++++++----- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/db/queries/metrics.sql b/db/queries/metrics.sql index 43171e5..325df8d 100644 --- a/db/queries/metrics.sql +++ b/db/queries/metrics.sql @@ -5,12 +5,12 @@ VALUES ($1, $2, $3, $4); -- name: GetLiveMetrics :one -- Reads directly from sandboxes for accurate real-time current values. -- CPU reserved = running + starting only (paused VMs release CPU). --- RAM reserved = running + starting + ceil(paused/2) (capacity held for resume). +-- RAM reserved = running + starting + sum(ceil(each_paused/2)) (per-VM ceiling). 
SELECT (COUNT(*) FILTER (WHERE status IN ('running', 'starting')))::INTEGER AS running_count, (COALESCE(SUM(vcpus) FILTER (WHERE status IN ('running', 'starting')), 0))::INTEGER AS vcpus_reserved, (COALESCE(SUM(memory_mb) FILTER (WHERE status IN ('running', 'starting')), 0) - + CEIL(COALESCE(SUM(memory_mb) FILTER (WHERE status = 'paused'), 0)::NUMERIC / 2))::INTEGER AS memory_mb_reserved + + COALESCE(SUM(CEIL(memory_mb::NUMERIC / 2)) FILTER (WHERE status = 'paused'), 0))::INTEGER AS memory_mb_reserved FROM sandboxes WHERE team_id = $1; @@ -29,14 +29,16 @@ WHERE sampled_at < NOW() - INTERVAL '60 days'; -- name: SampleSandboxMetrics :many -- Aggregates per-team resource usage from the live sandboxes table. +-- Groups by all teams that have any sandbox row (including stopped) so that +-- zero-value snapshots are recorded when all capsules are stopped, keeping the +-- time-series charts continuous rather than trailing off into empty space. -- CPU reserved = running + starting only (paused VMs release CPU). --- RAM reserved = running + starting + ceil(paused/2) (capacity held for resume). +-- RAM reserved = running + starting + sum(ceil(each_paused/2)) (per-VM ceiling). 
SELECT team_id, (COUNT(*) FILTER (WHERE status IN ('running', 'starting')))::INTEGER AS running_count, (COALESCE(SUM(vcpus) FILTER (WHERE status IN ('running', 'starting')), 0))::INTEGER AS vcpus_reserved, (COALESCE(SUM(memory_mb) FILTER (WHERE status IN ('running', 'starting')), 0) - + CEIL(COALESCE(SUM(memory_mb) FILTER (WHERE status = 'paused'), 0)::NUMERIC / 2))::INTEGER AS memory_mb_reserved + + COALESCE(SUM(CEIL(memory_mb::NUMERIC / 2)) FILTER (WHERE status = 'paused'), 0))::INTEGER AS memory_mb_reserved FROM sandboxes -WHERE status IN ('running', 'starting', 'paused') GROUP BY team_id; diff --git a/internal/db/metrics.sql.go b/internal/db/metrics.sql.go index afa56d7..dffc039 100644 --- a/internal/db/metrics.sql.go +++ b/internal/db/metrics.sql.go @@ -14,7 +14,7 @@ SELECT (COUNT(*) FILTER (WHERE status IN ('running', 'starting')))::INTEGER AS running_count, (COALESCE(SUM(vcpus) FILTER (WHERE status IN ('running', 'starting')), 0))::INTEGER AS vcpus_reserved, (COALESCE(SUM(memory_mb) FILTER (WHERE status IN ('running', 'starting')), 0) - + CEIL(COALESCE(SUM(memory_mb) FILTER (WHERE status = 'paused'), 0)::NUMERIC / 2))::INTEGER AS memory_mb_reserved + + COALESCE(SUM(CEIL(memory_mb::NUMERIC / 2)) FILTER (WHERE status = 'paused'), 0))::INTEGER AS memory_mb_reserved FROM sandboxes WHERE team_id = $1 ` @@ -27,7 +27,7 @@ type GetLiveMetricsRow struct { // Reads directly from sandboxes for accurate real-time current values. // CPU reserved = running + starting only (paused VMs release CPU). -// RAM reserved = running + starting + ceil(paused/2) (capacity held for resume). +// RAM reserved = running + starting + sum(ceil(each_paused/2)) (per-VM ceiling). 
func (q *Queries) GetLiveMetrics(ctx context.Context, teamID string) (GetLiveMetricsRow, error) { row := q.db.QueryRow(ctx, getLiveMetrics, teamID) var i GetLiveMetricsRow @@ -96,9 +96,8 @@ SELECT (COUNT(*) FILTER (WHERE status IN ('running', 'starting')))::INTEGER AS running_count, (COALESCE(SUM(vcpus) FILTER (WHERE status IN ('running', 'starting')), 0))::INTEGER AS vcpus_reserved, (COALESCE(SUM(memory_mb) FILTER (WHERE status IN ('running', 'starting')), 0) - + CEIL(COALESCE(SUM(memory_mb) FILTER (WHERE status = 'paused'), 0)::NUMERIC / 2))::INTEGER AS memory_mb_reserved + + COALESCE(SUM(CEIL(memory_mb::NUMERIC / 2)) FILTER (WHERE status = 'paused'), 0))::INTEGER AS memory_mb_reserved FROM sandboxes -WHERE status IN ('running', 'starting', 'paused') GROUP BY team_id ` @@ -110,8 +109,11 @@ type SampleSandboxMetricsRow struct { } // Aggregates per-team resource usage from the live sandboxes table. +// Groups by all teams that have any sandbox row (including stopped) so that +// zero-value snapshots are recorded when all capsules are stopped, keeping the +// time-series charts continuous rather than trailing off into empty space. // CPU reserved = running + starting only (paused VMs release CPU). -// RAM reserved = running + starting + ceil(paused/2) (capacity held for resume). +// RAM reserved = running + starting + sum(ceil(each_paused/2)) (per-VM ceiling). func (q *Queries) SampleSandboxMetrics(ctx context.Context) ([]SampleSandboxMetricsRow, error) { rows, err := q.db.Query(ctx, sampleSandboxMetrics) if err != nil {