forked from wrenn/wrenn
Fix metrics sampler to record zero-value snapshots when idle
SampleSandboxMetrics previously filtered WHERE status IN ('running',
'starting', 'paused'), which returned no rows when all capsules were
stopped. This caused zero-value snapshots to be skipped, leaving the
time-series charts with no trailing data points instead of showing
the expected zero values.
Remove the WHERE filter so the query groups by all teams that have
any sandbox row. The per-status FILTER clauses on the aggregates
already produce correct zero counts for stopped capsules.
Also includes the per-VM RAM ceiling formula change for paused VMs in both
GetLiveMetrics and SampleSandboxMetrics (sum(ceil(each/2)) instead of ceil(sum/2)).
This commit is contained in:
@ -5,12 +5,12 @@ VALUES ($1, $2, $3, $4);
|
|||||||
-- name: GetLiveMetrics :one
|
-- name: GetLiveMetrics :one
|
||||||
-- Reads directly from sandboxes for accurate real-time current values.
|
-- Reads directly from sandboxes for accurate real-time current values.
|
||||||
-- CPU reserved = running + starting only (paused VMs release CPU).
|
-- CPU reserved = running + starting only (paused VMs release CPU).
|
||||||
-- RAM reserved = running + starting + ceil(paused/2) (capacity held for resume).
|
-- RAM reserved = running + starting + sum(ceil(each_paused/2)) (per-VM ceiling).
|
||||||
SELECT
|
SELECT
|
||||||
(COUNT(*) FILTER (WHERE status IN ('running', 'starting')))::INTEGER AS running_count,
|
(COUNT(*) FILTER (WHERE status IN ('running', 'starting')))::INTEGER AS running_count,
|
||||||
(COALESCE(SUM(vcpus) FILTER (WHERE status IN ('running', 'starting')), 0))::INTEGER AS vcpus_reserved,
|
(COALESCE(SUM(vcpus) FILTER (WHERE status IN ('running', 'starting')), 0))::INTEGER AS vcpus_reserved,
|
||||||
(COALESCE(SUM(memory_mb) FILTER (WHERE status IN ('running', 'starting')), 0)
|
(COALESCE(SUM(memory_mb) FILTER (WHERE status IN ('running', 'starting')), 0)
|
||||||
+ CEIL(COALESCE(SUM(memory_mb) FILTER (WHERE status = 'paused'), 0)::NUMERIC / 2))::INTEGER AS memory_mb_reserved
|
+ COALESCE(SUM(CEIL(memory_mb::NUMERIC / 2)) FILTER (WHERE status = 'paused'), 0))::INTEGER AS memory_mb_reserved
|
||||||
FROM sandboxes
|
FROM sandboxes
|
||||||
WHERE team_id = $1;
|
WHERE team_id = $1;
|
||||||
|
|
||||||
@ -29,14 +29,16 @@ WHERE sampled_at < NOW() - INTERVAL '60 days';
|
|||||||
|
|
||||||
-- name: SampleSandboxMetrics :many
|
-- name: SampleSandboxMetrics :many
|
||||||
-- Aggregates per-team resource usage from the live sandboxes table.
|
-- Aggregates per-team resource usage from the live sandboxes table.
|
||||||
|
-- Groups by all teams that have any sandbox row (including stopped) so that
|
||||||
|
-- zero-value snapshots are recorded when all capsules are stopped, keeping the
|
||||||
|
-- time-series charts continuous rather than trailing off into empty space.
|
||||||
-- CPU reserved = running + starting only (paused VMs release CPU).
|
-- CPU reserved = running + starting only (paused VMs release CPU).
|
||||||
-- RAM reserved = running + starting + ceil(paused/2) (capacity held for resume).
|
-- RAM reserved = running + starting + sum(ceil(each_paused/2)) (per-VM ceiling).
|
||||||
SELECT
|
SELECT
|
||||||
team_id,
|
team_id,
|
||||||
(COUNT(*) FILTER (WHERE status IN ('running', 'starting')))::INTEGER AS running_count,
|
(COUNT(*) FILTER (WHERE status IN ('running', 'starting')))::INTEGER AS running_count,
|
||||||
(COALESCE(SUM(vcpus) FILTER (WHERE status IN ('running', 'starting')), 0))::INTEGER AS vcpus_reserved,
|
(COALESCE(SUM(vcpus) FILTER (WHERE status IN ('running', 'starting')), 0))::INTEGER AS vcpus_reserved,
|
||||||
(COALESCE(SUM(memory_mb) FILTER (WHERE status IN ('running', 'starting')), 0)
|
(COALESCE(SUM(memory_mb) FILTER (WHERE status IN ('running', 'starting')), 0)
|
||||||
+ CEIL(COALESCE(SUM(memory_mb) FILTER (WHERE status = 'paused'), 0)::NUMERIC / 2))::INTEGER AS memory_mb_reserved
|
+ COALESCE(SUM(CEIL(memory_mb::NUMERIC / 2)) FILTER (WHERE status = 'paused'), 0))::INTEGER AS memory_mb_reserved
|
||||||
FROM sandboxes
|
FROM sandboxes
|
||||||
WHERE status IN ('running', 'starting', 'paused')
|
|
||||||
GROUP BY team_id;
|
GROUP BY team_id;
|
||||||
|
|||||||
@ -14,7 +14,7 @@ SELECT
|
|||||||
(COUNT(*) FILTER (WHERE status IN ('running', 'starting')))::INTEGER AS running_count,
|
(COUNT(*) FILTER (WHERE status IN ('running', 'starting')))::INTEGER AS running_count,
|
||||||
(COALESCE(SUM(vcpus) FILTER (WHERE status IN ('running', 'starting')), 0))::INTEGER AS vcpus_reserved,
|
(COALESCE(SUM(vcpus) FILTER (WHERE status IN ('running', 'starting')), 0))::INTEGER AS vcpus_reserved,
|
||||||
(COALESCE(SUM(memory_mb) FILTER (WHERE status IN ('running', 'starting')), 0)
|
(COALESCE(SUM(memory_mb) FILTER (WHERE status IN ('running', 'starting')), 0)
|
||||||
+ CEIL(COALESCE(SUM(memory_mb) FILTER (WHERE status = 'paused'), 0)::NUMERIC / 2))::INTEGER AS memory_mb_reserved
|
+ COALESCE(SUM(CEIL(memory_mb::NUMERIC / 2)) FILTER (WHERE status = 'paused'), 0))::INTEGER AS memory_mb_reserved
|
||||||
FROM sandboxes
|
FROM sandboxes
|
||||||
WHERE team_id = $1
|
WHERE team_id = $1
|
||||||
`
|
`
|
||||||
@ -27,7 +27,7 @@ type GetLiveMetricsRow struct {
|
|||||||
|
|
||||||
// Reads directly from sandboxes for accurate real-time current values.
|
// Reads directly from sandboxes for accurate real-time current values.
|
||||||
// CPU reserved = running + starting only (paused VMs release CPU).
|
// CPU reserved = running + starting only (paused VMs release CPU).
|
||||||
// RAM reserved = running + starting + ceil(paused/2) (capacity held for resume).
|
// RAM reserved = running + starting + sum(ceil(each_paused/2)) (per-VM ceiling).
|
||||||
func (q *Queries) GetLiveMetrics(ctx context.Context, teamID string) (GetLiveMetricsRow, error) {
|
func (q *Queries) GetLiveMetrics(ctx context.Context, teamID string) (GetLiveMetricsRow, error) {
|
||||||
row := q.db.QueryRow(ctx, getLiveMetrics, teamID)
|
row := q.db.QueryRow(ctx, getLiveMetrics, teamID)
|
||||||
var i GetLiveMetricsRow
|
var i GetLiveMetricsRow
|
||||||
@ -96,9 +96,8 @@ SELECT
|
|||||||
(COUNT(*) FILTER (WHERE status IN ('running', 'starting')))::INTEGER AS running_count,
|
(COUNT(*) FILTER (WHERE status IN ('running', 'starting')))::INTEGER AS running_count,
|
||||||
(COALESCE(SUM(vcpus) FILTER (WHERE status IN ('running', 'starting')), 0))::INTEGER AS vcpus_reserved,
|
(COALESCE(SUM(vcpus) FILTER (WHERE status IN ('running', 'starting')), 0))::INTEGER AS vcpus_reserved,
|
||||||
(COALESCE(SUM(memory_mb) FILTER (WHERE status IN ('running', 'starting')), 0)
|
(COALESCE(SUM(memory_mb) FILTER (WHERE status IN ('running', 'starting')), 0)
|
||||||
+ CEIL(COALESCE(SUM(memory_mb) FILTER (WHERE status = 'paused'), 0)::NUMERIC / 2))::INTEGER AS memory_mb_reserved
|
+ COALESCE(SUM(CEIL(memory_mb::NUMERIC / 2)) FILTER (WHERE status = 'paused'), 0))::INTEGER AS memory_mb_reserved
|
||||||
FROM sandboxes
|
FROM sandboxes
|
||||||
WHERE status IN ('running', 'starting', 'paused')
|
|
||||||
GROUP BY team_id
|
GROUP BY team_id
|
||||||
`
|
`
|
||||||
|
|
||||||
@ -110,8 +109,11 @@ type SampleSandboxMetricsRow struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Aggregates per-team resource usage from the live sandboxes table.
|
// Aggregates per-team resource usage from the live sandboxes table.
|
||||||
|
// Groups by all teams that have any sandbox row (including stopped) so that
|
||||||
|
// zero-value snapshots are recorded when all capsules are stopped, keeping the
|
||||||
|
// time-series charts continuous rather than trailing off into empty space.
|
||||||
// CPU reserved = running + starting only (paused VMs release CPU).
|
// CPU reserved = running + starting only (paused VMs release CPU).
|
||||||
// RAM reserved = running + starting + ceil(paused/2) (capacity held for resume).
|
// RAM reserved = running + starting + sum(ceil(each_paused/2)) (per-VM ceiling).
|
||||||
func (q *Queries) SampleSandboxMetrics(ctx context.Context) ([]SampleSandboxMetricsRow, error) {
|
func (q *Queries) SampleSandboxMetrics(ctx context.Context) ([]SampleSandboxMetricsRow, error) {
|
||||||
rows, err := q.db.Query(ctx, sampleSandboxMetrics)
|
rows, err := q.db.Query(ctx, sampleSandboxMetrics)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|||||||
Reference in New Issue
Block a user