From fee66bda501653e208eee2d4746e34a33ff56da7 Mon Sep 17 00:00:00 2001 From: pptx704 Date: Wed, 25 Mar 2026 14:41:05 +0600 Subject: [PATCH] Add live stats page with metrics sampling and route split MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - New sandbox_metrics_snapshots table sampled every 10s (60-day retention) - Background MetricsSampler goroutine wired into control plane startup - GET /v1/sandboxes/stats?range=5m|1h|6h|24h|30d endpoint with adaptive polling intervals; reserved CPU/RAM uses ceil(paused/2) formula - StatsPanel component: 4 stat cards + 2 Chart.js line charts (straight lines, integer y-axis for running count, dual-axis for CPU/RAM) - Range filter persisted in URL query param; polls update data silently (no blink — loading state only shown on initial mount) - Split /dashboard/capsules into /list and /stats sub-routes with shared layout; capsuleRunningCount store syncs badge across routes - CreateCapsuleDialog extracted as reusable component --- cmd/control-plane/main.go | 4 + .../20260325074949_metrics_snapshots.sql | 18 + db/queries/metrics.sql | 37 + frontend/package.json | 3 + frontend/pnpm-lock.yaml | 17 + frontend/src/lib/api/stats.ts | 44 + frontend/src/lib/capsule-store.svelte.ts | 3 + .../lib/components/CreateCapsuleDialog.svelte | 124 ++ frontend/src/lib/components/StatsPanel.svelte | 395 +++++++ .../routes/dashboard/capsules/+layout.svelte | 102 ++ .../routes/dashboard/capsules/+page.svelte | 1024 +---------------- .../src/routes/dashboard/capsules/+page.ts | 5 + .../dashboard/capsules/list/+page.svelte | 734 ++++++++++++ .../dashboard/capsules/stats/+page.svelte | 17 + internal/api/handlers_stats.go | 100 ++ internal/api/metrics_sampler.go | 68 ++ internal/api/openapi.yaml | 77 ++ internal/api/server.go | 3 + internal/db/metrics.sql.go | 141 +++ internal/db/models.go | 9 + internal/service/stats.go | 157 +++ 21 files changed, 2059 insertions(+), 1023 deletions(-) create mode 100644 
db/migrations/20260325074949_metrics_snapshots.sql create mode 100644 db/queries/metrics.sql create mode 100644 frontend/src/lib/api/stats.ts create mode 100644 frontend/src/lib/capsule-store.svelte.ts create mode 100644 frontend/src/lib/components/CreateCapsuleDialog.svelte create mode 100644 frontend/src/lib/components/StatsPanel.svelte create mode 100644 frontend/src/routes/dashboard/capsules/+layout.svelte create mode 100644 frontend/src/routes/dashboard/capsules/+page.ts create mode 100644 frontend/src/routes/dashboard/capsules/list/+page.svelte create mode 100644 frontend/src/routes/dashboard/capsules/stats/+page.svelte create mode 100644 internal/api/handlers_stats.go create mode 100644 internal/api/metrics_sampler.go create mode 100644 internal/db/metrics.sql.go create mode 100644 internal/service/stats.go diff --git a/cmd/control-plane/main.go b/cmd/control-plane/main.go index a7d2371..9f84edc 100644 --- a/cmd/control-plane/main.go +++ b/cmd/control-plane/main.go @@ -94,6 +94,10 @@ func main() { monitor := api.NewHostMonitor(queries, hostPool, audit.New(queries), 30*time.Second) monitor.Start(ctx) + // Start metrics sampler (records per-team sandbox stats every 10s). + sampler := api.NewMetricsSampler(queries, 10*time.Second) + sampler.Start(ctx) + httpServer := &http.Server{ Addr: cfg.ListenAddr, Handler: srv.Handler(), diff --git a/db/migrations/20260325074949_metrics_snapshots.sql b/db/migrations/20260325074949_metrics_snapshots.sql new file mode 100644 index 0000000..7d373e8 --- /dev/null +++ b/db/migrations/20260325074949_metrics_snapshots.sql @@ -0,0 +1,18 @@ +-- +goose Up + +CREATE TABLE sandbox_metrics_snapshots ( + id BIGSERIAL PRIMARY KEY, + team_id TEXT NOT NULL, + sampled_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + running_count INTEGER NOT NULL, + vcpus_reserved INTEGER NOT NULL, + memory_mb_reserved INTEGER NOT NULL +); + +-- All queries filter on team_id first then range-scan sampled_at. 
+CREATE INDEX idx_metrics_snapshots_team_time + ON sandbox_metrics_snapshots (team_id, sampled_at DESC); + +-- +goose Down + +DROP TABLE sandbox_metrics_snapshots; diff --git a/db/queries/metrics.sql b/db/queries/metrics.sql new file mode 100644 index 0000000..6cd805f --- /dev/null +++ b/db/queries/metrics.sql @@ -0,0 +1,37 @@ +-- name: InsertMetricsSnapshot :exec +INSERT INTO sandbox_metrics_snapshots (team_id, running_count, vcpus_reserved, memory_mb_reserved) +VALUES ($1, $2, $3, $4); + +-- name: GetCurrentMetrics :one +SELECT running_count, vcpus_reserved, memory_mb_reserved, sampled_at +FROM sandbox_metrics_snapshots +WHERE team_id = $1 +ORDER BY sampled_at DESC +LIMIT 1; + +-- name: GetPeakMetrics :one +SELECT + COALESCE(MAX(running_count), 0)::INTEGER AS peak_running_count, + COALESCE(MAX(vcpus_reserved), 0)::INTEGER AS peak_vcpus, + COALESCE(MAX(memory_mb_reserved), 0)::INTEGER AS peak_memory_mb +FROM sandbox_metrics_snapshots +WHERE team_id = $1 + AND sampled_at > NOW() - INTERVAL '30 days'; + +-- name: PruneOldMetrics :exec +DELETE FROM sandbox_metrics_snapshots +WHERE sampled_at < NOW() - INTERVAL '60 days'; + +-- name: SampleSandboxMetrics :many +-- Aggregates per-team resource usage from the live sandboxes table. +-- paused sandboxes count at 50% (ceil) for capacity reservation. 
+SELECT + team_id, + (COUNT(*) FILTER (WHERE status IN ('running', 'starting')))::INTEGER AS running_count, + (COALESCE(SUM(vcpus) FILTER (WHERE status IN ('running', 'starting')), 0) + + CEIL(COALESCE(SUM(vcpus) FILTER (WHERE status = 'paused'), 0)::NUMERIC / 2))::INTEGER AS vcpus_reserved, + (COALESCE(SUM(memory_mb) FILTER (WHERE status IN ('running', 'starting')), 0) + + CEIL(COALESCE(SUM(memory_mb) FILTER (WHERE status = 'paused'), 0)::NUMERIC / 2))::INTEGER AS memory_mb_reserved +FROM sandboxes +WHERE status IN ('running', 'starting', 'paused') +GROUP BY team_id; diff --git a/frontend/package.json b/frontend/package.json index f694403..85030ec 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -26,5 +26,8 @@ "tailwindcss": "^4.2.1", "typescript": "^5.9.3", "vite": "^7.3.1" + }, + "dependencies": { + "chart.js": "^4.5.1" } } diff --git a/frontend/pnpm-lock.yaml b/frontend/pnpm-lock.yaml index 9f0353e..5b60992 100644 --- a/frontend/pnpm-lock.yaml +++ b/frontend/pnpm-lock.yaml @@ -7,6 +7,10 @@ settings: importers: .: + dependencies: + chart.js: + specifier: ^4.5.1 + version: 4.5.1 devDependencies: '@fontsource-variable/jetbrains-mono': specifier: ^5.2.8 @@ -249,6 +253,9 @@ packages: '@jridgewell/trace-mapping@0.3.31': resolution: {integrity: sha512-zzNR+SdQSDJzc8joaeP8QQoCQr8NuYx2dIIytl1QeBEZHJ9uW6hebsrYgbz8hJwUQao3TWCMtmfV8Nu1twOLAw==} + '@kurkle/color@0.3.4': + resolution: {integrity: sha512-M5UknZPHRu3DEDWoipU6sE8PdkZ6Z/S+v4dD+Ke8IaNlpdSQah50lz1KtcFBa2vsdOnwbbnxJwVM4wty6udA5w==} + '@polka/url@1.0.0-next.29': resolution: {integrity: sha512-wwQAWhWSuHaag8c4q/KN/vCoeOJYshAIvMQwD4GpSb3OiZklFfvAgmj0VCBBImRpuF/aFgIRzllXlVX93Jevww==} @@ -547,6 +554,10 @@ packages: '@internationalized/date': ^3.8.1 svelte: ^5.33.0 + chart.js@4.5.1: + resolution: {integrity: sha512-GIjfiT9dbmHRiYi6Nl2yFCq7kkwdkp1W/lp2J99rX0yo9tgJGn3lKQATztIjb5tVtevcBtIdICNWqlq5+E8/Pw==} + engines: {pnpm: '>=8'} + chokidar@4.0.3: resolution: {integrity: 
sha512-Qgzu8kfBvo+cA4962jnP1KkS6Dop5NS6g7R5LFYJr4b8Ub94PPQXUksCw9PvXoeXPRRddRNC5C1JQUR2SMGtnA==} engines: {node: '>= 14.16.0'} @@ -980,6 +991,8 @@ snapshots: '@jridgewell/resolve-uri': 3.1.2 '@jridgewell/sourcemap-codec': 1.5.5 + '@kurkle/color@0.3.4': {} + '@polka/url@1.0.0-next.29': {} '@rollup/rollup-android-arm-eabi@4.59.0': @@ -1203,6 +1216,10 @@ snapshots: transitivePeerDependencies: - '@sveltejs/kit' + chart.js@4.5.1: + dependencies: + '@kurkle/color': 0.3.4 + chokidar@4.0.3: dependencies: readdirp: 4.1.2 diff --git a/frontend/src/lib/api/stats.ts b/frontend/src/lib/api/stats.ts new file mode 100644 index 0000000..3f85483 --- /dev/null +++ b/frontend/src/lib/api/stats.ts @@ -0,0 +1,44 @@ +import { apiFetch, type ApiResult } from '$lib/api/client'; + +export type TimeRange = '5m' | '1h' | '6h' | '24h' | '30d'; + +export type StatsResponse = { + range: TimeRange; + current: { + running_count: number; + vcpus_reserved: number; + memory_mb_reserved: number; + sampled_at?: string; + }; + peaks: { + running_count: number; + vcpus: number; + memory_mb: number; + }; + series: { + labels: string[]; + running: number[]; + vcpus: number[]; + memory_mb: number[]; + }; +}; + +export async function fetchStats(range: TimeRange): Promise> { + return apiFetch('GET', `/api/v1/sandboxes/stats?range=${range}`); +} + +export const POLL_INTERVALS: Record = { + '5m': 15_000, + '1h': 30_000, + '6h': 60_000, + '24h': 120_000, + '30d': 300_000, +}; + +export const RANGE_LABELS: Record = { + '5m': '5m', + '1h': '1h', + '6h': '6h', + '24h': '24h', + '30d': '30d', +}; diff --git a/frontend/src/lib/capsule-store.svelte.ts b/frontend/src/lib/capsule-store.svelte.ts new file mode 100644 index 0000000..acc0f60 --- /dev/null +++ b/frontend/src/lib/capsule-store.svelte.ts @@ -0,0 +1,3 @@ +// Shared state written by the list page and read by the capsules layout +// for the running count badge in the header. 
+export const capsuleRunningCount = $state({ value: 0 }); diff --git a/frontend/src/lib/components/CreateCapsuleDialog.svelte b/frontend/src/lib/components/CreateCapsuleDialog.svelte new file mode 100644 index 0000000..b570f2b --- /dev/null +++ b/frontend/src/lib/components/CreateCapsuleDialog.svelte @@ -0,0 +1,124 @@ + + +{#if open} +
+ +
{ if (!creating) onclose(); }} + onkeydown={(e) => { if (e.key === 'Escape' && !creating) onclose(); }} + >
+ +
+

Launch Capsule

+

Configure resources and launch. The VM will be ready in under a second.

+ + {#if createError} +
+ {createError} +
+ {/if} + +
+
+ + +
+ +
+
+ + +
+
+ + +
+
+ +
+ + +
+
+ +
+ + +
+
+
+{/if} diff --git a/frontend/src/lib/components/StatsPanel.svelte b/frontend/src/lib/components/StatsPanel.svelte new file mode 100644 index 0000000..948e520 --- /dev/null +++ b/frontend/src/lib/components/StatsPanel.svelte @@ -0,0 +1,395 @@ + + +
+ + +
+ Usage Statistics +
+
+ {#each RANGES as r, i} + + {/each} +
+ {#if onlaunch} + + {/if} +
+
+ + +
+ + +
+
+ Running Now + {#if !loading} + + + Live + + {/if} +
+
+ {loading ? '—' : (stats?.current.running_count ?? 0)} +
+
capsules
+
+ + +
+ Peak Running +
+ {loading ? '—' : (stats?.peaks.running_count ?? 0)} +
+
30-day max
+
+ + +
+ Peak CPU +
+ {loading ? '—' : (stats?.peaks.vcpus ?? 0)} +
+
vCPUs reserved · 30d max
+
+ + +
+ Peak RAM +
+ {loading ? '—' : fmtGB(stats?.peaks.memory_mb ?? 0)} +
+
reserved · 30d max
+
+ +
+ + + {#if error} +
+ Failed to load stats: {error} +
+ {/if} + + +
+
+
+
Running Capsules
+
+ + {loading ? '—' : (stats?.current.running_count ?? 0)} + + now +
+
+
+ {#if !loading && stats && stats.series.labels.length === 0} +
+ Metrics will appear here once capsules have run. First data arrives within 10 seconds. +
+ {:else} +
+ +
+ {/if} +
+ + +
+
+
+
Reserved CPU & RAM
+
+ + {loading ? '—' : (stats?.current.vcpus_reserved ?? 0)} + + vCPUs + + {loading ? '—' : fmtGB(stats?.current.memory_mb_reserved ?? 0)} + + RAM +
+
+
+ {#if !loading && stats && stats.series.labels.length === 0} +
+ Metrics will appear here once capsules have run. First data arrives within 10 seconds. +
+ {:else} +
+ +
+ {/if} +
+ +
diff --git a/frontend/src/routes/dashboard/capsules/+layout.svelte b/frontend/src/routes/dashboard/capsules/+layout.svelte new file mode 100644 index 0000000..1f85886 --- /dev/null +++ b/frontend/src/routes/dashboard/capsules/+layout.svelte @@ -0,0 +1,102 @@ + + + + Wrenn — Capsules + + +
+ + +
+
+ +
+ +
+
+

+ Capsules +

+

+ Isolated VMs. Start cold in under a second — pause, snapshot, or destroy at will. +

+
+ +
+ +
+ + + + + {capsuleRunningCount.value} + running now +
+
+
+ + + +
+ + {@render children()} +
+ + +
+
+ + + + + All systems operational +
+
+
+
diff --git a/frontend/src/routes/dashboard/capsules/+page.svelte b/frontend/src/routes/dashboard/capsules/+page.svelte index a8c13a3..9c19d67 100644 --- a/frontend/src/routes/dashboard/capsules/+page.svelte +++ b/frontend/src/routes/dashboard/capsules/+page.svelte @@ -1,1025 +1,3 @@ - - - - - { if (e.key === 'Escape') openMenuId = null; }} /> - - - Wrenn — Capsules - - -
- - -
-
- -
- -
-
-

- Capsules -

-

- Isolated VMs. Start cold in under a second — pause, snapshot, or destroy at will. -

-
- -
- -
- - - - - {runningCount} - running now -
-
-
- - -
- - -
-
- - - {#if activeTab === 'stats'} -
-
- {@render metricCell('Concurrent Capsules', String(runningCount), '5-sec avg', 'limit: 20', true)} - {@render metricCell('Start Rate / Second', '0.000', '5-sec avg', null, true)} - {@render metricCell('Peak Concurrent', String(runningCount), '30-day max', 'limit: 20', false)} -
- - {@render chartCard('Concurrent Capsules', String(runningCount), 'average')} - {@render chartCard('Start Rate Per Second', '0.000', 'average')} -
- {:else} -
- -
-
- - - - -
- {filteredCapsules.length} total - -
- - - - - - - - -
- - {#if error} -
- {error} -
- {/if} - - -
- -
-
ID
-
Template
- {@render sortableHeader('CPU', 'vcpus')} - {@render sortableHeader('Memory', 'memory_mb')} - {@render sortableHeader('Idle Timeout', 'timeout_sec')} - {@render sortableHeader('Started', 'started_at')} - {@render sortableHeader('Status', 'status')} -
- - {#if loading && capsules.length === 0} -
-
- - - - Loading capsules... -
-
- {:else if filteredCapsules.length === 0} -
-
-
-
- - - - - -
-
-

- No capsules yet -

-

- Each capsule is an isolated VM. Launch one to get started. -

- -
- {:else} - {#each filteredCapsules as capsule, i (capsule.id)} - {@const stripeColor = capsule.status === 'running' ? 'bg-[var(--color-accent)]' : capsule.status === 'paused' ? 'bg-[var(--color-amber)]' : 'bg-[var(--color-text-muted)]'} -
- -
- - -
- {#if capsule.status === 'running'} - - - - - {:else if capsule.status === 'paused'} - - {:else} - - {/if} - {#if searchQuery && capsule.id.toLowerCase().includes(searchQuery.toLowerCase())} - {@const matchIdx = capsule.id.toLowerCase().indexOf(searchQuery.toLowerCase())} - {capsule.id.slice(0, matchIdx)}{capsule.id.slice(matchIdx, matchIdx + searchQuery.length)}{capsule.id.slice(matchIdx + searchQuery.length)} - {:else} - {capsule.id} - {/if} -
- - -
- {capsule.template} -
- - -
- {capsule.vcpus} -
- - -
- {capsule.memory_mb}MB -
- - -
- {capsule.timeout_sec ? `${capsule.timeout_sec}s` : '—'} -
- - -
- {formatTime(capsule.started_at)} - {#if capsule.last_active_at} - {timeAgo(capsule.last_active_at)} - {/if} -
- - -
- {#if actionLoading === capsule.id} - - - - - - {:else} - - - {/if} -
-
- {/each} - {/if} -
-
- {/if} -
- - -
-
- - - - - All systems operational -
-
-
-
- - -{#if openMenuId} - {@const openCapsule = capsules.find((c) => c.id === openMenuId)} - {#if openCapsule} -
- {#if openCapsule.status === 'running'} - - - {:else if openCapsule.status === 'paused'} - - - {/if} -
- -
- {/if} -{/if} - - -{#if snapshotTarget} -
- -
{ if (!snapshotting) snapshotTarget = null; }} - onkeydown={(e) => { if (e.key === 'Escape' && !snapshotting) snapshotTarget = null; }} - >
- -
- -
-
- - - - -
-
-

Capture snapshot

-

{snapshotTarget.capsule.id}

-
-
- -
- {#if snapshotTarget.pauseFirst} -
- - - - - -

This capsule will be paused first — memory state is captured at rest.

-
- {:else} -

The capsule's current memory state will be captured and stored as a reusable snapshot.

- {/if} - - {#if snapshotError} -
- {snapshotError} -
- {/if} - -
-
- - optional -
- { if (e.key === 'Enter' && !snapshotting) handleSnapshotConfirm(); }} - /> -

Leave blank to use an auto-generated name.

-
- -
- - -
-
-
-
-{/if} - - -{#if showCreateDialog} -
- -
{ if (!creating) showCreateDialog = false; }} - onkeydown={(e) => { if (e.key === 'Escape' && !creating) showCreateDialog = false; }} - >
- -
-

Launch Capsule

-

Configure resources and launch. The VM will be ready in under a second.

- - {#if createError} -
- {createError} -
- {/if} - -
-
- - -
- -
-
- - -
-
- - -
-
- -
- - -
-
- -
- - -
-
-
-{/if} - - -{#if destroyTarget} -
- -
{ if (!destroying) destroyTarget = null; }} - onkeydown={(e) => { if (e.key === 'Escape' && !destroying) destroyTarget = null; }} - >
- -
-

Destroy Capsule

-

- Terminate {destroyTarget.id} and destroy all data inside it. This cannot be undone. -

- - {#if destroyError} -
- {destroyError} -
- {/if} - -
- - -
-
-
-{/if} - - -{#snippet sortableHeader(label: string, key: SortKey)} - -{/snippet} - -{#snippet metricCell(label: string, value: string, sublabel: string, extra: string | null, hasBorderRight: boolean)} -
-
- {label} - - - Live - -
-
{value}
-
- {sublabel} - {#if extra} - | - {extra} - {/if} -
-
-{/snippet} - -{#snippet chartCard(label: string, value: string, sublabel: string)} -
-
-
-
{label}
-
- {value} - {sublabel} - - - Live - -
-
- -
- {#each ['5m', '1H', '6H', '24H', '30D'] as range, i} - - {/each} -
-
- -
-
- 4 - 3 - 2 - 1 - 0 -
- - - {#each [0, 45, 90, 135, 180] as y} - - {/each} - - - -
- {#each ['03:01', '03:02', '03:03', '03:04', '03:05'] as t} - {t} - {/each} -
-
-
-{/snippet} diff --git a/frontend/src/routes/dashboard/capsules/+page.ts b/frontend/src/routes/dashboard/capsules/+page.ts new file mode 100644 index 0000000..029fe7c --- /dev/null +++ b/frontend/src/routes/dashboard/capsules/+page.ts @@ -0,0 +1,5 @@ +import { redirect } from '@sveltejs/kit'; + +export function load() { + throw redirect(307, '/dashboard/capsules/list'); +} diff --git a/frontend/src/routes/dashboard/capsules/list/+page.svelte b/frontend/src/routes/dashboard/capsules/list/+page.svelte new file mode 100644 index 0000000..a04a9e7 --- /dev/null +++ b/frontend/src/routes/dashboard/capsules/list/+page.svelte @@ -0,0 +1,734 @@ + + + + + + { if (e.key === 'Escape') openMenuId = null; }} /> + +
+ +
+
+ + + + +
+ {filteredCapsules.length} total + +
+ + + + + + + + +
+ + {#if error} +
+ {error} +
+ {/if} + + +
+ +
+
ID
+
Template
+ {@render sortableHeader('CPU', 'vcpus')} + {@render sortableHeader('Memory', 'memory_mb')} + {@render sortableHeader('Idle Timeout', 'timeout_sec')} + {@render sortableHeader('Started', 'started_at')} + {@render sortableHeader('Status', 'status')} +
+ + {#if loading && capsules.length === 0} +
+
+ + + + Loading capsules... +
+
+ {:else if filteredCapsules.length === 0} +
+
+
+
+ + + + + +
+
+

+ No capsules yet +

+

+ Each capsule is an isolated VM. Launch one to get started. +

+ +
+ {:else} + {#each filteredCapsules as capsule, i (capsule.id)} + {@const stripeColor = capsule.status === 'running' ? 'bg-[var(--color-accent)]' : capsule.status === 'paused' ? 'bg-[var(--color-amber)]' : 'bg-[var(--color-text-muted)]'} +
+ +
+ + +
+ {#if capsule.status === 'running'} + + + + + {:else if capsule.status === 'paused'} + + {:else} + + {/if} + {#if searchQuery && capsule.id.toLowerCase().includes(searchQuery.toLowerCase())} + {@const matchIdx = capsule.id.toLowerCase().indexOf(searchQuery.toLowerCase())} + {capsule.id.slice(0, matchIdx)}{capsule.id.slice(matchIdx, matchIdx + searchQuery.length)}{capsule.id.slice(matchIdx + searchQuery.length)} + {:else} + {capsule.id} + {/if} +
+ + +
+ {capsule.template} +
+ + +
+ {capsule.vcpus} +
+ + +
+ {capsule.memory_mb}MB +
+ + +
+ {capsule.timeout_sec ? `${capsule.timeout_sec}s` : '—'} +
+ + +
+ {formatTime(capsule.started_at)} + {#if capsule.last_active_at} + {timeAgo(capsule.last_active_at)} + {/if} +
+ + +
+ {#if actionLoading === capsule.id} + + + + + + {:else} + + {/if} +
+
+ {/each} + {/if} +
+
+ + +{#if openMenuId} + {@const openCapsule = capsules.find((c) => c.id === openMenuId)} + {#if openCapsule} +
+ {#if openCapsule.status === 'running'} + + + {:else if openCapsule.status === 'paused'} + + + {/if} +
+ +
+ {/if} +{/if} + + +{#if snapshotTarget} +
+ +
{ if (!snapshotting) snapshotTarget = null; }} + onkeydown={(e) => { if (e.key === 'Escape' && !snapshotting) snapshotTarget = null; }} + >
+ +
+
+
+ + + + +
+
+

Capture snapshot

+

{snapshotTarget.capsule.id}

+
+
+ +
+ {#if snapshotTarget.pauseFirst} +
+ + + + + +

This capsule will be paused first — memory state is captured at rest.

+
+ {:else} +

The capsule's current memory state will be captured and stored as a reusable snapshot.

+ {/if} + + {#if snapshotError} +
+ {snapshotError} +
+ {/if} + +
+
+ + optional +
+ { if (e.key === 'Enter' && !snapshotting) handleSnapshotConfirm(); }} + /> +

Leave blank to use an auto-generated name.

+
+ +
+ + +
+
+
+
+{/if} + + +{#if destroyTarget} +
+ +
{ if (!destroying) destroyTarget = null; }} + onkeydown={(e) => { if (e.key === 'Escape' && !destroying) destroyTarget = null; }} + >
+
+

Destroy Capsule

+

+ Terminate {destroyTarget.id} and destroy all data inside it. This cannot be undone. +

+ + {#if destroyError} +
+ {destroyError} +
+ {/if} + +
+ + +
+
+
+{/if} + + + { showCreateDialog = false; }} + oncreated={handleCapsuleCreated} +/> + +{#snippet sortableHeader(label: string, key: SortKey)} + +{/snippet} diff --git a/frontend/src/routes/dashboard/capsules/stats/+page.svelte b/frontend/src/routes/dashboard/capsules/stats/+page.svelte new file mode 100644 index 0000000..4b2e637 --- /dev/null +++ b/frontend/src/routes/dashboard/capsules/stats/+page.svelte @@ -0,0 +1,17 @@ + + + { showCreateDialog = true; }} + launchDisabled={!auth.teamId} +/> + + { showCreateDialog = false; }} +/> diff --git a/internal/api/handlers_stats.go b/internal/api/handlers_stats.go new file mode 100644 index 0000000..06fe978 --- /dev/null +++ b/internal/api/handlers_stats.go @@ -0,0 +1,100 @@ +package api + +import ( + "log/slog" + "net/http" + "time" + + "git.omukk.dev/wrenn/sandbox/internal/auth" + "git.omukk.dev/wrenn/sandbox/internal/service" +) + +type statsHandler struct { + svc *service.StatsService +} + +func newStatsHandler(svc *service.StatsService) *statsHandler { + return &statsHandler{svc: svc} +} + +type statsCurrentResponse struct { + RunningCount int32 `json:"running_count"` + VCPUsReserved int32 `json:"vcpus_reserved"` + MemoryMBReserved int32 `json:"memory_mb_reserved"` + SampledAt string `json:"sampled_at,omitempty"` +} + +type statsPeaksResponse struct { + RunningCount int32 `json:"running_count"` + VCPUs int32 `json:"vcpus"` + MemoryMB int32 `json:"memory_mb"` +} + +type statsSeriesResponse struct { + Labels []string `json:"labels"` + Running []int32 `json:"running"` + VCPUs []int32 `json:"vcpus"` + MemoryMB []int32 `json:"memory_mb"` +} + +type statsResponse struct { + Range string `json:"range"` + Current statsCurrentResponse `json:"current"` + Peaks statsPeaksResponse `json:"peaks"` + Series statsSeriesResponse `json:"series"` +} + +// GetStats handles GET /v1/sandboxes/stats?range=5m|1h|6h|24h|30d +func (h *statsHandler) GetStats(w http.ResponseWriter, r *http.Request) { + ac := auth.MustFromContext(r.Context()) + + 
rangeParam := r.URL.Query().Get("range") + if rangeParam == "" { + rangeParam = string(service.Range1h) + } + tr := service.TimeRange(rangeParam) + if !service.ValidRange(tr) { + writeError(w, http.StatusBadRequest, "invalid_request", "range must be one of: 5m, 1h, 6h, 24h, 30d") + return + } + + current, peaks, series, err := h.svc.GetStats(r.Context(), ac.TeamID, tr) + if err != nil { + slog.Error("stats handler: get stats failed", "team_id", ac.TeamID, "error", err) + writeError(w, http.StatusInternalServerError, "internal_error", "failed to retrieve stats") + return + } + + resp := statsResponse{ + Range: rangeParam, + Current: statsCurrentResponse{ + RunningCount: current.RunningCount, + VCPUsReserved: current.VCPUsReserved, + MemoryMBReserved: current.MemoryMBReserved, + }, + Peaks: statsPeaksResponse{ + RunningCount: peaks.RunningCount, + VCPUs: peaks.VCPUs, + MemoryMB: peaks.MemoryMB, + }, + Series: statsSeriesResponse{ + Labels: make([]string, len(series)), + Running: make([]int32, len(series)), + VCPUs: make([]int32, len(series)), + MemoryMB: make([]int32, len(series)), + }, + } + + if !current.SampledAt.IsZero() { + resp.Current.SampledAt = current.SampledAt.UTC().Format(time.RFC3339) + } + + for i, pt := range series { + resp.Series.Labels[i] = pt.Bucket.UTC().Format(time.RFC3339) + resp.Series.Running[i] = pt.RunningCount + resp.Series.VCPUs[i] = pt.VCPUsReserved + resp.Series.MemoryMB[i] = pt.MemoryMBReserved + } + + writeJSON(w, http.StatusOK, resp) +} diff --git a/internal/api/metrics_sampler.go b/internal/api/metrics_sampler.go new file mode 100644 index 0000000..7ea3cd0 --- /dev/null +++ b/internal/api/metrics_sampler.go @@ -0,0 +1,68 @@ +package api + +import ( + "context" + "log/slog" + "time" + + "git.omukk.dev/wrenn/sandbox/internal/db" +) + +// MetricsSampler records per-team sandbox resource usage to +// sandbox_metrics_snapshots every interval. It also prunes rows older than +// 60 days on each tick to keep the table bounded. 
+type MetricsSampler struct { + db *db.Queries + interval time.Duration +} + +// NewMetricsSampler creates a MetricsSampler. +func NewMetricsSampler(queries *db.Queries, interval time.Duration) *MetricsSampler { + return &MetricsSampler{db: queries, interval: interval} +} + +// Start runs the sampler loop until the context is cancelled. +func (s *MetricsSampler) Start(ctx context.Context) { + go func() { + ticker := time.NewTicker(s.interval) + defer ticker.Stop() + + // Sample immediately on startup. + s.run(ctx) + + for { + select { + case <-ctx.Done(): + return + case <-ticker.C: + s.run(ctx) + } + } + }() +} + +func (s *MetricsSampler) run(ctx context.Context) { + s.prune(ctx) + if err := s.sample(ctx); err != nil { + slog.Warn("metrics sampler: sample failed", "error", err) + } +} + +func (s *MetricsSampler) sample(ctx context.Context) error { + rows, err := s.db.SampleSandboxMetrics(ctx) + if err != nil { + return err + } + for _, row := range rows { + if err := s.db.InsertMetricsSnapshot(ctx, db.InsertMetricsSnapshotParams(row)); err != nil { + slog.Warn("metrics sampler: insert snapshot failed", "team_id", row.TeamID, "error", err) + } + } + return nil +} + +func (s *MetricsSampler) prune(ctx context.Context) { + if err := s.db.PruneOldMetrics(ctx); err != nil { + slog.Warn("metrics sampler: prune failed", "error", err) + } +} diff --git a/internal/api/openapi.yaml b/internal/api/openapi.yaml index e46fabc..86e88c6 100644 --- a/internal/api/openapi.yaml +++ b/internal/api/openapi.yaml @@ -613,6 +613,32 @@ paths: items: $ref: "#/components/schemas/Sandbox" + /v1/sandboxes/stats: + get: + summary: Get sandbox usage stats for your team + operationId: getSandboxStats + tags: [sandboxes] + security: + - apiKeyAuth: [] + parameters: + - name: range + in: query + required: false + schema: + type: string + enum: [5m, 1h, 6h, 24h, 30d] + default: 1h + description: Time window for the time-series data. 
+ responses: + "200": + description: Sandbox stats for the team + content: + application/json: + schema: + $ref: "#/components/schemas/SandboxStats" + "400": + $ref: "#/components/responses/BadRequest" + /v1/sandboxes/{id}: parameters: - name: id @@ -1578,6 +1604,57 @@ components: after this duration of inactivity (no exec or ping). 0 means no auto-pause. + SandboxStats: + type: object + properties: + range: + type: string + enum: [5m, 1h, 6h, 24h, 30d] + current: + type: object + properties: + running_count: + type: integer + vcpus_reserved: + type: integer + memory_mb_reserved: + type: integer + sampled_at: + type: string + format: date-time + nullable: true + peaks: + type: object + description: Maximum values over the last 30 days. + properties: + running_count: + type: integer + vcpus: + type: integer + memory_mb: + type: integer + series: + type: object + description: Parallel arrays for chart rendering. + properties: + labels: + type: array + items: + type: string + format: date-time + running: + type: array + items: + type: integer + vcpus: + type: array + items: + type: integer + memory_mb: + type: array + items: + type: integer + Sandbox: type: object properties: diff --git a/internal/api/server.go b/internal/api/server.go index 366a122..636d1d1 100644 --- a/internal/api/server.go +++ b/internal/api/server.go @@ -46,6 +46,7 @@ func New( hostSvc := &service.HostService{DB: queries, Redis: rdb, JWT: jwtSecret, Pool: pool} teamSvc := &service.TeamService{DB: queries, Pool: pgPool, HostPool: pool} auditSvc := &service.AuditService{DB: queries} + statsSvc := &service.StatsService{DB: queries, Pool: pgPool} al := audit.New(queries) @@ -62,6 +63,7 @@ func New( teamH := newTeamHandler(teamSvc, al) usersH := newUsersHandler(teamSvc) auditH := newAuditHandler(auditSvc) + statsH := newStatsHandler(statsSvc) // OpenAPI spec and docs. 
// ---- internal/api/server.go route hunk: enclosing func New( begins outside
// this view; it registers GET /stats (statsH.GetStats) on the authenticated
// sandboxes router. Elided here as incomplete context. ----

// ---- internal/db/metrics.sql.go (new file, sqlc output) ----

// Code generated by sqlc. DO NOT EDIT.
// versions:
// sqlc v1.30.0
// source: metrics.sql

package db

import (
	"context"

	"github.com/jackc/pgx/v5/pgtype"
)

// Latest snapshot for one team (highest sampled_at wins).
const getCurrentMetrics = `-- name: GetCurrentMetrics :one
SELECT running_count, vcpus_reserved, memory_mb_reserved, sampled_at
FROM sandbox_metrics_snapshots
WHERE team_id = $1
ORDER BY sampled_at DESC
LIMIT 1
`

type GetCurrentMetricsRow struct {
	RunningCount     int32              `json:"running_count"`
	VcpusReserved    int32              `json:"vcpus_reserved"`
	MemoryMbReserved int32              `json:"memory_mb_reserved"`
	SampledAt        pgtype.Timestamptz `json:"sampled_at"`
}

// GetCurrentMetrics returns the most recent metrics snapshot for teamID.
// Returns pgx.ErrNoRows when the sampler has never recorded this team.
func (q *Queries) GetCurrentMetrics(ctx context.Context, teamID string) (GetCurrentMetricsRow, error) {
	row := q.db.QueryRow(ctx, getCurrentMetrics, teamID)
	var i GetCurrentMetricsRow
	err := row.Scan(
		&i.RunningCount,
		&i.VcpusReserved,
		&i.MemoryMbReserved,
		&i.SampledAt,
	)
	return i, err
}

// Per-column maxima over the trailing 30 days; COALESCE keeps the result
// zero (not NULL) when no snapshots exist in the window.
const getPeakMetrics = `-- name: GetPeakMetrics :one
SELECT
  COALESCE(MAX(running_count), 0)::INTEGER AS peak_running_count,
  COALESCE(MAX(vcpus_reserved), 0)::INTEGER AS peak_vcpus,
  COALESCE(MAX(memory_mb_reserved), 0)::INTEGER AS peak_memory_mb
FROM sandbox_metrics_snapshots
WHERE team_id = $1
  AND sampled_at > NOW() - INTERVAL '30 days'
`

type GetPeakMetricsRow struct {
	PeakRunningCount int32 `json:"peak_running_count"`
	PeakVcpus        int32 `json:"peak_vcpus"`
	PeakMemoryMb     int32 `json:"peak_memory_mb"`
}

// GetPeakMetrics returns the 30-day maximum running count, vCPUs and memory
// for teamID. Note: the aggregate query always yields one row, so callers
// will not see pgx.ErrNoRows in practice.
func (q *Queries) GetPeakMetrics(ctx context.Context, teamID string) (GetPeakMetricsRow, error) {
	row := q.db.QueryRow(ctx, getPeakMetrics, teamID)
	var i GetPeakMetricsRow
	err := row.Scan(&i.PeakRunningCount, &i.PeakVcpus, &i.PeakMemoryMb)
	return i, err
}

// sampled_at defaults to NOW() in the table definition.
const insertMetricsSnapshot = `-- name: InsertMetricsSnapshot :exec
INSERT INTO sandbox_metrics_snapshots (team_id, running_count, vcpus_reserved, memory_mb_reserved)
VALUES ($1, $2, $3, $4)
`

type InsertMetricsSnapshotParams struct {
	TeamID           string `json:"team_id"`
	RunningCount     int32  `json:"running_count"`
	VcpusReserved    int32  `json:"vcpus_reserved"`
	MemoryMbReserved int32  `json:"memory_mb_reserved"`
}

// InsertMetricsSnapshot records one sampled data point for a team.
func (q *Queries) InsertMetricsSnapshot(ctx context.Context, arg InsertMetricsSnapshotParams) error {
	_, err := q.db.Exec(ctx, insertMetricsSnapshot,
		arg.TeamID,
		arg.RunningCount,
		arg.VcpusReserved,
		arg.MemoryMbReserved,
	)
	return err
}

// 60-day retention window for snapshot rows.
const pruneOldMetrics = `-- name: PruneOldMetrics :exec
DELETE FROM sandbox_metrics_snapshots
WHERE sampled_at < NOW() - INTERVAL '60 days'
`

// PruneOldMetrics deletes snapshots older than 60 days (all teams).
func (q *Queries) PruneOldMetrics(ctx context.Context) error {
	_, err := q.db.Exec(ctx, pruneOldMetrics)
	return err
}

const sampleSandboxMetrics = `-- name: SampleSandboxMetrics :many
SELECT
  team_id,
  (COUNT(*) FILTER (WHERE status IN ('running', 'starting')))::INTEGER AS running_count,
  (COALESCE(SUM(vcpus) FILTER (WHERE status IN ('running', 'starting')), 0)
    + CEIL(COALESCE(SUM(vcpus) FILTER (WHERE status = 'paused'), 0)::NUMERIC / 2))::INTEGER AS vcpus_reserved,
  (COALESCE(SUM(memory_mb) FILTER (WHERE status IN ('running', 'starting')), 0)
    + CEIL(COALESCE(SUM(memory_mb) FILTER (WHERE status = 'paused'), 0)::NUMERIC / 2))::INTEGER AS memory_mb_reserved
FROM sandboxes
WHERE status IN ('running', 'starting', 'paused')
GROUP BY team_id
`

type SampleSandboxMetricsRow struct {
	TeamID           string `json:"team_id"`
	RunningCount     int32  `json:"running_count"`
	VcpusReserved    int32  `json:"vcpus_reserved"`
	MemoryMbReserved int32  `json:"memory_mb_reserved"`
}

// Aggregates per-team resource usage from the live sandboxes table.
// paused sandboxes count at 50% (ceil) for capacity reservation.
// Note: teams with no running/starting/paused sandboxes produce no row,
// so a zero snapshot is never emitted for them.
func (q *Queries) SampleSandboxMetrics(ctx context.Context) ([]SampleSandboxMetricsRow, error) {
	rows, err := q.db.Query(ctx, sampleSandboxMetrics)
	if err != nil {
		return nil, err
	}
	defer rows.Close()
	var items []SampleSandboxMetricsRow
	for rows.Next() {
		var i SampleSandboxMetricsRow
		if err := rows.Scan(
			&i.TeamID,
			&i.RunningCount,
			&i.VcpusReserved,
			&i.MemoryMbReserved,
		); err != nil {
			return nil, err
		}
		items = append(items, i)
	}
	if err := rows.Err(); err != nil {
		return nil, err
	}
	return items, nil
}

// ---- internal/db/models.go hunk (surrounding Sandbox/Team context lines
// elided; they are unchanged context in the patch and cut mid-definition) ----

// SandboxMetricsSnapshot mirrors one row of the sandbox_metrics_snapshots
// table created by migration 20260325074949.
type SandboxMetricsSnapshot struct {
	ID               int64              `json:"id"`
	TeamID           string             `json:"team_id"`
	SampledAt        pgtype.Timestamptz `json:"sampled_at"`
	RunningCount     int32              `json:"running_count"`
	VcpusReserved    int32              `json:"vcpus_reserved"`
	MemoryMbReserved int32              `json:"memory_mb_reserved"`
}

// ---- internal/service/stats.go (new file) ----

package service

import (
	"context"
	"errors"
	"fmt"
	"time"

	"github.com/jackc/pgx/v5"
	"github.com/jackc/pgx/v5/pgxpool"

	"git.omukk.dev/wrenn/sandbox/internal/db"
)

// TimeRange identifies a chart time window.
+type TimeRange string + +const ( + Range5m TimeRange = "5m" + Range1h TimeRange = "1h" + Range6h TimeRange = "6h" + Range24h TimeRange = "24h" + Range30d TimeRange = "30d" +) + +type rangeConfig struct { + bucketSec int // bucket width in seconds for time-series aggregation + intervalLiteral string // PostgreSQL interval literal for the lookback window +} + +var rangeConfigs = map[TimeRange]rangeConfig{ + Range5m: {bucketSec: 3, intervalLiteral: "5 minutes"}, + Range1h: {bucketSec: 30, intervalLiteral: "1 hour"}, + Range6h: {bucketSec: 180, intervalLiteral: "6 hours"}, + Range24h: {bucketSec: 720, intervalLiteral: "24 hours"}, + Range30d: {bucketSec: 21600, intervalLiteral: "30 days"}, +} + +// ValidRange returns true if r is a known TimeRange value. +func ValidRange(r TimeRange) bool { + _, ok := rangeConfigs[r] + return ok +} + +// StatPoint is one bucketed data point in the time-series. +type StatPoint struct { + Bucket time.Time + RunningCount int32 + VCPUsReserved int32 + MemoryMBReserved int32 +} + +// CurrentStats holds the most recent sampled values for a team. +type CurrentStats struct { + RunningCount int32 + VCPUsReserved int32 + MemoryMBReserved int32 + SampledAt time.Time +} + +// PeakStats holds the 30-day maximum values for a team. +type PeakStats struct { + RunningCount int32 + VCPUs int32 + MemoryMB int32 +} + +// StatsService computes sandbox metrics for the dashboard. +type StatsService struct { + DB *db.Queries + Pool *pgxpool.Pool +} + +// GetStats returns current stats, 30-day peaks, and a time-series for the +// given team and time range. If no snapshots exist yet, zeros are returned. +func (s *StatsService) GetStats(ctx context.Context, teamID string, r TimeRange) (CurrentStats, PeakStats, []StatPoint, error) { + cfg, ok := rangeConfigs[r] + if !ok { + return CurrentStats{}, PeakStats{}, nil, fmt.Errorf("unknown range: %s", r) + } + + // Current snapshot. 
+ var current CurrentStats + cur, err := s.DB.GetCurrentMetrics(ctx, teamID) + if err != nil && !errors.Is(err, pgx.ErrNoRows) { + return CurrentStats{}, PeakStats{}, nil, fmt.Errorf("get current metrics: %w", err) + } + if err == nil { + current = CurrentStats{ + RunningCount: cur.RunningCount, + VCPUsReserved: cur.VcpusReserved, + MemoryMBReserved: cur.MemoryMbReserved, + SampledAt: cur.SampledAt.Time, + } + } + + // 30-day peaks. + var peaks PeakStats + pk, err := s.DB.GetPeakMetrics(ctx, teamID) + if err != nil && !errors.Is(err, pgx.ErrNoRows) { + return CurrentStats{}, PeakStats{}, nil, fmt.Errorf("get peak metrics: %w", err) + } + if err == nil { + peaks = PeakStats{ + RunningCount: pk.PeakRunningCount, + VCPUs: pk.PeakVcpus, + MemoryMB: pk.PeakMemoryMb, + } + } + + // Time-series — dynamic bucket width, executed via pgx directly. + series, err := s.queryTimeSeries(ctx, teamID, cfg) + if err != nil { + return CurrentStats{}, PeakStats{}, nil, fmt.Errorf("get time series: %w", err) + } + + return current, peaks, series, nil +} + +// timeSeriesSQL uses an epoch-floor trick to bucket rows by an arbitrary +// integer number of seconds without requiring TimescaleDB. +// +// $1 = bucket width in seconds (integer) +// $2 = team_id +// $3 = lookback interval literal (e.g. 
'1 hour') +const timeSeriesSQL = ` +SELECT + to_timestamp(floor(extract(epoch FROM sampled_at) / $1) * $1) AS bucket, + AVG(running_count)::INTEGER AS running_count, + AVG(vcpus_reserved)::INTEGER AS vcpus_reserved, + AVG(memory_mb_reserved)::INTEGER AS memory_mb_reserved +FROM sandbox_metrics_snapshots +WHERE team_id = $2 + AND sampled_at >= NOW() - $3::INTERVAL +GROUP BY bucket +ORDER BY bucket ASC +` + +func (s *StatsService) queryTimeSeries(ctx context.Context, teamID string, cfg rangeConfig) ([]StatPoint, error) { + rows, err := s.Pool.Query(ctx, timeSeriesSQL, cfg.bucketSec, teamID, cfg.intervalLiteral) + if err != nil { + return nil, err + } + defer rows.Close() + + var points []StatPoint + for rows.Next() { + var p StatPoint + var bucket time.Time + if err := rows.Scan(&bucket, &p.RunningCount, &p.VCPUsReserved, &p.MemoryMBReserved); err != nil { + return nil, err + } + p.Bucket = bucket + points = append(points, p) + } + return points, rows.Err() +}