forked from wrenn/wrenn
v0.0.1 (#8)
Co-authored-by: Tasnim Kabir Sadik <tksadik92@gmail.com> Reviewed-on: wrenn/sandbox#8
This commit is contained in:
85
internal/sandbox/conntracker.go
Normal file
85
internal/sandbox/conntracker.go
Normal file
@ -0,0 +1,85 @@
|
||||
package sandbox
|
||||
|
||||
import (
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
)
|
||||
|
||||
// ConnTracker tracks active proxy connections for a single sandbox and
// provides a drain mechanism for pre-pause graceful shutdown.
// It is safe for concurrent use.
type ConnTracker struct {
	// draining, once set by Drain, makes every future Acquire return false.
	draining atomic.Bool
	// wg counts in-flight connections: one Add per successful Acquire,
	// one Done per Release.
	wg sync.WaitGroup

	// cancelMu protects cancelDrain so Reset can signal a timed-out Drain
	// goroutine to exit, preventing goroutine leaks on repeated pause failures.
	cancelMu    sync.Mutex
	cancelDrain chan struct{}
}
|
||||
|
||||
// Acquire registers one in-flight connection. Returns false if the tracker
// is already draining; the caller must not call Release in that case.
func (t *ConnTracker) Acquire() bool {
	if t.draining.Load() {
		return false
	}
	t.wg.Add(1)
	// Re-check after Add: Drain may have set draining between our Load
	// and Add. If so, undo the Add and reject the connection.
	// NOTE(review): there is still a narrow window where this Add can run
	// concurrently with a Drain goroutine's Wait while the counter is zero,
	// which the sync.WaitGroup docs call out as misuse — confirm whether
	// callers can actually hit this interleaving.
	if t.draining.Load() {
		t.wg.Done()
		return false
	}
	return true
}
|
||||
|
||||
// Release marks one connection as complete. Must be called exactly once
// per successful Acquire (one that returned true); extra calls would drive
// the WaitGroup counter negative and panic.
func (t *ConnTracker) Release() {
	t.wg.Done()
}
|
||||
|
||||
// Drain marks the tracker as draining (all future Acquire calls return
// false) and waits up to timeout for in-flight connections to finish.
// It returns early if Reset closes the cancel channel. On timeout or
// cancellation the waiter goroutine stays blocked in wg.Wait until the
// remaining connections call Release, then exits.
func (t *ConnTracker) Drain(timeout time.Duration) {
	t.draining.Store(true)

	// Publish a fresh cancel channel under the lock so a concurrent Reset
	// can abort this particular Drain call.
	cancel := make(chan struct{})
	t.cancelMu.Lock()
	t.cancelDrain = cancel
	t.cancelMu.Unlock()

	// Wait for the WaitGroup in a goroutine so we can also select on
	// cancellation and timeout.
	done := make(chan struct{})
	go func() {
		t.wg.Wait()
		close(done)
	}()

	select {
	case <-done:
	case <-cancel:
		// Reset was called; stop waiting.
	case <-time.After(timeout):
	}
}
|
||||
|
||||
// Reset re-enables the tracker after a failed drain. This allows the
|
||||
// sandbox to accept proxy connections again if the pause operation fails
|
||||
// and the VM is resumed. It also cancels any lingering Drain goroutine.
|
||||
func (t *ConnTracker) Reset() {
|
||||
t.cancelMu.Lock()
|
||||
if t.cancelDrain != nil {
|
||||
select {
|
||||
case <-t.cancelDrain:
|
||||
// Already closed.
|
||||
default:
|
||||
close(t.cancelDrain)
|
||||
}
|
||||
t.cancelDrain = nil
|
||||
}
|
||||
t.cancelMu.Unlock()
|
||||
|
||||
t.draining.Store(false)
|
||||
}
|
||||
106
internal/sandbox/images.go
Normal file
106
internal/sandbox/images.go
Normal file
@ -0,0 +1,106 @@
|
||||
package sandbox
|
||||
|
||||
import (
	"errors"
	"fmt"
	"log/slog"
	"os"
	"os/exec"
	"path/filepath"

	"git.omukk.dev/wrenn/sandbox/internal/id"
	"git.omukk.dev/wrenn/sandbox/internal/layout"
)
|
||||
|
||||
// DefaultDiskSizeMB is the standard disk size for base images. Images smaller
// than this are expanded at startup so that dm-snapshot sandboxes see the full
// size without per-sandbox copies. The expansion is sparse — only metadata
// changes; no physical disk is consumed beyond the original content.
const DefaultDiskSizeMB = 5120 // 5 GB
|
||||
|
||||
// EnsureImageSizes walks template directories and expands any rootfs.ext4 that
|
||||
// is smaller than the target size. This is idempotent: images already at or
|
||||
// above the target size are left untouched. Should be called once at host agent
|
||||
// startup before any sandboxes are created.
|
||||
func EnsureImageSizes(wrennDir string, targetMB int) error {
|
||||
if targetMB <= 0 {
|
||||
targetMB = DefaultDiskSizeMB
|
||||
}
|
||||
targetBytes := int64(targetMB) * 1024 * 1024
|
||||
|
||||
// Expand the built-in minimal image.
|
||||
minimalRootfs := layout.TemplateRootfs(wrennDir, id.PlatformTeamID, id.MinimalTemplateID)
|
||||
if err := expandImage(minimalRootfs, targetBytes, targetMB); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Walk teams/{teamDir}/{templateDir}/rootfs.ext4 two levels deep.
|
||||
teamsDir := layout.TeamsDir(wrennDir)
|
||||
teamEntries, err := os.ReadDir(teamsDir)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return nil // teams dir doesn't exist yet — nothing to expand
|
||||
}
|
||||
return fmt.Errorf("read teams dir: %w", err)
|
||||
}
|
||||
|
||||
for _, teamEntry := range teamEntries {
|
||||
if !teamEntry.IsDir() {
|
||||
continue
|
||||
}
|
||||
teamPath := filepath.Join(teamsDir, teamEntry.Name())
|
||||
templateEntries, err := os.ReadDir(teamPath)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
for _, tmplEntry := range templateEntries {
|
||||
if !tmplEntry.IsDir() {
|
||||
continue
|
||||
}
|
||||
rootfs := filepath.Join(teamPath, tmplEntry.Name(), "rootfs.ext4")
|
||||
if err := expandImage(rootfs, targetBytes, targetMB); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// expandImage expands a single rootfs image if it is smaller than targetBytes.
// targetMB is the same target expressed in MB and is used only for logging.
// A missing file is skipped silently (not every template dir has a
// rootfs.ext4). The expansion is sparse: the file is truncated up, the
// filesystem is checked, then resize2fs grows ext4 to fill the new size.
func expandImage(rootfs string, targetBytes int64, targetMB int) error {
	info, err := os.Stat(rootfs)
	if err != nil {
		return nil // not every template dir has a rootfs.ext4
	}

	if info.Size() >= targetBytes {
		return nil // already large enough
	}

	slog.Info("expanding base image",
		"path", rootfs,
		"from_mb", info.Size()/(1024*1024),
		"to_mb", targetMB,
	)

	// Expand the file (sparse — instant, no physical disk used).
	if err := os.Truncate(rootfs, targetBytes); err != nil {
		return fmt.Errorf("truncate %s: %w", rootfs, err)
	}

	// Check filesystem before resize.
	if out, err := exec.Command("e2fsck", "-fy", rootfs).CombinedOutput(); err != nil {
		// e2fsck exit code 1 means "errors corrected", which is fine.
		// Anything else is fatal — including a non-ExitError such as the
		// e2fsck binary being missing, which the previous code silently
		// swallowed before running resize2fs on an unchecked filesystem.
		var exitErr *exec.ExitError
		if !errors.As(err, &exitErr) || exitErr.ExitCode() > 1 {
			return fmt.Errorf("e2fsck %s: %s: %w", rootfs, string(out), err)
		}
	}

	// Grow the ext4 filesystem to fill the new file size.
	if out, err := exec.Command("resize2fs", rootfs).CombinedOutput(); err != nil {
		return fmt.Errorf("resize2fs %s: %s: %w", rootfs, string(out), err)
	}

	slog.Info("base image expanded", "path", rootfs, "size_mb", targetMB)
	return nil
}
|
||||
File diff suppressed because it is too large
Load Diff
178
internal/sandbox/metrics.go
Normal file
178
internal/sandbox/metrics.go
Normal file
@ -0,0 +1,178 @@
|
||||
package sandbox
|
||||
|
||||
import (
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
// MetricPoint holds one metrics sample.
type MetricPoint struct {
	Timestamp time.Time // when the sample was taken
	CPUPct    float64   // CPU usage percentage
	MemBytes  int64     // memory usage in bytes
	DiskBytes int64     // disk usage in bytes
}
|
||||
|
||||
// Ring buffer capacity constants. Each tier's capacity × its sample
// interval gives the window the tier covers.
const (
	ring10mCap = 1200 // 500ms × 1200 = 10 min
	ring2hCap  = 240  // 30s × 240 = 2 h
	ring24hCap = 288  // 5min × 288 = 24 h

	downsample2hEvery  = 60 // 60 × 500ms = 30s
	downsample24hEvery = 10 // 10 × 30s = 5min
)
|
||||
|
||||
// metricsRing holds three tiered ring buffers with automatic downsampling
// from the finest tier into coarser tiers.
type metricsRing struct {
	// mu guards every field below.
	mu sync.Mutex

	// 10-minute tier: 500ms samples. idx points at the next write slot;
	// count is the number of valid entries (saturates at capacity).
	buf10m   [ring10mCap]MetricPoint
	idx10m   int
	count10m int

	// 2-hour tier: 30s averages.
	buf2h   [ring2hCap]MetricPoint
	idx2h   int
	count2h int

	// 24-hour tier: 5min averages.
	buf24h   [ring24hCap]MetricPoint
	idx24h   int
	count24h int

	// Accumulators for downsampling: raw samples collected until a full
	// window is available to average into the next-coarser tier.
	acc500ms  [downsample2hEvery]MetricPoint
	acc500msN int

	acc30s  [downsample24hEvery]MetricPoint
	acc30sN int
}
|
||||
|
||||
// newMetricsRing creates an empty metrics ring buffer.
|
||||
func newMetricsRing() *metricsRing {
|
||||
return &metricsRing{}
|
||||
}
|
||||
|
||||
// Push adds a 500ms sample to the finest tier and triggers downsampling
|
||||
// into coarser tiers when enough samples have accumulated.
|
||||
func (r *metricsRing) Push(p MetricPoint) {
|
||||
r.mu.Lock()
|
||||
defer r.mu.Unlock()
|
||||
|
||||
// Write to 10m ring.
|
||||
r.buf10m[r.idx10m] = p
|
||||
r.idx10m = (r.idx10m + 1) % ring10mCap
|
||||
if r.count10m < ring10mCap {
|
||||
r.count10m++
|
||||
}
|
||||
|
||||
// Accumulate for 2h downsample.
|
||||
r.acc500ms[r.acc500msN] = p
|
||||
r.acc500msN++
|
||||
if r.acc500msN == downsample2hEvery {
|
||||
avg := averagePoints(r.acc500ms[:downsample2hEvery])
|
||||
r.push2h(avg)
|
||||
r.acc500msN = 0
|
||||
}
|
||||
}
|
||||
|
||||
func (r *metricsRing) push2h(p MetricPoint) {
|
||||
r.buf2h[r.idx2h] = p
|
||||
r.idx2h = (r.idx2h + 1) % ring2hCap
|
||||
if r.count2h < ring2hCap {
|
||||
r.count2h++
|
||||
}
|
||||
|
||||
// Accumulate for 24h downsample.
|
||||
r.acc30s[r.acc30sN] = p
|
||||
r.acc30sN++
|
||||
if r.acc30sN == downsample24hEvery {
|
||||
avg := averagePoints(r.acc30s[:downsample24hEvery])
|
||||
r.push24h(avg)
|
||||
r.acc30sN = 0
|
||||
}
|
||||
}
|
||||
|
||||
func (r *metricsRing) push24h(p MetricPoint) {
|
||||
r.buf24h[r.idx24h] = p
|
||||
r.idx24h = (r.idx24h + 1) % ring24hCap
|
||||
if r.count24h < ring24hCap {
|
||||
r.count24h++
|
||||
}
|
||||
}
|
||||
|
||||
// Get10m returns the 10-minute tier points in chronological order.
|
||||
func (r *metricsRing) Get10m() []MetricPoint {
|
||||
r.mu.Lock()
|
||||
defer r.mu.Unlock()
|
||||
return r.readRing(r.buf10m[:], r.idx10m, r.count10m)
|
||||
}
|
||||
|
||||
// Get2h returns the 2-hour tier points in chronological order.
|
||||
func (r *metricsRing) Get2h() []MetricPoint {
|
||||
r.mu.Lock()
|
||||
defer r.mu.Unlock()
|
||||
return r.readRing(r.buf2h[:], r.idx2h, r.count2h)
|
||||
}
|
||||
|
||||
// Get24h returns the 24-hour tier points in chronological order.
|
||||
func (r *metricsRing) Get24h() []MetricPoint {
|
||||
r.mu.Lock()
|
||||
defer r.mu.Unlock()
|
||||
return r.readRing(r.buf24h[:], r.idx24h, r.count24h)
|
||||
}
|
||||
|
||||
// Flush returns all three tiers and resets the ring buffer.
|
||||
func (r *metricsRing) Flush() (pts10m, pts2h, pts24h []MetricPoint) {
|
||||
r.mu.Lock()
|
||||
defer r.mu.Unlock()
|
||||
|
||||
pts10m = r.readRing(r.buf10m[:], r.idx10m, r.count10m)
|
||||
pts2h = r.readRing(r.buf2h[:], r.idx2h, r.count2h)
|
||||
pts24h = r.readRing(r.buf24h[:], r.idx24h, r.count24h)
|
||||
|
||||
// Reset all state.
|
||||
r.idx10m, r.count10m = 0, 0
|
||||
r.idx2h, r.count2h = 0, 0
|
||||
r.idx24h, r.count24h = 0, 0
|
||||
r.acc500msN = 0
|
||||
r.acc30sN = 0
|
||||
|
||||
return pts10m, pts2h, pts24h
|
||||
}
|
||||
|
||||
// readRing extracts elements from a circular buffer in chronological order.
|
||||
func (r *metricsRing) readRing(buf []MetricPoint, nextIdx, count int) []MetricPoint {
|
||||
if count == 0 {
|
||||
return nil
|
||||
}
|
||||
result := make([]MetricPoint, count)
|
||||
bufLen := len(buf)
|
||||
start := (nextIdx - count + bufLen) % bufLen
|
||||
for i := range count {
|
||||
result[i] = buf[(start+i)%bufLen]
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// averagePoints computes the average of a slice of MetricPoints.
|
||||
// The timestamp is set to the last point's timestamp.
|
||||
func averagePoints(pts []MetricPoint) MetricPoint {
|
||||
n := float64(len(pts))
|
||||
var cpu float64
|
||||
var mem, disk int64
|
||||
for _, p := range pts {
|
||||
cpu += p.CPUPct
|
||||
mem += p.MemBytes
|
||||
disk += p.DiskBytes
|
||||
}
|
||||
return MetricPoint{
|
||||
Timestamp: pts[len(pts)-1].Timestamp,
|
||||
CPUPct: cpu / n,
|
||||
MemBytes: int64(float64(mem) / n),
|
||||
DiskBytes: int64(float64(disk) / n),
|
||||
}
|
||||
}
|
||||
83
internal/sandbox/proc.go
Normal file
83
internal/sandbox/proc.go
Normal file
@ -0,0 +1,83 @@
|
||||
package sandbox
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
"syscall"
|
||||
)
|
||||
|
||||
// cpuStat holds raw CPU jiffies read from /proc/{pid}/stat.
type cpuStat struct {
	utime uint64 // user-mode jiffies (field 14 in proc(5))
	stime uint64 // kernel-mode jiffies (field 15 in proc(5))
}

// readCPUStat reads user and system CPU jiffies from /proc/{pid}/stat.
// Fields 14 (utime) and 15 (stime) are 1-indexed in the man page; after
// stripping the "pid (comm)" prefix they sit at indices 11 and 12.
func readCPUStat(pid int) (cpuStat, error) {
	path := fmt.Sprintf("/proc/%d/stat", pid)
	data, err := os.ReadFile(path)
	if err != nil {
		return cpuStat{}, fmt.Errorf("read stat: %w", err)
	}

	// /proc/{pid}/stat format: pid (comm) state fields...
	// The comm field may contain spaces and parens, so find the last ')' first.
	content := string(data)
	idx := strings.LastIndex(content, ")")
	if idx < 0 {
		return cpuStat{}, fmt.Errorf("malformed /proc/%d/stat: no closing paren", pid)
	}
	// Slice from idx+1, not idx+2: strings.Fields skips leading whitespace
	// on its own, and idx+2 would panic with an out-of-range slice if ')'
	// happened to be the final byte of a truncated/malformed file.
	// After ")" the fields are: state (index 0), ..., utime (11), stime (12).
	fields := strings.Fields(content[idx+1:])
	if len(fields) < 13 {
		return cpuStat{}, fmt.Errorf("malformed /proc/%d/stat: too few fields (%d)", pid, len(fields))
	}
	utime, err := strconv.ParseUint(fields[11], 10, 64)
	if err != nil {
		return cpuStat{}, fmt.Errorf("parse utime: %w", err)
	}
	stime, err := strconv.ParseUint(fields[12], 10, 64)
	if err != nil {
		return cpuStat{}, fmt.Errorf("parse stime: %w", err)
	}
	return cpuStat{utime: utime, stime: stime}, nil
}
|
||||
|
||||
// readMemRSS reads VmRSS from /proc/{pid}/status and returns bytes.
// /proc reports the value in kB, so the result is scaled by 1024.
func readMemRSS(pid int) (int64, error) {
	data, err := os.ReadFile(fmt.Sprintf("/proc/%d/status", pid))
	if err != nil {
		return 0, fmt.Errorf("read status: %w", err)
	}
	for _, line := range strings.Split(string(data), "\n") {
		if !strings.HasPrefix(line, "VmRSS:") {
			continue
		}
		// Line looks like "VmRSS:   123456 kB" — the number is field 1.
		parts := strings.Fields(line)
		if len(parts) < 2 {
			return 0, fmt.Errorf("malformed VmRSS line")
		}
		kb, err := strconv.ParseInt(parts[1], 10, 64)
		if err != nil {
			return 0, fmt.Errorf("parse VmRSS: %w", err)
		}
		return kb * 1024, nil
	}
	return 0, fmt.Errorf("VmRSS not found in /proc/%d/status", pid)
}
|
||||
|
||||
// readDiskAllocated returns the actual allocated bytes (not apparent size)
// of the file at path. This uses stat's block count × 512 (st_blocks is
// reported in 512-byte units), so sparse files report only the blocks
// physically allocated.
func readDiskAllocated(path string) (int64, error) {
	var stat syscall.Stat_t
	if err := syscall.Stat(path, &stat); err != nil {
		return 0, fmt.Errorf("stat %s: %w", path, err)
	}
	return stat.Blocks * 512, nil
}
|
||||
Reference in New Issue
Block a user