forked from wrenn/wrenn
v0.0.1 (#8)
Co-authored-by: Tasnim Kabir Sadik <tksadik92@gmail.com> Reviewed-on: wrenn/sandbox#8
This commit is contained in:
85
internal/sandbox/conntracker.go
Normal file
85
internal/sandbox/conntracker.go
Normal file
@ -0,0 +1,85 @@
|
||||
package sandbox
|
||||
|
||||
import (
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
)
|
||||
|
||||
// ConnTracker tracks active proxy connections for a single sandbox and
// provides a drain mechanism for pre-pause graceful shutdown.
// It is safe for concurrent use.
type ConnTracker struct {
	// draining, once set by Drain, makes every future Acquire return false.
	draining atomic.Bool
	// wg counts in-flight connections: one Add per successful Acquire,
	// one Done per Release.
	wg sync.WaitGroup

	// cancelMu protects cancelDrain so Reset can signal a timed-out Drain
	// goroutine to exit, preventing goroutine leaks on repeated pause failures.
	cancelMu    sync.Mutex
	cancelDrain chan struct{}
}
|
||||
|
||||
// Acquire registers one in-flight connection. Returns false if the tracker
// is already draining; the caller must not call Release in that case.
func (t *ConnTracker) Acquire() bool {
	if t.draining.Load() {
		return false
	}
	t.wg.Add(1)
	// Re-check after Add: Drain may have set draining between our Load
	// and Add. If so, undo the Add and reject the connection.
	// NOTE(review): there is still a narrow window where this Add can run
	// concurrently with a Drain goroutine's Wait while the counter is zero,
	// which the sync.WaitGroup docs call out as misuse — confirm whether
	// callers can actually hit this interleaving.
	if t.draining.Load() {
		t.wg.Done()
		return false
	}
	return true
}
|
||||
|
||||
// Release marks one connection as complete. Must be called exactly once
// per successful Acquire (one that returned true); extra calls would drive
// the WaitGroup counter negative and panic.
func (t *ConnTracker) Release() {
	t.wg.Done()
}
|
||||
|
||||
// Drain marks the tracker as draining (all future Acquire calls return
// false) and waits up to timeout for in-flight connections to finish.
// It returns early if Reset closes the cancel channel. On timeout or
// cancellation the waiter goroutine stays blocked in wg.Wait until the
// remaining connections call Release, then exits.
func (t *ConnTracker) Drain(timeout time.Duration) {
	t.draining.Store(true)

	// Publish a fresh cancel channel under the lock so a concurrent Reset
	// can abort this particular Drain call.
	cancel := make(chan struct{})
	t.cancelMu.Lock()
	t.cancelDrain = cancel
	t.cancelMu.Unlock()

	// Wait for the WaitGroup in a goroutine so we can also select on
	// cancellation and timeout.
	done := make(chan struct{})
	go func() {
		t.wg.Wait()
		close(done)
	}()

	select {
	case <-done:
	case <-cancel:
		// Reset was called; stop waiting.
	case <-time.After(timeout):
	}
}
|
||||
|
||||
// Reset re-enables the tracker after a failed drain. This allows the
|
||||
// sandbox to accept proxy connections again if the pause operation fails
|
||||
// and the VM is resumed. It also cancels any lingering Drain goroutine.
|
||||
func (t *ConnTracker) Reset() {
|
||||
t.cancelMu.Lock()
|
||||
if t.cancelDrain != nil {
|
||||
select {
|
||||
case <-t.cancelDrain:
|
||||
// Already closed.
|
||||
default:
|
||||
close(t.cancelDrain)
|
||||
}
|
||||
t.cancelDrain = nil
|
||||
}
|
||||
t.cancelMu.Unlock()
|
||||
|
||||
t.draining.Store(false)
|
||||
}
|
||||
106
internal/sandbox/images.go
Normal file
106
internal/sandbox/images.go
Normal file
@ -0,0 +1,106 @@
|
||||
package sandbox
|
||||
|
||||
import (
	"errors"
	"fmt"
	"log/slog"
	"os"
	"os/exec"
	"path/filepath"

	"git.omukk.dev/wrenn/sandbox/internal/id"
	"git.omukk.dev/wrenn/sandbox/internal/layout"
)
|
||||
|
||||
// DefaultDiskSizeMB is the standard disk size for base images. Images smaller
// than this are expanded at startup so that dm-snapshot sandboxes see the full
// size without per-sandbox copies. The expansion is sparse — only metadata
// changes; no physical disk is consumed beyond the original content.
const DefaultDiskSizeMB = 5120 // 5 GB
|
||||
|
||||
// EnsureImageSizes walks template directories and expands any rootfs.ext4 that
|
||||
// is smaller than the target size. This is idempotent: images already at or
|
||||
// above the target size are left untouched. Should be called once at host agent
|
||||
// startup before any sandboxes are created.
|
||||
func EnsureImageSizes(wrennDir string, targetMB int) error {
|
||||
if targetMB <= 0 {
|
||||
targetMB = DefaultDiskSizeMB
|
||||
}
|
||||
targetBytes := int64(targetMB) * 1024 * 1024
|
||||
|
||||
// Expand the built-in minimal image.
|
||||
minimalRootfs := layout.TemplateRootfs(wrennDir, id.PlatformTeamID, id.MinimalTemplateID)
|
||||
if err := expandImage(minimalRootfs, targetBytes, targetMB); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Walk teams/{teamDir}/{templateDir}/rootfs.ext4 two levels deep.
|
||||
teamsDir := layout.TeamsDir(wrennDir)
|
||||
teamEntries, err := os.ReadDir(teamsDir)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return nil // teams dir doesn't exist yet — nothing to expand
|
||||
}
|
||||
return fmt.Errorf("read teams dir: %w", err)
|
||||
}
|
||||
|
||||
for _, teamEntry := range teamEntries {
|
||||
if !teamEntry.IsDir() {
|
||||
continue
|
||||
}
|
||||
teamPath := filepath.Join(teamsDir, teamEntry.Name())
|
||||
templateEntries, err := os.ReadDir(teamPath)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
for _, tmplEntry := range templateEntries {
|
||||
if !tmplEntry.IsDir() {
|
||||
continue
|
||||
}
|
||||
rootfs := filepath.Join(teamPath, tmplEntry.Name(), "rootfs.ext4")
|
||||
if err := expandImage(rootfs, targetBytes, targetMB); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// expandImage expands a single rootfs image if it is smaller than targetBytes.
// targetMB is the same target expressed in MB and is used only for logging.
// A missing file is skipped silently (not every template dir has a
// rootfs.ext4). The expansion is sparse: the file is truncated up, the
// filesystem is checked, then resize2fs grows ext4 to fill the new size.
func expandImage(rootfs string, targetBytes int64, targetMB int) error {
	info, err := os.Stat(rootfs)
	if err != nil {
		return nil // not every template dir has a rootfs.ext4
	}

	if info.Size() >= targetBytes {
		return nil // already large enough
	}

	slog.Info("expanding base image",
		"path", rootfs,
		"from_mb", info.Size()/(1024*1024),
		"to_mb", targetMB,
	)

	// Expand the file (sparse — instant, no physical disk used).
	if err := os.Truncate(rootfs, targetBytes); err != nil {
		return fmt.Errorf("truncate %s: %w", rootfs, err)
	}

	// Check filesystem before resize.
	if out, err := exec.Command("e2fsck", "-fy", rootfs).CombinedOutput(); err != nil {
		// e2fsck exit code 1 means "errors corrected", which is fine.
		// Anything else is fatal — including a non-ExitError such as the
		// e2fsck binary being missing, which the previous code silently
		// swallowed before running resize2fs on an unchecked filesystem.
		var exitErr *exec.ExitError
		if !errors.As(err, &exitErr) || exitErr.ExitCode() > 1 {
			return fmt.Errorf("e2fsck %s: %s: %w", rootfs, string(out), err)
		}
	}

	// Grow the ext4 filesystem to fill the new file size.
	if out, err := exec.Command("resize2fs", rootfs).CombinedOutput(); err != nil {
		return fmt.Errorf("resize2fs %s: %s: %w", rootfs, string(out), err)
	}

	slog.Info("base image expanded", "path", rootfs, "size_mb", targetMB)
	return nil
}
|
||||
File diff suppressed because it is too large
Load Diff
178
internal/sandbox/metrics.go
Normal file
178
internal/sandbox/metrics.go
Normal file
@ -0,0 +1,178 @@
|
||||
package sandbox
|
||||
|
||||
import (
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
// MetricPoint holds one metrics sample.
type MetricPoint struct {
	Timestamp time.Time // when the sample was taken
	CPUPct    float64   // CPU usage percentage
	MemBytes  int64     // memory usage in bytes
	DiskBytes int64     // disk usage in bytes
}
|
||||
|
||||
// Ring buffer capacity constants. Each tier's capacity × its sample
// interval gives the window the tier covers.
const (
	ring10mCap = 1200 // 500ms × 1200 = 10 min
	ring2hCap  = 240  // 30s × 240 = 2 h
	ring24hCap = 288  // 5min × 288 = 24 h

	downsample2hEvery  = 60 // 60 × 500ms = 30s
	downsample24hEvery = 10 // 10 × 30s = 5min
)
|
||||
|
||||
// metricsRing holds three tiered ring buffers with automatic downsampling
// from the finest tier into coarser tiers.
type metricsRing struct {
	// mu guards every field below.
	mu sync.Mutex

	// 10-minute tier: 500ms samples. idx points at the next write slot;
	// count is the number of valid entries (saturates at capacity).
	buf10m   [ring10mCap]MetricPoint
	idx10m   int
	count10m int

	// 2-hour tier: 30s averages.
	buf2h   [ring2hCap]MetricPoint
	idx2h   int
	count2h int

	// 24-hour tier: 5min averages.
	buf24h   [ring24hCap]MetricPoint
	idx24h   int
	count24h int

	// Accumulators for downsampling: raw samples collected until a full
	// window is available to average into the next-coarser tier.
	acc500ms  [downsample2hEvery]MetricPoint
	acc500msN int

	acc30s  [downsample24hEvery]MetricPoint
	acc30sN int
}
|
||||
|
||||
// newMetricsRing creates an empty metrics ring buffer.
|
||||
func newMetricsRing() *metricsRing {
|
||||
return &metricsRing{}
|
||||
}
|
||||
|
||||
// Push adds a 500ms sample to the finest tier and triggers downsampling
|
||||
// into coarser tiers when enough samples have accumulated.
|
||||
func (r *metricsRing) Push(p MetricPoint) {
|
||||
r.mu.Lock()
|
||||
defer r.mu.Unlock()
|
||||
|
||||
// Write to 10m ring.
|
||||
r.buf10m[r.idx10m] = p
|
||||
r.idx10m = (r.idx10m + 1) % ring10mCap
|
||||
if r.count10m < ring10mCap {
|
||||
r.count10m++
|
||||
}
|
||||
|
||||
// Accumulate for 2h downsample.
|
||||
r.acc500ms[r.acc500msN] = p
|
||||
r.acc500msN++
|
||||
if r.acc500msN == downsample2hEvery {
|
||||
avg := averagePoints(r.acc500ms[:downsample2hEvery])
|
||||
r.push2h(avg)
|
||||
r.acc500msN = 0
|
||||
}
|
||||
}
|
||||
|
||||
func (r *metricsRing) push2h(p MetricPoint) {
|
||||
r.buf2h[r.idx2h] = p
|
||||
r.idx2h = (r.idx2h + 1) % ring2hCap
|
||||
if r.count2h < ring2hCap {
|
||||
r.count2h++
|
||||
}
|
||||
|
||||
// Accumulate for 24h downsample.
|
||||
r.acc30s[r.acc30sN] = p
|
||||
r.acc30sN++
|
||||
if r.acc30sN == downsample24hEvery {
|
||||
avg := averagePoints(r.acc30s[:downsample24hEvery])
|
||||
r.push24h(avg)
|
||||
r.acc30sN = 0
|
||||
}
|
||||
}
|
||||
|
||||
func (r *metricsRing) push24h(p MetricPoint) {
|
||||
r.buf24h[r.idx24h] = p
|
||||
r.idx24h = (r.idx24h + 1) % ring24hCap
|
||||
if r.count24h < ring24hCap {
|
||||
r.count24h++
|
||||
}
|
||||
}
|
||||
|
||||
// Get10m returns the 10-minute tier points in chronological order.
|
||||
func (r *metricsRing) Get10m() []MetricPoint {
|
||||
r.mu.Lock()
|
||||
defer r.mu.Unlock()
|
||||
return r.readRing(r.buf10m[:], r.idx10m, r.count10m)
|
||||
}
|
||||
|
||||
// Get2h returns the 2-hour tier points in chronological order.
|
||||
func (r *metricsRing) Get2h() []MetricPoint {
|
||||
r.mu.Lock()
|
||||
defer r.mu.Unlock()
|
||||
return r.readRing(r.buf2h[:], r.idx2h, r.count2h)
|
||||
}
|
||||
|
||||
// Get24h returns the 24-hour tier points in chronological order.
|
||||
func (r *metricsRing) Get24h() []MetricPoint {
|
||||
r.mu.Lock()
|
||||
defer r.mu.Unlock()
|
||||
return r.readRing(r.buf24h[:], r.idx24h, r.count24h)
|
||||
}
|
||||
|
||||
// Flush returns all three tiers and resets the ring buffer.
|
||||
func (r *metricsRing) Flush() (pts10m, pts2h, pts24h []MetricPoint) {
|
||||
r.mu.Lock()
|
||||
defer r.mu.Unlock()
|
||||
|
||||
pts10m = r.readRing(r.buf10m[:], r.idx10m, r.count10m)
|
||||
pts2h = r.readRing(r.buf2h[:], r.idx2h, r.count2h)
|
||||
pts24h = r.readRing(r.buf24h[:], r.idx24h, r.count24h)
|
||||
|
||||
// Reset all state.
|
||||
r.idx10m, r.count10m = 0, 0
|
||||
r.idx2h, r.count2h = 0, 0
|
||||
r.idx24h, r.count24h = 0, 0
|
||||
r.acc500msN = 0
|
||||
r.acc30sN = 0
|
||||
|
||||
return pts10m, pts2h, pts24h
|
||||
}
|
||||
|
||||
// readRing extracts elements from a circular buffer in chronological order.
|
||||
func (r *metricsRing) readRing(buf []MetricPoint, nextIdx, count int) []MetricPoint {
|
||||
if count == 0 {
|
||||
return nil
|
||||
}
|
||||
result := make([]MetricPoint, count)
|
||||
bufLen := len(buf)
|
||||
start := (nextIdx - count + bufLen) % bufLen
|
||||
for i := range count {
|
||||
result[i] = buf[(start+i)%bufLen]
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// averagePoints computes the average of a slice of MetricPoints.
|
||||
// The timestamp is set to the last point's timestamp.
|
||||
func averagePoints(pts []MetricPoint) MetricPoint {
|
||||
n := float64(len(pts))
|
||||
var cpu float64
|
||||
var mem, disk int64
|
||||
for _, p := range pts {
|
||||
cpu += p.CPUPct
|
||||
mem += p.MemBytes
|
||||
disk += p.DiskBytes
|
||||
}
|
||||
return MetricPoint{
|
||||
Timestamp: pts[len(pts)-1].Timestamp,
|
||||
CPUPct: cpu / n,
|
||||
MemBytes: int64(float64(mem) / n),
|
||||
DiskBytes: int64(float64(disk) / n),
|
||||
}
|
||||
}
|
||||
83
internal/sandbox/proc.go
Normal file
83
internal/sandbox/proc.go
Normal file
@ -0,0 +1,83 @@
|
||||
package sandbox
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
"syscall"
|
||||
)
|
||||
|
||||
// cpuStat holds raw CPU jiffies read from /proc/{pid}/stat.
type cpuStat struct {
	utime uint64 // user-mode jiffies (field 14 in proc(5))
	stime uint64 // kernel-mode jiffies (field 15 in proc(5))
}

// readCPUStat reads user and system CPU jiffies from /proc/{pid}/stat.
// Fields 14 (utime) and 15 (stime) are 1-indexed in the man page; after
// stripping the "pid (comm)" prefix they sit at indices 11 and 12.
func readCPUStat(pid int) (cpuStat, error) {
	path := fmt.Sprintf("/proc/%d/stat", pid)
	data, err := os.ReadFile(path)
	if err != nil {
		return cpuStat{}, fmt.Errorf("read stat: %w", err)
	}

	// /proc/{pid}/stat format: pid (comm) state fields...
	// The comm field may contain spaces and parens, so find the last ')' first.
	content := string(data)
	idx := strings.LastIndex(content, ")")
	if idx < 0 {
		return cpuStat{}, fmt.Errorf("malformed /proc/%d/stat: no closing paren", pid)
	}
	// Slice from idx+1, not idx+2: strings.Fields skips leading whitespace
	// on its own, and idx+2 would panic with an out-of-range slice if ')'
	// happened to be the final byte of a truncated/malformed file.
	// After ")" the fields are: state (index 0), ..., utime (11), stime (12).
	fields := strings.Fields(content[idx+1:])
	if len(fields) < 13 {
		return cpuStat{}, fmt.Errorf("malformed /proc/%d/stat: too few fields (%d)", pid, len(fields))
	}
	utime, err := strconv.ParseUint(fields[11], 10, 64)
	if err != nil {
		return cpuStat{}, fmt.Errorf("parse utime: %w", err)
	}
	stime, err := strconv.ParseUint(fields[12], 10, 64)
	if err != nil {
		return cpuStat{}, fmt.Errorf("parse stime: %w", err)
	}
	return cpuStat{utime: utime, stime: stime}, nil
}
|
||||
|
||||
// readMemRSS reads VmRSS from /proc/{pid}/status and returns bytes.
// /proc reports the value in kB, so the result is scaled by 1024.
func readMemRSS(pid int) (int64, error) {
	data, err := os.ReadFile(fmt.Sprintf("/proc/%d/status", pid))
	if err != nil {
		return 0, fmt.Errorf("read status: %w", err)
	}
	for _, line := range strings.Split(string(data), "\n") {
		if !strings.HasPrefix(line, "VmRSS:") {
			continue
		}
		// Line looks like "VmRSS:   123456 kB" — the number is field 1.
		parts := strings.Fields(line)
		if len(parts) < 2 {
			return 0, fmt.Errorf("malformed VmRSS line")
		}
		kb, err := strconv.ParseInt(parts[1], 10, 64)
		if err != nil {
			return 0, fmt.Errorf("parse VmRSS: %w", err)
		}
		return kb * 1024, nil
	}
	return 0, fmt.Errorf("VmRSS not found in /proc/%d/status", pid)
}
|
||||
|
||||
// readDiskAllocated returns the actual allocated bytes (not apparent size)
// of the file at path. This uses stat's block count × 512 (st_blocks is
// reported in 512-byte units), so sparse files report only the blocks
// physically allocated.
func readDiskAllocated(path string) (int64, error) {
	var stat syscall.Stat_t
	if err := syscall.Stat(path, &stat); err != nil {
		return 0, fmt.Errorf("stat %s: %w", path, err)
	}
	return stat.Blocks * 512, nil
}
|
||||
Reference in New Issue
Block a user