forked from wrenn/wrenn
v0.2.0 (#50)
Co-authored-by: Tasnim Kabir Sadik <tksadik@omukk.dev> Reviewed-on: wrenn/wrenn#50
This commit is contained in:
28
internal/sandbox/chversion.go
Normal file
28
internal/sandbox/chversion.go
Normal file
@ -0,0 +1,28 @@
|
||||
package sandbox
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os/exec"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// DetectCHVersion runs the cloud-hypervisor binary with --version and
|
||||
// parses the semver from the output (e.g. "cloud-hypervisor v43.0" → "43.0").
|
||||
func DetectCHVersion(binaryPath string) (string, error) {
|
||||
out, err := exec.Command(binaryPath, "--version").Output()
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("run %s --version: %w", binaryPath, err)
|
||||
}
|
||||
|
||||
line := strings.TrimSpace(string(out))
|
||||
for field := range strings.FieldsSeq(line) {
|
||||
v := strings.TrimPrefix(field, "v")
|
||||
if v != field || strings.Contains(field, ".") {
|
||||
if strings.Count(v, ".") >= 1 {
|
||||
return v, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return "", fmt.Errorf("could not parse version from cloud-hypervisor output: %q", line)
|
||||
}
|
||||
@ -10,12 +10,22 @@ import (
|
||||
// ConnTracker tracks active proxy connections for a single sandbox and
|
||||
// provides a drain mechanism for pre-pause graceful shutdown.
|
||||
// It is safe for concurrent use.
|
||||
//
|
||||
// Internally we do not use sync.WaitGroup because Wait cannot be interrupted
|
||||
// — a stuck handler would pin the waiter goroutine forever. Instead we keep
|
||||
// an explicit counter guarded by mu plus a zeroCh that is closed when the
|
||||
// counter transitions to 0, allowing Drain/ForceClose to select on it
|
||||
// alongside cancellation and timeout signals without spawning helper
|
||||
// goroutines that could leak across Reset boundaries.
|
||||
type ConnTracker struct {
|
||||
draining atomic.Bool
|
||||
wg sync.WaitGroup
|
||||
|
||||
mu sync.Mutex
|
||||
count int
|
||||
zeroCh chan struct{} // closed when count drops to 0; recreated on next Acquire
|
||||
|
||||
// cancelMu protects cancelDrain so Reset can signal a timed-out Drain
|
||||
// goroutine to exit, preventing goroutine leaks on repeated pause failures.
|
||||
// to exit early.
|
||||
cancelMu sync.Mutex
|
||||
cancelDrain chan struct{}
|
||||
|
||||
@ -40,13 +50,18 @@ func (t *ConnTracker) Acquire() bool {
|
||||
if t.draining.Load() {
|
||||
return false
|
||||
}
|
||||
t.wg.Add(1)
|
||||
// Re-check after Add: Drain may have set draining between our Load
|
||||
// and Add. If so, undo the Add and reject the connection.
|
||||
t.mu.Lock()
|
||||
// Re-check under mu so a concurrent Drain that flipped draining cannot
|
||||
// race past us with the counter already incremented.
|
||||
if t.draining.Load() {
|
||||
t.wg.Done()
|
||||
t.mu.Unlock()
|
||||
return false
|
||||
}
|
||||
t.count++
|
||||
if t.count == 1 {
|
||||
t.zeroCh = make(chan struct{})
|
||||
}
|
||||
t.mu.Unlock()
|
||||
return true
|
||||
}
|
||||
|
||||
@ -63,11 +78,32 @@ func (t *ConnTracker) Context() context.Context {
|
||||
// Release marks one connection as complete. Must be called exactly once
|
||||
// per successful Acquire.
|
||||
func (t *ConnTracker) Release() {
|
||||
t.wg.Done()
|
||||
t.mu.Lock()
|
||||
t.count--
|
||||
if t.count == 0 && t.zeroCh != nil {
|
||||
close(t.zeroCh)
|
||||
t.zeroCh = nil
|
||||
}
|
||||
t.mu.Unlock()
|
||||
}
|
||||
|
||||
// waitDrain returns a channel that closes when the in-flight count is zero,
|
||||
// or a closed channel immediately if there's nothing in flight.
|
||||
func (t *ConnTracker) waitDrain() <-chan struct{} {
|
||||
t.mu.Lock()
|
||||
defer t.mu.Unlock()
|
||||
if t.count == 0 {
|
||||
ch := make(chan struct{})
|
||||
close(ch)
|
||||
return ch
|
||||
}
|
||||
return t.zeroCh
|
||||
}
|
||||
|
||||
// Drain marks the tracker as draining (all future Acquire calls return
|
||||
// false) and waits up to timeout for in-flight connections to finish.
|
||||
// Returns when the count hits 0, Reset is called, or the timeout fires —
|
||||
// whichever happens first. No goroutine is leaked on timeout.
|
||||
func (t *ConnTracker) Drain(timeout time.Duration) {
|
||||
t.draining.Store(true)
|
||||
|
||||
@ -76,16 +112,9 @@ func (t *ConnTracker) Drain(timeout time.Duration) {
|
||||
t.cancelDrain = cancel
|
||||
t.cancelMu.Unlock()
|
||||
|
||||
done := make(chan struct{})
|
||||
go func() {
|
||||
t.wg.Wait()
|
||||
close(done)
|
||||
}()
|
||||
|
||||
select {
|
||||
case <-done:
|
||||
case <-t.waitDrain():
|
||||
case <-cancel:
|
||||
// Reset was called; stop waiting.
|
||||
case <-time.After(timeout):
|
||||
}
|
||||
}
|
||||
@ -101,22 +130,16 @@ func (t *ConnTracker) ForceClose() {
|
||||
}
|
||||
t.ctxMu.Unlock()
|
||||
|
||||
// Wait briefly for force-closed connections to call Release().
|
||||
done := make(chan struct{})
|
||||
go func() {
|
||||
t.wg.Wait()
|
||||
close(done)
|
||||
}()
|
||||
select {
|
||||
case <-done:
|
||||
case <-t.waitDrain():
|
||||
case <-time.After(2 * time.Second):
|
||||
}
|
||||
}
|
||||
|
||||
// Reset re-enables the tracker after a failed drain. This allows the
|
||||
// sandbox to accept proxy connections again if the pause operation fails
|
||||
// and the VM is resumed. It also cancels any lingering Drain goroutine
|
||||
// and creates a fresh context for new connections.
|
||||
// and the VM is resumed. It also signals any lingering Drain to exit and
|
||||
// creates a fresh context for new connections.
|
||||
func (t *ConnTracker) Reset() {
|
||||
t.cancelMu.Lock()
|
||||
if t.cancelDrain != nil {
|
||||
@ -130,7 +153,6 @@ func (t *ConnTracker) Reset() {
|
||||
}
|
||||
t.cancelMu.Unlock()
|
||||
|
||||
// Replace the cancelled context with a fresh one.
|
||||
t.ctxMu.Lock()
|
||||
t.ctx, t.cancel = context.WithCancel(context.Background())
|
||||
t.ctxMu.Unlock()
|
||||
|
||||
@ -1,30 +0,0 @@
|
||||
package sandbox
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os/exec"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// DetectFirecrackerVersion runs the firecracker binary with --version and
|
||||
// parses the semver from the output (e.g. "Firecracker v1.14.1" → "1.14.1").
|
||||
func DetectFirecrackerVersion(binaryPath string) (string, error) {
|
||||
out, err := exec.Command(binaryPath, "--version").Output()
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("run %s --version: %w", binaryPath, err)
|
||||
}
|
||||
|
||||
// Output is typically "Firecracker v1.14.1\n" or similar.
|
||||
line := strings.TrimSpace(string(out))
|
||||
for _, field := range strings.Fields(line) {
|
||||
v := strings.TrimPrefix(field, "v")
|
||||
if v != field || strings.Contains(field, ".") {
|
||||
// Either had a "v" prefix or contains a dot — likely the version.
|
||||
if strings.Count(v, ".") >= 1 {
|
||||
return v, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return "", fmt.Errorf("could not parse version from firecracker output: %q", line)
|
||||
}
|
||||
@ -9,6 +9,8 @@ import (
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/jackc/pgx/v5/pgtype"
|
||||
|
||||
"git.omukk.dev/wrenn/wrenn/internal/layout"
|
||||
"git.omukk.dev/wrenn/wrenn/pkg/id"
|
||||
)
|
||||
@ -29,13 +31,9 @@ func EnsureImageSizes(wrennDir string, targetMB int) error {
|
||||
}
|
||||
targetBytes := int64(targetMB) * 1024 * 1024
|
||||
|
||||
// Expand the built-in minimal image.
|
||||
minimalRootfs := layout.TemplateRootfs(wrennDir, id.PlatformTeamID, id.MinimalTemplateID)
|
||||
if err := expandImage(minimalRootfs, targetBytes, targetMB); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Walk teams/{teamDir}/{templateDir}/rootfs.ext4 two levels deep.
|
||||
// Walk teams/{teamDir}/{templateDir}/rootfs.ext4 two levels deep. The
|
||||
// built-in system base templates live under teams/{base36(0)}/... so this
|
||||
// covers them too.
|
||||
teamsDir := layout.TeamsDir(wrennDir)
|
||||
teamEntries, err := os.ReadDir(teamsDir)
|
||||
if err != nil {
|
||||
@ -104,12 +102,19 @@ func ParseSizeToMB(s string) (int, error) {
|
||||
}
|
||||
}
|
||||
|
||||
// ShrinkMinimalImage shrinks the built-in minimal rootfs back to its minimum
|
||||
// size using resize2fs -M. This is the inverse of EnsureImageSizes and should
|
||||
// be called during graceful shutdown so the image is stored compactly on disk.
|
||||
func ShrinkMinimalImage(wrennDir string) {
|
||||
minimalRootfs := layout.TemplateRootfs(wrennDir, id.PlatformTeamID, id.MinimalTemplateID)
|
||||
shrinkImage(minimalRootfs)
|
||||
// ShrinkSystemImages shrinks the built-in system base rootfs images back to
|
||||
// their minimum size using resize2fs -M. This is the inverse of
|
||||
// EnsureImageSizes and should be called during graceful shutdown so the images
|
||||
// are stored compactly on disk.
|
||||
func ShrinkSystemImages(wrennDir string) {
|
||||
for _, tmplID := range []pgtype.UUID{
|
||||
id.UbuntuTemplateID,
|
||||
id.AlpineTemplateID,
|
||||
id.ArchTemplateID,
|
||||
id.FedoraTemplateID,
|
||||
} {
|
||||
shrinkImage(layout.TemplateRootfs(wrennDir, id.PlatformTeamID, tmplID))
|
||||
}
|
||||
}
|
||||
|
||||
// shrinkImage shrinks a single rootfs image to its minimum size.
|
||||
|
||||
187
internal/sandbox/launch_snapshot.go
Normal file
187
internal/sandbox/launch_snapshot.go
Normal file
@ -0,0 +1,187 @@
|
||||
// Package sandbox: launching a fresh sandbox from a snapshot template.
|
||||
//
|
||||
// Mirrors the pause/resume restore path but produces a brand-new sandbox each
|
||||
// call: fresh ID, fresh network slot, fresh CoW on top of the template's
|
||||
// flattened rootfs. The CH process is launched with --restore + lazy memory
|
||||
// (UFFD), and the post-restore memory loader is started so any subsequent
|
||||
// CreateSnapshot taken from this descendant is self-contained (the
|
||||
// pause-resume-pause chain guarantee, applied to template lineages).
|
||||
package sandbox
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"os"
|
||||
"time"
|
||||
|
||||
"github.com/jackc/pgx/v5/pgtype"
|
||||
|
||||
"git.omukk.dev/wrenn/wrenn/internal/devicemapper"
|
||||
"git.omukk.dev/wrenn/wrenn/internal/layout"
|
||||
"git.omukk.dev/wrenn/wrenn/internal/models"
|
||||
"git.omukk.dev/wrenn/wrenn/internal/network"
|
||||
"git.omukk.dev/wrenn/wrenn/pkg/id"
|
||||
)
|
||||
|
||||
// createFromSnapshotTemplate launches a new sandbox from a snapshot-template
|
||||
// directory (state.json + config.json + memory-ranges + rootfs.ext4).
|
||||
//
|
||||
// The caller has already verified IsSnapshotTemplate(templateDir). Resources
|
||||
// acquired here are rolled back on any failure; on success the sandbox is
|
||||
// registered in m.boxes and runs in StatusRunning.
|
||||
func (m *Manager) createFromSnapshotTemplate(
|
||||
ctx context.Context,
|
||||
sandboxID string,
|
||||
teamID, templateID pgtype.UUID,
|
||||
vcpus, memoryMB, timeoutSec, diskSizeMB int,
|
||||
defaultUser string,
|
||||
defaultEnv map[string]string,
|
||||
) (*models.Sandbox, int64, error) {
|
||||
templateDir := layout.TemplateDir(m.cfg.WrennDir, teamID, templateID)
|
||||
baseRootfs := layout.TemplateRootfs(m.cfg.WrennDir, teamID, templateID)
|
||||
|
||||
meta, err := readSnapshotMeta(templateDir)
|
||||
if err != nil {
|
||||
return nil, 0, fmt.Errorf("read snapshot meta: %w", err)
|
||||
}
|
||||
if meta.SandboxDir == "" {
|
||||
// CH's saved config.json hardcodes a tmpfs disk path; meta.SandboxDir
|
||||
// is that exact path. A snapshot template without it cannot be launched.
|
||||
return nil, 0, fmt.Errorf("snapshot template %s missing sandbox_dir in meta", templateDir)
|
||||
}
|
||||
|
||||
// Acquire shared read-only loop on the flattened rootfs. Many sandboxes
|
||||
// can share this loop concurrently — refcounted in LoopRegistry.
|
||||
originLoop, err := m.loops.Acquire(baseRootfs)
|
||||
if err != nil {
|
||||
return nil, 0, fmt.Errorf("acquire loop: %w", err)
|
||||
}
|
||||
originSize, err := devicemapper.OriginSizeBytes(originLoop)
|
||||
if err != nil {
|
||||
m.loops.Release(baseRootfs)
|
||||
return nil, 0, fmt.Errorf("origin size: %w", err)
|
||||
}
|
||||
|
||||
// Per-sandbox CoW on top of the shared origin.
|
||||
dmName := "wrenn-" + sandboxID
|
||||
if err := os.MkdirAll(layout.SandboxDir(m.cfg.WrennDir, sandboxID), 0o755); err != nil {
|
||||
m.loops.Release(baseRootfs)
|
||||
return nil, 0, fmt.Errorf("create sandbox dir: %w", err)
|
||||
}
|
||||
cowPath := layout.SandboxCowPath(m.cfg.WrennDir, sandboxID)
|
||||
cowSize := max(int64(diskSizeMB)*1024*1024, originSize)
|
||||
dmDev, err := devicemapper.CreateSnapshot(dmName, originLoop, cowPath, originSize, cowSize)
|
||||
if err != nil {
|
||||
m.loops.Release(baseRootfs)
|
||||
return nil, 0, fmt.Errorf("create dm-snapshot: %w", err)
|
||||
}
|
||||
|
||||
res := &createResources{
|
||||
sandboxID: sandboxID,
|
||||
loops: m.loops,
|
||||
loopImage: baseRootfs,
|
||||
dmDevice: dmDev,
|
||||
cowPath: cowPath,
|
||||
slots: m.slots,
|
||||
}
|
||||
|
||||
slotIdx, err := m.slots.Allocate()
|
||||
if err != nil {
|
||||
res.rollback()
|
||||
return nil, 0, fmt.Errorf("allocate network slot: %w", err)
|
||||
}
|
||||
res.slotIdx = slotIdx
|
||||
slot := network.NewSlot(slotIdx)
|
||||
|
||||
if err := network.CreateNetwork(slot); err != nil {
|
||||
res.rollback()
|
||||
return nil, 0, fmt.Errorf("create network: %w", err)
|
||||
}
|
||||
res.slot = slot
|
||||
|
||||
// CH's saved config.json hardcodes a tmpfs disk path; meta.SandboxDir is
|
||||
// that exact path (carried forward verbatim across template chains, so a
|
||||
// snapshot-of-a-snapshot resolves to the root ancestor's path). The
|
||||
// launcher mounts a fresh tmpfs there inside its private mount namespace
|
||||
// and symlinks rootfs.ext4 → our new dm device.
|
||||
vmCfg := m.buildRestoreVMConfig(restoreInputs{
|
||||
sandboxID: sandboxID,
|
||||
templateID: id.UUIDString(templateID),
|
||||
snapDir: templateDir,
|
||||
rootfsPath: dmDev.DevicePath,
|
||||
vcpus: vcpus,
|
||||
memoryMB: memoryMB,
|
||||
slot: slot,
|
||||
sandboxDir: meta.SandboxDir,
|
||||
})
|
||||
|
||||
client, err := m.launchRestoredVM(ctx, vmCfg, slot.HostIP.String())
|
||||
if err != nil {
|
||||
res.rollback()
|
||||
return nil, 0, err
|
||||
}
|
||||
res.vm = m.vm
|
||||
|
||||
envdVersion, _ := client.FetchVersion(ctx)
|
||||
|
||||
now := time.Now()
|
||||
sb := &sandboxState{
|
||||
Sandbox: models.Sandbox{
|
||||
ID: sandboxID,
|
||||
Status: models.StatusRunning,
|
||||
TemplateTeamID: teamID.Bytes,
|
||||
TemplateID: templateID.Bytes,
|
||||
VCPUs: vcpus,
|
||||
MemoryMB: memoryMB,
|
||||
TimeoutSec: timeoutSec,
|
||||
SlotIndex: slotIdx,
|
||||
HostIP: slot.HostIP,
|
||||
RootfsPath: dmDev.DevicePath,
|
||||
CreatedAt: now,
|
||||
LastActiveAt: now,
|
||||
Metadata: m.buildMetadata(envdVersion),
|
||||
},
|
||||
slot: slot,
|
||||
connTracker: &ConnTracker{},
|
||||
dmDevice: dmDev,
|
||||
baseImagePath: baseRootfs,
|
||||
sandboxDirOverride: meta.SandboxDir,
|
||||
}
|
||||
sb.client.Store(client)
|
||||
|
||||
m.mu.Lock()
|
||||
m.boxes[sandboxID] = sb
|
||||
m.mu.Unlock()
|
||||
|
||||
// /init lifecycle bump then start the memory loader. Loader is required
|
||||
// so any future CreateSnapshot taken from this descendant captures all
|
||||
// guest pages (otherwise SEEK_DATA/SEEK_HOLE would emit holes for the
|
||||
// still-lazy UFFD pages — silent corruption across template chains).
|
||||
m.initAndStartMemoryLoader(ctx, sb, defaultUser, id.UUIDString(templateID), defaultEnv)
|
||||
|
||||
m.startSampler(sb)
|
||||
m.startCrashWatcher(sb)
|
||||
|
||||
slog.Info("sandbox launched from snapshot template",
|
||||
"id", sandboxID,
|
||||
"team_id", teamID,
|
||||
"template_id", templateID,
|
||||
"sandbox_dir", meta.SandboxDir,
|
||||
"host_ip", slot.HostIP.String(),
|
||||
"dm_device", dmDev.DevicePath,
|
||||
)
|
||||
|
||||
return &sb.Sandbox, cowSize, nil
|
||||
}
|
||||
|
||||
// templateExists returns true if a snapshot template already lives at
|
||||
// TemplateDir(team, templateID). Used by CreateSnapshot to refuse silent
|
||||
// overwrites — every snapshot must land in a fresh templateID.
|
||||
func (m *Manager) templateExists(teamID, templateID pgtype.UUID) bool {
|
||||
dir := layout.TemplateDir(m.cfg.WrennDir, teamID, templateID)
|
||||
if _, err := os.Stat(dir); err != nil {
|
||||
return false
|
||||
}
|
||||
return layout.IsSnapshotTemplate(dir)
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
1180
internal/sandbox/pause.go
Normal file
1180
internal/sandbox/pause.go
Normal file
File diff suppressed because it is too large
Load Diff
@ -1,13 +1,14 @@
|
||||
package sandbox
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
"syscall"
|
||||
|
||||
"git.omukk.dev/wrenn/wrenn/internal/envdclient"
|
||||
)
|
||||
@ -48,42 +49,43 @@ func readCPUStat(pid int) (cpuStat, error) {
|
||||
return cpuStat{utime: utime, stime: stime}, nil
|
||||
}
|
||||
|
||||
// readEnvdMemUsed fetches mem_used from envd's /metrics endpoint. Returns
|
||||
// guest-side total - MemAvailable (actual process memory, excluding reclaimable
|
||||
// page cache). VmRSS of the Firecracker process includes guest page cache and
|
||||
// never decreases, so this is the accurate metric for dashboard display.
|
||||
func readEnvdMemUsed(client *envdclient.Client) (int64, error) {
|
||||
resp, err := client.HTTPClient().Get(client.BaseURL() + "/metrics")
|
||||
// envdMetrics holds metric values read from envd's /metrics endpoint.
|
||||
type envdMetrics struct {
|
||||
MemBytes int64
|
||||
DiskBytes int64
|
||||
}
|
||||
|
||||
// readEnvdMetrics fetches mem_used and disk_used from envd's /metrics endpoint.
|
||||
// Returns guest-side process memory (total - available) and filesystem usage
|
||||
// from statfs("/"). These are the guest-visible metrics users care about.
|
||||
func readEnvdMetrics(ctx context.Context, client *envdclient.Client) (envdMetrics, error) {
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, client.BaseURL()+"/metrics", nil)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("fetch envd metrics: %w", err)
|
||||
return envdMetrics{}, fmt.Errorf("build metrics request: %w", err)
|
||||
}
|
||||
|
||||
resp, err := client.HTTPClient().Do(req)
|
||||
if err != nil {
|
||||
return envdMetrics{}, fmt.Errorf("fetch envd metrics: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != 200 {
|
||||
return 0, fmt.Errorf("envd metrics: status %d", resp.StatusCode)
|
||||
return envdMetrics{}, fmt.Errorf("envd metrics: status %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("read envd metrics body: %w", err)
|
||||
return envdMetrics{}, fmt.Errorf("read envd metrics body: %w", err)
|
||||
}
|
||||
|
||||
var m struct {
|
||||
MemUsed int64 `json:"mem_used"`
|
||||
MemUsed int64 `json:"mem_used"`
|
||||
DiskUsed int64 `json:"disk_used"`
|
||||
}
|
||||
if err := json.Unmarshal(body, &m); err != nil {
|
||||
return 0, fmt.Errorf("decode envd metrics: %w", err)
|
||||
return envdMetrics{}, fmt.Errorf("decode envd metrics: %w", err)
|
||||
}
|
||||
|
||||
return m.MemUsed, nil
|
||||
}
|
||||
|
||||
// readDiskAllocated returns the actual allocated bytes (not apparent size)
|
||||
// of the file at path. This uses stat's block count × 512.
|
||||
func readDiskAllocated(path string) (int64, error) {
|
||||
var stat syscall.Stat_t
|
||||
if err := syscall.Stat(path, &stat); err != nil {
|
||||
return 0, fmt.Errorf("stat %s: %w", path, err)
|
||||
}
|
||||
return stat.Blocks * 512, nil
|
||||
return envdMetrics{MemBytes: m.MemUsed, DiskBytes: m.DiskUsed}, nil
|
||||
}
|
||||
|
||||
186
internal/sandbox/punch.go
Normal file
186
internal/sandbox/punch.go
Normal file
@ -0,0 +1,186 @@
|
||||
// Package sandbox: post-snapshot hole punching for memory-ranges files.
|
||||
//
|
||||
// CH v52's SEEK_DATA/SEEK_HOLE snapshot writer only skips ranges that are
// already holes in the source memfd. Pages the guest never reported as free are
|
||||
// written verbatim — including pages whose contents happen to be all zero
|
||||
// (fresh allocations the guest scribbled then released without telling the
|
||||
// balloon driver). Walking the resulting file and punching any 4 KiB block
|
||||
// of zeros recovers that space without any guest cooperation.
|
||||
package sandbox
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"log/slog"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
const (
|
||||
// punchBlockSize is the granularity at which we test for zero runs and
|
||||
// issue FALLOC_FL_PUNCH_HOLE. Matches the kernel page size and the
|
||||
// minimum hole size on ext4.
|
||||
punchBlockSize = 4096
|
||||
|
||||
// punchReadSize is the IO chunk size used by the scan loop. We read
|
||||
// many blocks per syscall and split them in-memory so a 20 GiB
|
||||
// memory-ranges file costs ~20K read(2) syscalls instead of ~5M.
|
||||
// Crucial under single-disk hosts where each syscall otherwise
|
||||
// contends with sshd / journal IO.
|
||||
punchReadSize = 1 << 20 // 1 MiB = 256 blocks
|
||||
)
|
||||
|
||||
// punchZeroPagesInDir runs punchZeroPages on every memory* file in dir.
|
||||
// CH writes its memory dump as one or more files prefixed "memory" inside
|
||||
// the snapshot directory; everything else (config.json, state.json) is
|
||||
// metadata and untouched.
|
||||
func punchZeroPagesInDir(dir string) {
|
||||
entries, err := os.ReadDir(dir)
|
||||
if err != nil {
|
||||
slog.Warn("punch: read snapshot dir", "dir", dir, "error", err)
|
||||
return
|
||||
}
|
||||
for _, e := range entries {
|
||||
if e.IsDir() || !strings.HasPrefix(e.Name(), "memory") {
|
||||
continue
|
||||
}
|
||||
path := filepath.Join(dir, e.Name())
|
||||
before, after, err := punchZeroPages(path)
|
||||
if err != nil {
|
||||
slog.Warn("punch: zero-page scan failed", "path", path, "error", err)
|
||||
continue
|
||||
}
|
||||
slog.Info("punch: zero-page scan done",
|
||||
"path", path,
|
||||
"alloc_before", before,
|
||||
"alloc_after", after,
|
||||
"reclaimed", before-after)
|
||||
}
|
||||
}
|
||||
|
||||
// punchZeroPages scans path block-by-block, batching runs of all-zero 4 KiB
|
||||
// blocks and punching them out via FALLOC_FL_PUNCH_HOLE. Existing holes are
|
||||
// skipped via SEEK_DATA so a partially-sparse input stays cheap to scan.
|
||||
//
|
||||
// Returns the file's disk allocation (st_blocks * 512) before and after.
|
||||
func punchZeroPages(path string) (int64, int64, error) {
|
||||
f, err := os.OpenFile(path, os.O_RDWR, 0)
|
||||
if err != nil {
|
||||
return 0, 0, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
stBefore, err := statBlocks(f)
|
||||
if err != nil {
|
||||
return 0, 0, fmt.Errorf("stat before: %w", err)
|
||||
}
|
||||
|
||||
fi, err := f.Stat()
|
||||
if err != nil {
|
||||
return 0, 0, fmt.Errorf("stat: %w", err)
|
||||
}
|
||||
size := fi.Size()
|
||||
|
||||
buf := make([]byte, punchReadSize)
|
||||
off := int64(0)
|
||||
|
||||
for off < size {
|
||||
// Skip ahead to next data region; nothing to do in holes.
|
||||
next, err := f.Seek(off, 3) // SEEK_DATA = 3
|
||||
if err != nil {
|
||||
if errors.Is(err, io.EOF) || errors.Is(err, unix.ENXIO) {
|
||||
break
|
||||
}
|
||||
return 0, 0, fmt.Errorf("seek_data @ %d: %w", off, err)
|
||||
}
|
||||
off = next &^ (punchBlockSize - 1) // align down to block
|
||||
|
||||
// Find end of this data extent.
|
||||
endData, err := f.Seek(off, 4) // SEEK_HOLE = 4
|
||||
if err != nil {
|
||||
return 0, 0, fmt.Errorf("seek_hole @ %d: %w", off, err)
|
||||
}
|
||||
|
||||
// Scan [off, endData) chunk by chunk; batch zero runs across both
|
||||
// intra-chunk and inter-chunk boundaries so a contiguous zero
|
||||
// region is punched in a single fallocate.
|
||||
zeroStart := int64(-1)
|
||||
cur := off
|
||||
for cur < endData {
|
||||
toRead := min(int64(len(buf)), endData-cur)
|
||||
n, err := readAt(f, buf[:toRead], cur)
|
||||
if err != nil {
|
||||
return 0, 0, fmt.Errorf("read @ %d: %w", cur, err)
|
||||
}
|
||||
if n == 0 {
|
||||
break
|
||||
}
|
||||
// Walk the chunk one block at a time, tracking zero runs.
|
||||
for blkOff := 0; blkOff < n; blkOff += punchBlockSize {
|
||||
blkEnd := min(blkOff+punchBlockSize, n)
|
||||
blk := buf[blkOff:blkEnd]
|
||||
blkAbs := cur + int64(blkOff)
|
||||
if isZero(blk) && len(blk) == punchBlockSize {
|
||||
if zeroStart < 0 {
|
||||
zeroStart = blkAbs
|
||||
}
|
||||
} else if zeroStart >= 0 {
|
||||
if err := punch(f, zeroStart, blkAbs-zeroStart); err != nil {
|
||||
return 0, 0, err
|
||||
}
|
||||
zeroStart = -1
|
||||
}
|
||||
}
|
||||
cur += int64(n)
|
||||
}
|
||||
if zeroStart >= 0 {
|
||||
if err := punch(f, zeroStart, cur-zeroStart); err != nil {
|
||||
return 0, 0, err
|
||||
}
|
||||
}
|
||||
off = endData
|
||||
}
|
||||
|
||||
stAfter, err := statBlocks(f)
|
||||
if err != nil {
|
||||
return 0, 0, fmt.Errorf("stat after: %w", err)
|
||||
}
|
||||
return stBefore, stAfter, nil
|
||||
}
|
||||
|
||||
func punch(f *os.File, off, length int64) error {
|
||||
mode := uint32(unix.FALLOC_FL_PUNCH_HOLE | unix.FALLOC_FL_KEEP_SIZE)
|
||||
if err := unix.Fallocate(int(f.Fd()), mode, off, length); err != nil {
|
||||
return fmt.Errorf("fallocate punch @ %d len %d: %w", off, length, err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// readAt wraps f.ReadAt and treats io.EOF as success: it returns the number
// of bytes read with a nil error when the read hit end of file. Any other
// error is passed through unchanged together with the byte count.
func readAt(f *os.File, buf []byte, off int64) (int, error) {
	n, err := f.ReadAt(buf, off)
	if err != nil && err != io.EOF {
		return n, err
	}
	return n, nil
}
|
||||
|
||||
// isZero reports whether every byte of b is 0. An empty or nil slice counts
// as all-zero.
func isZero(b []byte) bool {
	for i := range b {
		if b[i] != 0 {
			return false
		}
	}
	return true
}
|
||||
|
||||
func statBlocks(f *os.File) (int64, error) {
|
||||
var st unix.Stat_t
|
||||
if err := unix.Fstat(int(f.Fd()), &st); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return int64(st.Blocks) * 512, nil
|
||||
}
|
||||
118
internal/sandbox/restore.go
Normal file
118
internal/sandbox/restore.go
Normal file
@ -0,0 +1,118 @@
|
||||
// Package sandbox: shared CH-restore helpers used by both Resume (paused →
|
||||
// running) and the snapshot-template launch path (template → fresh sandbox).
|
||||
//
|
||||
// The two callers diverge in how they acquire resources (slot, dm-snapshot,
|
||||
// sandbox identity) but converge on:
|
||||
//
|
||||
// build VMConfig → CreateFromSnapshot → vm.Resume → wait envd → balloon deflate
|
||||
//
|
||||
// These steps are extracted here so the sequence — and its quirks (paused
|
||||
// post-restore state, balloon best-effort, restored disk path baked into
|
||||
// CH's config.json) — has a single source of truth.
|
||||
package sandbox
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"path/filepath"
|
||||
|
||||
"git.omukk.dev/wrenn/wrenn/internal/envdclient"
|
||||
"git.omukk.dev/wrenn/wrenn/internal/network"
|
||||
"git.omukk.dev/wrenn/wrenn/internal/vm"
|
||||
)
|
||||
|
||||
// restoreInputs is the common set of fields needed to build a restore VMConfig.
|
||||
type restoreInputs struct {
|
||||
sandboxID string // VM identity for the new CH process (sock path, log file)
|
||||
templateID string // forwarded to envd via PostInit (informational)
|
||||
snapDir string // directory containing CH snapshot artefacts
|
||||
rootfsPath string // /dev/mapper/wrenn-{newID} — per-sandbox dm-snapshot
|
||||
vcpus int
|
||||
memoryMB int
|
||||
slot *network.Slot
|
||||
sandboxDir string // override for VMConfig.SandboxDir; "" = default
|
||||
}
|
||||
|
||||
// buildRestoreVMConfig assembles the VMConfig used to launch a CH process in
|
||||
// restore mode. sandboxDir, when non-empty, overrides the default
|
||||
// "/tmp/ch-vm-{SandboxID}" — required when the snapshot's saved config.json
|
||||
// points at a different sandbox's tmpfs path (i.e. snapshot-template launch).
|
||||
func (m *Manager) buildRestoreVMConfig(in restoreInputs) vm.VMConfig {
|
||||
return vm.VMConfig{
|
||||
SandboxID: in.sandboxID,
|
||||
TemplateID: in.templateID,
|
||||
KernelPath: m.cfg.KernelPath,
|
||||
RootfsPath: in.rootfsPath,
|
||||
VCPUs: in.vcpus,
|
||||
MemoryMB: in.memoryMB,
|
||||
NetworkNamespace: in.slot.NamespaceID,
|
||||
TapDevice: in.slot.TapName,
|
||||
TapMAC: in.slot.TapMAC,
|
||||
GuestIP: in.slot.GuestIP,
|
||||
GatewayIP: in.slot.TapIP,
|
||||
NetMask: in.slot.GuestNetMask,
|
||||
VMMBin: m.cfg.VMMBin,
|
||||
LogDir: filepath.Join(m.cfg.WrennDir, "logs"),
|
||||
RestoreFromDir: in.snapDir,
|
||||
RestoreLazyMemory: true,
|
||||
SandboxDir: in.sandboxDir,
|
||||
}
|
||||
}
|
||||
|
||||
// launchRestoredVM starts CH in restore mode, resumes the vCPUs, waits for
|
||||
// envd to be reachable, then best-effort deflates the balloon. On any failure
|
||||
// the partial VM is destroyed before returning — the caller is responsible
|
||||
// for tearing down dm/network/slot.
|
||||
//
|
||||
// Returns the connected envd client on success.
|
||||
func (m *Manager) launchRestoredVM(ctx context.Context, vmCfg vm.VMConfig, hostIP string) (*envdclient.Client, error) {
|
||||
if _, err := m.vm.CreateFromSnapshot(ctx, vmCfg); err != nil {
|
||||
return nil, fmt.Errorf("create from snapshot: %w", err)
|
||||
}
|
||||
|
||||
if err := m.vm.Resume(ctx, vmCfg.SandboxID); err != nil {
|
||||
_ = m.vm.Destroy(context.Background(), vmCfg.SandboxID)
|
||||
return nil, fmt.Errorf("vm resume: %w", err)
|
||||
}
|
||||
|
||||
client := envdclient.New(hostIP)
|
||||
waitCtx, waitCancel := context.WithTimeout(ctx, envdReadyTimeout(vmCfg.MemoryMB))
|
||||
defer waitCancel()
|
||||
if err := client.WaitUntilReady(waitCtx); err != nil {
|
||||
_ = m.vm.Destroy(context.Background(), vmCfg.SandboxID)
|
||||
return nil, fmt.Errorf("wait envd: %w", err)
|
||||
}
|
||||
|
||||
// Best-effort balloon deflate. Free-page reporting drains pages while the
|
||||
// sandbox runs; the resumed guest needs its full memory budget back. A
|
||||
// failure leaves the guest memory-starved but doesn't break correctness.
|
||||
if err := m.vm.UpdateBalloon(ctx, vmCfg.SandboxID, 0); err != nil {
|
||||
slog.Warn("balloon deflate after restore failed", "id", vmCfg.SandboxID, "error", err)
|
||||
}
|
||||
|
||||
return client, nil
|
||||
}
|
||||
|
||||
// initAndStartMemoryLoader runs envd's /init lifecycle bump and then kicks
|
||||
// off the background memory loader. Ordering matters: /init resets envd's
|
||||
// mem_preload_* atomics, so the loader's POST /memory/preload must land
|
||||
// after — otherwise the next CreateSnapshot/Pause would observe a stale
|
||||
// "idle" state and snapshot a memfile full of holes.
|
||||
//
|
||||
// Must be called with sb already registered in m.boxes with StatusRunning
|
||||
// and sb.client populated.
|
||||
func (m *Manager) initAndStartMemoryLoader(ctx context.Context, sb *sandboxState, defaultUser, templateIDStr string, envVars map[string]string) {
|
||||
initCtx, initCancel := context.WithTimeout(ctx, m.cfg.EnvdTimeout)
|
||||
defer initCancel()
|
||||
c := sb.client.Load()
|
||||
if c == nil {
|
||||
slog.Warn("post-restore PostInit skipped: envd client cleared", "id", sb.ID)
|
||||
return
|
||||
}
|
||||
if err := c.PostInitWithDefaults(initCtx, defaultUser, envVars, sb.ID, templateIDStr); err != nil {
|
||||
slog.Warn("post-restore PostInit failed", "id", sb.ID, "error", err)
|
||||
}
|
||||
|
||||
m.startMemoryLoader(sb)
|
||||
}
|
||||
208
internal/sandbox/restore_paused.go
Normal file
208
internal/sandbox/restore_paused.go
Normal file
@ -0,0 +1,208 @@
|
||||
package sandbox
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/google/uuid"
|
||||
|
||||
"git.omukk.dev/wrenn/wrenn/internal/layout"
|
||||
"git.omukk.dev/wrenn/wrenn/internal/models"
|
||||
)
|
||||
|
||||
// RestorePausedSandboxes scans WRENN_DIR/sandboxes/ for paused-sandbox
|
||||
// snapshots left behind by a previous agent instance and re-registers them
|
||||
// in m.boxes as StatusPaused. Without this, ListSandboxes would not report
|
||||
// these sandboxes, and the CP's HostMonitor would mark them stopped via
|
||||
// the missing-confirmed-dead reconcile path — orphaning the on-disk
|
||||
// snapshot dir and surfacing a leaked "stopped" sandbox to users.
|
||||
//
|
||||
// Restored sandboxes hold ONLY the slot reservation; VM / network / dm /
|
||||
// loop refcount stay unowned until Resume rebuilds them. baseImagePath is
|
||||
// deliberately NOT set on the in-memory entry so cleanup() does not call
|
||||
// loops.Release on a loop that was never Acquire'd — the registry tolerates
|
||||
// a Release of an unknown key, but a coincident-same-base running sandbox
|
||||
// would have its refcount decremented incorrectly.
|
||||
//
|
||||
// Must be called once at agent startup, AFTER CleanupOrphanPauseDirs (so
|
||||
// .staging-* / .trash-* dirs are gone) and BEFORE the HTTP server starts
|
||||
// serving — otherwise an early Create RPC can race the slot reservation.
|
||||
//
|
||||
// Corrupt snapshot dirs (unparseable meta, missing slot index) are renamed
|
||||
// to .trash-{ts}/ so a future CleanupOrphanPauseDirs sweeps them. Soft
|
||||
// errors are logged; this function never returns an error — startup should
|
||||
// not fail because a single sandbox is unrecoverable.
|
||||
func (m *Manager) RestorePausedSandboxes() {
|
||||
sandboxesDir := layout.SandboxesDir(m.cfg.WrennDir)
|
||||
entries, err := os.ReadDir(sandboxesDir)
|
||||
if err != nil {
|
||||
// Directory does not exist yet — fresh install, nothing to restore.
|
||||
return
|
||||
}
|
||||
|
||||
type candidate struct {
|
||||
sandboxID string
|
||||
snapDir string
|
||||
meta *snapshotMeta
|
||||
teamID [16]byte
|
||||
templID [16]byte
|
||||
}
|
||||
|
||||
// Pass 1: parse every snapshot meta. Trash anything unreadable or
|
||||
// missing the slot index — those are crash artefacts, not recoverable
|
||||
// sandboxes.
|
||||
candidates := make([]candidate, 0, len(entries))
|
||||
for _, e := range entries {
|
||||
if !e.IsDir() {
|
||||
continue
|
||||
}
|
||||
name := e.Name()
|
||||
// Skip CleanupOrphanPauseDirs's territory. If it ran before us
|
||||
// these are already gone; if not, leave them alone.
|
||||
if strings.Contains(name, ".staging-") || strings.Contains(name, ".trash-") {
|
||||
continue
|
||||
}
|
||||
|
||||
snapDir := layout.PauseSnapshotDir(m.cfg.WrennDir, name)
|
||||
meta, err := readSnapshotMeta(snapDir)
|
||||
if err != nil {
|
||||
slog.Warn("restore: unreadable snapshot meta, trashing dir",
|
||||
"id", name, "error", err)
|
||||
trashCorruptDir(snapDir)
|
||||
continue
|
||||
}
|
||||
if meta.SlotIndex == 0 {
|
||||
slog.Warn("restore: snapshot has no slot_index, trashing dir", "id", name)
|
||||
trashCorruptDir(snapDir)
|
||||
continue
|
||||
}
|
||||
teamBytes, err := parsePlainUUID(meta.TeamID)
|
||||
if err != nil {
|
||||
slog.Warn("restore: bad team_id in snapshot meta", "id", name, "error", err)
|
||||
trashCorruptDir(snapDir)
|
||||
continue
|
||||
}
|
||||
templateBytes, err := parsePlainUUID(meta.TemplateID)
|
||||
if err != nil {
|
||||
slog.Warn("restore: bad template_id in snapshot meta", "id", name, "error", err)
|
||||
trashCorruptDir(snapDir)
|
||||
continue
|
||||
}
|
||||
candidates = append(candidates, candidate{
|
||||
sandboxID: name,
|
||||
snapDir: snapDir,
|
||||
meta: meta,
|
||||
teamID: teamBytes,
|
||||
templID: templateBytes,
|
||||
})
|
||||
}
|
||||
|
||||
// Pass 2: bucket by slot index, pick the newest CreatedAt per slot.
|
||||
// Multiple candidates per slot happen when older paused-sandbox dirs
|
||||
// were left on disk by the pre-fix leak (DB row marked stopped but the
|
||||
// snapshot was never cleaned). The newest is the most likely live one;
|
||||
// older losers are trashed so CleanupOrphanPauseDirs sweeps them on
|
||||
// the next startup.
|
||||
bySlot := make(map[int][]candidate, len(candidates))
|
||||
for _, c := range candidates {
|
||||
bySlot[c.meta.SlotIndex] = append(bySlot[c.meta.SlotIndex], c)
|
||||
}
|
||||
|
||||
restored := 0
|
||||
pruned := 0
|
||||
for slot, cands := range bySlot {
|
||||
sort.Slice(cands, func(i, j int) bool {
|
||||
return cands[i].meta.CreatedAt.After(cands[j].meta.CreatedAt)
|
||||
})
|
||||
|
||||
// Trash every loser. The host_monitor's zombie-cleanup path catches
|
||||
// the winner if its DB row says 'stopped' — but losers never enter
|
||||
// m.boxes and would otherwise sit on disk indefinitely.
|
||||
for _, stale := range cands[1:] {
|
||||
slog.Info("restore: pruning older snapshot for same slot",
|
||||
"id", stale.sandboxID, "slot", slot, "created", stale.meta.CreatedAt,
|
||||
"winner", cands[0].sandboxID, "winner_created", cands[0].meta.CreatedAt)
|
||||
trashCorruptDir(stale.snapDir)
|
||||
pruned++
|
||||
}
|
||||
|
||||
winner := cands[0]
|
||||
if err := m.slots.Reserve(winner.meta.SlotIndex); err != nil {
|
||||
// Reserve only fails if another candidate (different slot value
|
||||
// in meta but same numeric index) already grabbed it, or if the
|
||||
// allocator is corrupt. Either way the snapshot is unusable
|
||||
// without a slot, so trash it.
|
||||
slog.Warn("restore: slot reservation failed, trashing dir",
|
||||
"id", winner.sandboxID, "slot", winner.meta.SlotIndex, "error", err)
|
||||
trashCorruptDir(winner.snapDir)
|
||||
pruned++
|
||||
continue
|
||||
}
|
||||
|
||||
sb := &sandboxState{
|
||||
Sandbox: models.Sandbox{
|
||||
ID: winner.sandboxID,
|
||||
Status: models.StatusPaused,
|
||||
TemplateTeamID: winner.teamID,
|
||||
TemplateID: winner.templID,
|
||||
VCPUs: winner.meta.VCPUs,
|
||||
MemoryMB: winner.meta.MemoryMB,
|
||||
TimeoutSec: winner.meta.TimeoutSec,
|
||||
SlotIndex: winner.meta.SlotIndex,
|
||||
CreatedAt: winner.meta.CreatedAt,
|
||||
// LastActiveAt cosmetic only — TTL reaper ignores non-Running.
|
||||
LastActiveAt: winner.meta.CreatedAt,
|
||||
},
|
||||
// connTracker must be non-nil: resumeFromMeta calls Reset() on it
|
||||
// unconditionally during rehydration. A nil pointer would panic.
|
||||
connTracker: &ConnTracker{},
|
||||
// baseImagePath intentionally left empty — see function doc.
|
||||
// sandboxDirOverride intentionally left empty — resumeFromMeta
|
||||
// reads meta.SandboxDir from disk on the resume path.
|
||||
}
|
||||
|
||||
m.mu.Lock()
|
||||
m.boxes[winner.sandboxID] = sb
|
||||
m.mu.Unlock()
|
||||
restored++
|
||||
|
||||
slog.Info("restored paused sandbox", "id", winner.sandboxID,
|
||||
"slot", winner.meta.SlotIndex, "vcpus", winner.meta.VCPUs, "memory_mb", winner.meta.MemoryMB)
|
||||
}
|
||||
|
||||
if restored > 0 || pruned > 0 {
|
||||
slog.Info("paused sandbox restore complete", "restored", restored, "pruned", pruned)
|
||||
}
|
||||
}
|
||||
|
||||
// parsePlainUUID turns a standard hyphenated UUID string (as produced by
|
||||
// id.UUIDString) back into the 16-byte representation used by sandboxState.
|
||||
func parsePlainUUID(s string) ([16]byte, error) {
|
||||
if s == "" {
|
||||
return [16]byte{}, fmt.Errorf("empty uuid string")
|
||||
}
|
||||
u, err := uuid.Parse(s)
|
||||
if err != nil {
|
||||
return [16]byte{}, err
|
||||
}
|
||||
return [16]byte(u), nil
|
||||
}
|
||||
|
||||
// trashCorruptDir moves a corrupt snapshot directory out of the way by
// renaming it to a sibling called "<base>.trash-<unix-nanos>", which a later
// CleanupOrphanPauseDirs run is expected to sweep.
//
// The operation is best-effort: a failed rename is logged and otherwise
// ignored. Leaving the directory where it is is safe (restore will skip it
// again on the next startup), just undesirable.
func trashCorruptDir(dir string) {
	stamp := time.Now().UnixNano()
	dst := filepath.Join(filepath.Dir(dir), fmt.Sprintf("%s.trash-%d", filepath.Base(dir), stamp))

	err := os.Rename(dir, dst)
	if err == nil {
		return
	}
	slog.Warn("restore: failed to trash corrupt snapshot dir",
		"src", dir, "dst", dst, "error", err)
}
|
||||
Reference in New Issue
Block a user