forked from wrenn/wrenn
v0.2.0 (#50)
Co-authored-by: Tasnim Kabir Sadik <tksadik@omukk.dev> Reviewed-on: wrenn/wrenn#50
This commit is contained in:
186
internal/sandbox/punch.go
Normal file
186
internal/sandbox/punch.go
Normal file
@ -0,0 +1,186 @@
|
||||
// Package sandbox: post-snapshot hole punching for memory-ranges files.
|
||||
//
|
||||
// CH v52's SEEK_DATA/SEEK_HOLE snapshot writer only skips ranges that are
// already holes in the source memfd. Pages the guest never reported as free are
|
||||
// written verbatim — including pages whose contents happen to be all zero
|
||||
// (fresh allocations the guest scribbled then released without telling the
|
||||
// balloon driver). Walking the resulting file and punching any 4 KiB block
|
||||
// of zeros recovers that space without any guest cooperation.
|
||||
package sandbox
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"log/slog"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
const (
	// punchBlockSize is the unit, in bytes, at which the scanner tests for
	// all-zero content and issues FALLOC_FL_PUNCH_HOLE. It is chosen to
	// match the kernel page size and the minimum hole size on ext4.
	punchBlockSize = 4 << 10 // 4 KiB

	// punchReadSize is how many bytes the scan loop requests per read. One
	// read covers many blocks, which are then split in memory, so a 20 GiB
	// memory-ranges file costs roughly 20K read(2) syscalls rather than
	// roughly 5M. This matters on single-disk hosts, where every syscall
	// otherwise competes with sshd / journal IO.
	punchReadSize = 256 * punchBlockSize // 1 MiB = 256 blocks
)
|
||||
|
||||
// punchZeroPagesInDir runs punchZeroPages on every memory* file in dir.
|
||||
// CH writes its memory dump as one or more files prefixed "memory" inside
|
||||
// the snapshot directory; everything else (config.json, state.json) is
|
||||
// metadata and untouched.
|
||||
func punchZeroPagesInDir(dir string) {
|
||||
entries, err := os.ReadDir(dir)
|
||||
if err != nil {
|
||||
slog.Warn("punch: read snapshot dir", "dir", dir, "error", err)
|
||||
return
|
||||
}
|
||||
for _, e := range entries {
|
||||
if e.IsDir() || !strings.HasPrefix(e.Name(), "memory") {
|
||||
continue
|
||||
}
|
||||
path := filepath.Join(dir, e.Name())
|
||||
before, after, err := punchZeroPages(path)
|
||||
if err != nil {
|
||||
slog.Warn("punch: zero-page scan failed", "path", path, "error", err)
|
||||
continue
|
||||
}
|
||||
slog.Info("punch: zero-page scan done",
|
||||
"path", path,
|
||||
"alloc_before", before,
|
||||
"alloc_after", after,
|
||||
"reclaimed", before-after)
|
||||
}
|
||||
}
|
||||
|
||||
// punchZeroPages scans path block-by-block, batching runs of all-zero 4 KiB
|
||||
// blocks and punching them out via FALLOC_FL_PUNCH_HOLE. Existing holes are
|
||||
// skipped via SEEK_DATA so a partially-sparse input stays cheap to scan.
|
||||
//
|
||||
// Returns the file's disk allocation (st_blocks * 512) before and after.
|
||||
func punchZeroPages(path string) (int64, int64, error) {
|
||||
f, err := os.OpenFile(path, os.O_RDWR, 0)
|
||||
if err != nil {
|
||||
return 0, 0, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
stBefore, err := statBlocks(f)
|
||||
if err != nil {
|
||||
return 0, 0, fmt.Errorf("stat before: %w", err)
|
||||
}
|
||||
|
||||
fi, err := f.Stat()
|
||||
if err != nil {
|
||||
return 0, 0, fmt.Errorf("stat: %w", err)
|
||||
}
|
||||
size := fi.Size()
|
||||
|
||||
buf := make([]byte, punchReadSize)
|
||||
off := int64(0)
|
||||
|
||||
for off < size {
|
||||
// Skip ahead to next data region; nothing to do in holes.
|
||||
next, err := f.Seek(off, 3) // SEEK_DATA = 3
|
||||
if err != nil {
|
||||
if errors.Is(err, io.EOF) || errors.Is(err, unix.ENXIO) {
|
||||
break
|
||||
}
|
||||
return 0, 0, fmt.Errorf("seek_data @ %d: %w", off, err)
|
||||
}
|
||||
off = next &^ (punchBlockSize - 1) // align down to block
|
||||
|
||||
// Find end of this data extent.
|
||||
endData, err := f.Seek(off, 4) // SEEK_HOLE = 4
|
||||
if err != nil {
|
||||
return 0, 0, fmt.Errorf("seek_hole @ %d: %w", off, err)
|
||||
}
|
||||
|
||||
// Scan [off, endData) chunk by chunk; batch zero runs across both
|
||||
// intra-chunk and inter-chunk boundaries so a contiguous zero
|
||||
// region is punched in a single fallocate.
|
||||
zeroStart := int64(-1)
|
||||
cur := off
|
||||
for cur < endData {
|
||||
toRead := min(int64(len(buf)), endData-cur)
|
||||
n, err := readAt(f, buf[:toRead], cur)
|
||||
if err != nil {
|
||||
return 0, 0, fmt.Errorf("read @ %d: %w", cur, err)
|
||||
}
|
||||
if n == 0 {
|
||||
break
|
||||
}
|
||||
// Walk the chunk one block at a time, tracking zero runs.
|
||||
for blkOff := 0; blkOff < n; blkOff += punchBlockSize {
|
||||
blkEnd := min(blkOff+punchBlockSize, n)
|
||||
blk := buf[blkOff:blkEnd]
|
||||
blkAbs := cur + int64(blkOff)
|
||||
if isZero(blk) && len(blk) == punchBlockSize {
|
||||
if zeroStart < 0 {
|
||||
zeroStart = blkAbs
|
||||
}
|
||||
} else if zeroStart >= 0 {
|
||||
if err := punch(f, zeroStart, blkAbs-zeroStart); err != nil {
|
||||
return 0, 0, err
|
||||
}
|
||||
zeroStart = -1
|
||||
}
|
||||
}
|
||||
cur += int64(n)
|
||||
}
|
||||
if zeroStart >= 0 {
|
||||
if err := punch(f, zeroStart, cur-zeroStart); err != nil {
|
||||
return 0, 0, err
|
||||
}
|
||||
}
|
||||
off = endData
|
||||
}
|
||||
|
||||
stAfter, err := statBlocks(f)
|
||||
if err != nil {
|
||||
return 0, 0, fmt.Errorf("stat after: %w", err)
|
||||
}
|
||||
return stBefore, stAfter, nil
|
||||
}
|
||||
|
||||
func punch(f *os.File, off, length int64) error {
|
||||
mode := uint32(unix.FALLOC_FL_PUNCH_HOLE | unix.FALLOC_FL_KEEP_SIZE)
|
||||
if err := unix.Fallocate(int(f.Fd()), mode, off, length); err != nil {
|
||||
return fmt.Errorf("fallocate punch @ %d len %d: %w", off, length, err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// readAt reads into buf from f at byte offset off and returns the number of
// bytes read. It behaves like (*os.File).ReadAt except that io.EOF is not
// reported as an error: a read that hits end of file returns the (possibly
// short, possibly zero) count with a nil error. Every other error is passed
// through unchanged together with the count.
func readAt(f *os.File, buf []byte, off int64) (int, error) {
	count, readErr := f.ReadAt(buf, off)
	if readErr != nil && readErr != io.EOF {
		return count, readErr
	}
	return count, nil
}
|
||||
|
||||
// isZero reports whether every byte of b is zero. An empty or nil slice
// counts as all-zero.
func isZero(b []byte) bool {
	for i := 0; i < len(b); i++ {
		if b[i] != 0 {
			return false
		}
	}
	return true
}
|
||||
|
||||
func statBlocks(f *os.File) (int64, error) {
|
||||
var st unix.Stat_t
|
||||
if err := unix.Fstat(int(f.Fd()), &st); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return int64(st.Blocks) * 512, nil
|
||||
}
|
||||
Reference in New Issue
Block a user