1
0
forked from wrenn/wrenn

Prefix network namespaces with wrenn-, add stale cleanup, lower diff cap

Rename ns-{idx} to wrenn-ns-{idx} and veth-{idx} to wrenn-veth-{idx}
to avoid collisions with other tools. Add CleanupStaleNamespaces() at
agent startup to remove orphaned namespaces, veths, iptables rules, and
routes from a previous crash. Lower maxDiffGenerations from 10 to 8 to
prevent Go runtime memory corruption from snapshot/restore drift.
This commit is contained in:
2026-03-29 02:14:30 +06:00
parent 46d60fc5a5
commit 1ca10230a9
3 changed files with 89 additions and 5 deletions

View File

@ -16,6 +16,7 @@ import (
"git.omukk.dev/wrenn/sandbox/internal/devicemapper"
"git.omukk.dev/wrenn/sandbox/internal/hostagent"
"git.omukk.dev/wrenn/sandbox/internal/network"
"git.omukk.dev/wrenn/sandbox/internal/sandbox"
"git.omukk.dev/wrenn/sandbox/proto/hostagent/gen/hostagentv1connect"
)
@ -42,8 +43,9 @@ func main() {
slog.Warn("failed to enable ip_forward", "error", err)
}
// Clean up any stale dm-snapshot devices from a previous crash.
// Clean up stale resources from a previous crash.
devicemapper.CleanupStaleDevices()
network.CleanupStaleNamespaces()
listenAddr := envOrDefault("WRENN_HOST_LISTEN_ADDR", ":50051")
rootDir := envOrDefault("WRENN_DIR", "/var/lib/wrenn")

View File

@ -5,13 +5,91 @@ import (
"fmt"
"log/slog"
"net"
"os"
"os/exec"
"runtime"
"strings"
"github.com/vishvananda/netlink"
"github.com/vishvananda/netns"
)
// nsPrefix is the name prefix of wrenn network namespaces.
// CleanupStaleNamespaces treats every /run/netns entry that starts with it
// as a leftover to delete, so it must stay in sync with the NamespaceID
// format string used by NewSlot ("wrenn-ns-%d").
const nsPrefix = "wrenn-ns-"
// CleanupStaleNamespaces removes leftover wrenn network namespaces from a
// previous crash. Called once at agent startup.
//
// For every entry in /run/netns whose name starts with nsPrefix it deletes
// the host-side "wrenn-veth-<suffix>" link (when one exists) and then the
// named namespace itself. Once the directory has been walked it calls
// cleanupStaleIptablesRules. The whole pass is best-effort: failures are
// logged and never returned, so they cannot block agent startup.
func CleanupStaleNamespaces() {
	entries, err := os.ReadDir("/run/netns")
	if err != nil {
		// A missing directory simply means there is nothing to clean. Any
		// other failure (permissions, I/O) means stale namespaces may be
		// left behind, so surface it instead of hiding it.
		if !os.IsNotExist(err) {
			slog.Warn("failed to list network namespaces", "dir", "/run/netns", "error", err)
		}
		return
	}

	for _, e := range entries {
		name := e.Name()
		if !strings.HasPrefix(name, nsPrefix) {
			continue
		}

		// Also remove the associated veth from the host side. A lookup
		// failure is treated as "no such link" and skipped.
		vethName := "wrenn-veth-" + strings.TrimPrefix(name, nsPrefix)
		if link, err := netlink.LinkByName(vethName); err == nil {
			if err := netlink.LinkDel(link); err != nil {
				slog.Warn("failed to remove stale veth", "veth", vethName, "error", err)
			}
		}

		if err := netns.DeleteNamed(name); err != nil {
			slog.Warn("failed to remove stale namespace", "ns", name, "error", err)
			continue
		}
		slog.Info("removed stale namespace", "ns", name)
	}

	// Clean up any stale wrenn iptables rules referencing old veth interfaces.
	cleanupStaleIptablesRules()
}
// cleanupStaleIptablesRules removes every host iptables rule in the filter
// and nat tables whose iptables-save line mentions "wrenn-veth-", then
// deletes every IPv4 route whose link is a "wrenn-veth-" interface.
//
// It does not check whether the referenced interface still exists: any
// matching rule or route is removed. All steps are best-effort; failures are
// logged at debug level and the function carries on.
func cleanupStaleIptablesRules() {
	for _, table := range []string{"filter", "nat"} {
		out, err := exec.Command("iptables-save", "-t", table).Output()
		if err != nil {
			slog.Debug("failed to dump iptables table", "table", table, "error", err)
			continue
		}

		for _, line := range strings.Split(string(out), "\n") {
			if !strings.Contains(line, "wrenn-veth-") {
				continue
			}
			// Lines look like "-A FORWARD -i wrenn-veth-1 -o wlo1 -j ACCEPT".
			// Anything that is not an append line (chain headers, COMMIT,
			// comments) is skipped.
			spec, isRule := strings.CutPrefix(line, "-A ")
			if !isRule {
				continue
			}

			// Re-issue the same rule spec with -D instead of -A to delete it.
			// NOTE(review): strings.Fields splits on whitespace only, so a
			// rule carrying a quoted multi-word argument would presumably
			// not match and would just be logged below. Confirm no wrenn
			// rule uses one.
			args := append([]string{"-t", table, "-D"}, strings.Fields(spec)...)
			if err := exec.Command("iptables", args...).Run(); err != nil {
				slog.Debug("failed to remove stale iptables rule", "rule", line, "error", err)
			}
		}
	}

	// Also remove host routes that go out via wrenn-veth interfaces.
	routes, err := netlink.RouteList(nil, netlink.FAMILY_V4)
	if err != nil {
		slog.Debug("failed to list host routes", "error", err)
		return
	}
	// Iterate by index so RouteDel receives a pointer to the slice element
	// rather than to a per-iteration copy.
	for i := range routes {
		route := &routes[i]
		if route.LinkIndex == 0 {
			continue
		}
		link, err := netlink.LinkByIndex(route.LinkIndex)
		if err != nil {
			continue
		}
		linkName := link.Attrs().Name
		if !strings.HasPrefix(linkName, "wrenn-veth-") {
			continue
		}
		if err := netlink.RouteDel(route); err != nil {
			slog.Debug("failed to remove stale route", "link", linkName, "error", err)
		}
	}
}
const (
// Fixed addresses inside each network namespace (safe because each
// sandbox gets its own netns).
@ -84,8 +162,8 @@ func NewSlot(index int) *Slot {
GuestIP: guestIP,
GuestNetMask: guestNetMask,
TapName: tapName,
NamespaceID: fmt.Sprintf("ns-%d", index),
VethName: fmt.Sprintf("veth-%d", index),
NamespaceID: fmt.Sprintf("wrenn-ns-%d", index),
VethName: fmt.Sprintf("wrenn-veth-%d", index),
}
}

View File

@ -73,8 +73,12 @@ type snapshotParent struct {
}
// maxDiffGenerations caps how many incremental diff generations we chain
// before falling back to a Full snapshot to collapse the chain.
const maxDiffGenerations = 10
// before falling back to a Full snapshot to collapse the chain. Firecracker
// snapshot/restore of a Go process (envd) accumulates runtime memory state
// drift; empirically, ~10 diff-based cycles corrupt the Go page allocator.
// A Full snapshot resets the generation counter and produces a clean base,
// preventing the crash.
const maxDiffGenerations = 8
// New creates a new sandbox manager.
func New(cfg Config) *Manager {