forked from wrenn/wrenn
Prefix network namespaces with wrenn-, add stale cleanup, lower diff cap
Rename ns-{idx} to wrenn-ns-{idx} and veth-{idx} to wrenn-veth-{idx}
to avoid collisions with other tools. Add CleanupStaleNamespaces() at
agent startup to remove orphaned namespaces, veths, iptables rules, and
routes from a previous crash. Lower maxDiffGenerations from 10 to 8 to
prevent Go runtime memory corruption from snapshot/restore drift.
This commit is contained in:
@ -16,6 +16,7 @@ import (
|
||||
|
||||
"git.omukk.dev/wrenn/sandbox/internal/devicemapper"
|
||||
"git.omukk.dev/wrenn/sandbox/internal/hostagent"
|
||||
"git.omukk.dev/wrenn/sandbox/internal/network"
|
||||
"git.omukk.dev/wrenn/sandbox/internal/sandbox"
|
||||
"git.omukk.dev/wrenn/sandbox/proto/hostagent/gen/hostagentv1connect"
|
||||
)
|
||||
@ -42,8 +43,9 @@ func main() {
|
||||
slog.Warn("failed to enable ip_forward", "error", err)
|
||||
}
|
||||
|
||||
// Clean up any stale dm-snapshot devices from a previous crash.
|
||||
// Clean up stale resources from a previous crash.
|
||||
devicemapper.CleanupStaleDevices()
|
||||
network.CleanupStaleNamespaces()
|
||||
|
||||
listenAddr := envOrDefault("WRENN_HOST_LISTEN_ADDR", ":50051")
|
||||
rootDir := envOrDefault("WRENN_DIR", "/var/lib/wrenn")
|
||||
|
||||
@ -5,13 +5,91 @@ import (
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"net"
|
||||
"os"
|
||||
"os/exec"
|
||||
"runtime"
|
||||
"strings"
|
||||
|
||||
"github.com/vishvananda/netlink"
|
||||
"github.com/vishvananda/netns"
|
||||
)
|
||||
|
||||
// nsPrefix is the name prefix of wrenn-managed network namespaces.
// CleanupStaleNamespaces uses it to pick wrenn entries out of /run/netns and
// to derive the matching host veth name from the remaining suffix.
const nsPrefix = "wrenn-ns-"
|
||||
|
||||
// CleanupStaleNamespaces removes leftover wrenn network namespaces from a
|
||||
// previous crash. Called once at agent startup.
|
||||
func CleanupStaleNamespaces() {
|
||||
entries, err := os.ReadDir("/run/netns")
|
||||
if err != nil {
|
||||
return // no /run/netns or unreadable — nothing to clean
|
||||
}
|
||||
for _, e := range entries {
|
||||
name := e.Name()
|
||||
if !strings.HasPrefix(name, nsPrefix) {
|
||||
continue
|
||||
}
|
||||
// Also remove the associated veth from the host side.
|
||||
vethName := "wrenn-veth-" + strings.TrimPrefix(name, nsPrefix)
|
||||
if link, err := netlink.LinkByName(vethName); err == nil {
|
||||
_ = netlink.LinkDel(link)
|
||||
}
|
||||
if err := netns.DeleteNamed(name); err != nil {
|
||||
slog.Warn("failed to remove stale namespace", "ns", name, "error", err)
|
||||
} else {
|
||||
slog.Info("removed stale namespace", "ns", name)
|
||||
}
|
||||
}
|
||||
|
||||
// Clean up any stale wrenn iptables rules referencing old veth interfaces.
|
||||
cleanupStaleIptablesRules()
|
||||
}
|
||||
|
||||
// cleanupStaleIptablesRules removes host iptables rules that reference
|
||||
// wrenn-veth interfaces no longer present on the system.
|
||||
func cleanupStaleIptablesRules() {
|
||||
for _, table := range []string{"filter", "nat"} {
|
||||
cmd := exec.Command("iptables-save", "-t", table)
|
||||
out, err := cmd.Output()
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
for _, line := range strings.Split(string(out), "\n") {
|
||||
if !strings.Contains(line, "wrenn-veth-") {
|
||||
continue
|
||||
}
|
||||
// Lines look like "-A FORWARD -i wrenn-veth-1 -o wlo1 -j ACCEPT"
|
||||
// Convert -A to -D to delete the rule.
|
||||
if !strings.HasPrefix(line, "-A ") {
|
||||
continue
|
||||
}
|
||||
delRule := "-D " + line[3:]
|
||||
args := strings.Fields(delRule)
|
||||
delCmd := exec.Command("iptables", append([]string{"-t", table}, args...)...)
|
||||
if err := delCmd.Run(); err != nil {
|
||||
slog.Debug("failed to remove stale iptables rule", "rule", line, "error", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Also remove stale host routes to 10.11.0.x via wrenn-veth interfaces.
|
||||
routes, err := netlink.RouteList(nil, netlink.FAMILY_V4)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
for _, r := range routes {
|
||||
if r.LinkIndex == 0 {
|
||||
continue
|
||||
}
|
||||
link, err := netlink.LinkByIndex(r.LinkIndex)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
if strings.HasPrefix(link.Attrs().Name, "wrenn-veth-") {
|
||||
_ = netlink.RouteDel(&r)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const (
|
||||
// Fixed addresses inside each network namespace (safe because each
|
||||
// sandbox gets its own netns).
|
||||
@ -84,8 +162,8 @@ func NewSlot(index int) *Slot {
|
||||
GuestIP: guestIP,
|
||||
GuestNetMask: guestNetMask,
|
||||
TapName: tapName,
|
||||
NamespaceID: fmt.Sprintf("ns-%d", index),
|
||||
VethName: fmt.Sprintf("veth-%d", index),
|
||||
NamespaceID: fmt.Sprintf("wrenn-ns-%d", index),
|
||||
VethName: fmt.Sprintf("wrenn-veth-%d", index),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -73,8 +73,12 @@ type snapshotParent struct {
|
||||
}
|
||||
|
||||
// maxDiffGenerations caps how many incremental diff generations we chain
|
||||
// before falling back to a Full snapshot to collapse the chain.
|
||||
const maxDiffGenerations = 10
|
||||
// before falling back to a Full snapshot to collapse the chain. Firecracker
|
||||
// snapshot/restore of a Go process (envd) accumulates runtime memory state
|
||||
// drift; empirically, ~10 diff-based cycles corrupt the Go page allocator.
|
||||
// A Full snapshot resets the generation counter and produces a clean base,
|
||||
// preventing the crash.
|
||||
const maxDiffGenerations = 8
|
||||
|
||||
// New creates a new sandbox manager.
|
||||
func New(cfg Config) *Manager {
|
||||
|
||||
Reference in New Issue
Block a user