forked from wrenn/wrenn
Prefix network namespaces with wrenn-, add stale cleanup, lower diff cap
Rename ns-{idx} to wrenn-ns-{idx} and veth-{idx} to wrenn-veth-{idx}
to avoid collisions with other tools. Add CleanupStaleNamespaces() at
agent startup to remove orphaned namespaces, veths, iptables rules, and
routes from a previous crash. Lower maxDiffGenerations from 10 to 8 to
prevent Go runtime memory corruption from snapshot/restore drift.
This commit is contained in:
@ -16,6 +16,7 @@ import (
|
|||||||
|
|
||||||
"git.omukk.dev/wrenn/sandbox/internal/devicemapper"
|
"git.omukk.dev/wrenn/sandbox/internal/devicemapper"
|
||||||
"git.omukk.dev/wrenn/sandbox/internal/hostagent"
|
"git.omukk.dev/wrenn/sandbox/internal/hostagent"
|
||||||
|
"git.omukk.dev/wrenn/sandbox/internal/network"
|
||||||
"git.omukk.dev/wrenn/sandbox/internal/sandbox"
|
"git.omukk.dev/wrenn/sandbox/internal/sandbox"
|
||||||
"git.omukk.dev/wrenn/sandbox/proto/hostagent/gen/hostagentv1connect"
|
"git.omukk.dev/wrenn/sandbox/proto/hostagent/gen/hostagentv1connect"
|
||||||
)
|
)
|
||||||
@ -42,8 +43,9 @@ func main() {
|
|||||||
slog.Warn("failed to enable ip_forward", "error", err)
|
slog.Warn("failed to enable ip_forward", "error", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Clean up any stale dm-snapshot devices from a previous crash.
|
// Clean up stale resources from a previous crash.
|
||||||
devicemapper.CleanupStaleDevices()
|
devicemapper.CleanupStaleDevices()
|
||||||
|
network.CleanupStaleNamespaces()
|
||||||
|
|
||||||
listenAddr := envOrDefault("WRENN_HOST_LISTEN_ADDR", ":50051")
|
listenAddr := envOrDefault("WRENN_HOST_LISTEN_ADDR", ":50051")
|
||||||
rootDir := envOrDefault("WRENN_DIR", "/var/lib/wrenn")
|
rootDir := envOrDefault("WRENN_DIR", "/var/lib/wrenn")
|
||||||
|
|||||||
@ -5,13 +5,91 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"net"
|
"net"
|
||||||
|
"os"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
"runtime"
|
"runtime"
|
||||||
|
"strings"
|
||||||
|
|
||||||
"github.com/vishvananda/netlink"
|
"github.com/vishvananda/netlink"
|
||||||
"github.com/vishvananda/netns"
|
"github.com/vishvananda/netns"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
const nsPrefix = "wrenn-ns-"
|
||||||
|
|
||||||
|
// CleanupStaleNamespaces removes leftover wrenn network namespaces from a
|
||||||
|
// previous crash. Called once at agent startup.
|
||||||
|
func CleanupStaleNamespaces() {
|
||||||
|
entries, err := os.ReadDir("/run/netns")
|
||||||
|
if err != nil {
|
||||||
|
return // no /run/netns or unreadable — nothing to clean
|
||||||
|
}
|
||||||
|
for _, e := range entries {
|
||||||
|
name := e.Name()
|
||||||
|
if !strings.HasPrefix(name, nsPrefix) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
// Also remove the associated veth from the host side.
|
||||||
|
vethName := "wrenn-veth-" + strings.TrimPrefix(name, nsPrefix)
|
||||||
|
if link, err := netlink.LinkByName(vethName); err == nil {
|
||||||
|
_ = netlink.LinkDel(link)
|
||||||
|
}
|
||||||
|
if err := netns.DeleteNamed(name); err != nil {
|
||||||
|
slog.Warn("failed to remove stale namespace", "ns", name, "error", err)
|
||||||
|
} else {
|
||||||
|
slog.Info("removed stale namespace", "ns", name)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Clean up any stale wrenn iptables rules referencing old veth interfaces.
|
||||||
|
cleanupStaleIptablesRules()
|
||||||
|
}
|
||||||
|
|
||||||
|
// cleanupStaleIptablesRules removes host iptables rules that reference
|
||||||
|
// wrenn-veth interfaces no longer present on the system.
|
||||||
|
func cleanupStaleIptablesRules() {
|
||||||
|
for _, table := range []string{"filter", "nat"} {
|
||||||
|
cmd := exec.Command("iptables-save", "-t", table)
|
||||||
|
out, err := cmd.Output()
|
||||||
|
if err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
for _, line := range strings.Split(string(out), "\n") {
|
||||||
|
if !strings.Contains(line, "wrenn-veth-") {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
// Lines look like "-A FORWARD -i wrenn-veth-1 -o wlo1 -j ACCEPT"
|
||||||
|
// Convert -A to -D to delete the rule.
|
||||||
|
if !strings.HasPrefix(line, "-A ") {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
delRule := "-D " + line[3:]
|
||||||
|
args := strings.Fields(delRule)
|
||||||
|
delCmd := exec.Command("iptables", append([]string{"-t", table}, args...)...)
|
||||||
|
if err := delCmd.Run(); err != nil {
|
||||||
|
slog.Debug("failed to remove stale iptables rule", "rule", line, "error", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Also remove stale host routes to 10.11.0.x via wrenn-veth interfaces.
|
||||||
|
routes, err := netlink.RouteList(nil, netlink.FAMILY_V4)
|
||||||
|
if err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
for _, r := range routes {
|
||||||
|
if r.LinkIndex == 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
link, err := netlink.LinkByIndex(r.LinkIndex)
|
||||||
|
if err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if strings.HasPrefix(link.Attrs().Name, "wrenn-veth-") {
|
||||||
|
_ = netlink.RouteDel(&r)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
const (
|
const (
|
||||||
// Fixed addresses inside each network namespace (safe because each
|
// Fixed addresses inside each network namespace (safe because each
|
||||||
// sandbox gets its own netns).
|
// sandbox gets its own netns).
|
||||||
@ -84,8 +162,8 @@ func NewSlot(index int) *Slot {
|
|||||||
GuestIP: guestIP,
|
GuestIP: guestIP,
|
||||||
GuestNetMask: guestNetMask,
|
GuestNetMask: guestNetMask,
|
||||||
TapName: tapName,
|
TapName: tapName,
|
||||||
NamespaceID: fmt.Sprintf("ns-%d", index),
|
NamespaceID: fmt.Sprintf("wrenn-ns-%d", index),
|
||||||
VethName: fmt.Sprintf("veth-%d", index),
|
VethName: fmt.Sprintf("wrenn-veth-%d", index),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -73,8 +73,12 @@ type snapshotParent struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// maxDiffGenerations caps how many incremental diff generations we chain
|
// maxDiffGenerations caps how many incremental diff generations we chain
|
||||||
// before falling back to a Full snapshot to collapse the chain.
|
// before falling back to a Full snapshot to collapse the chain. Firecracker
|
||||||
const maxDiffGenerations = 10
|
// snapshot/restore of a Go process (envd) accumulates runtime memory state
|
||||||
|
// drift; empirically, ~10 diff-based cycles corrupt the Go page allocator.
|
||||||
|
// A Full snapshot resets the generation counter and produces a clean base,
|
||||||
|
// preventing the crash.
|
||||||
|
const maxDiffGenerations = 8
|
||||||
|
|
||||||
// New creates a new sandbox manager.
|
// New creates a new sandbox manager.
|
||||||
func New(cfg Config) *Manager {
|
func New(cfg Config) *Manager {
|
||||||
|
|||||||
Reference in New Issue
Block a user