forked from wrenn/wrenn
Pre-pause snapshot signal to prevent Go runtime crash on restore
envd crashes with "fatal error: bad summary data" after Firecracker snapshot/restore because the page allocator radix tree is inconsistent when vCPUs are frozen mid-allocation. The port scanner goroutine allocates heavily every second, making it the primary trigger.

Add POST /snapshot/prepare to envd — the host agent calls it before vm.Pause to quiesce continuous goroutines and force GC. On restore, PostInit restarts the port subsystem via the existing /init endpoint.

- New PortSubsystem abstraction with Start/Stop/Restart lifecycle
- Context-based goroutine cancellation (replaces irreversible channel close)
- Context-aware Signal to prevent scanner/forwarder deadlock
- Fix forwarder goroutine leak (was spinning forever on closed channel)
- Kill socat children on stop to prevent orphans across snapshots
- Fix double cmd.Wait panic (use exec.Command instead of exec.CommandContext)
This commit is contained in:
19
envd/main.go
19
envd/main.go
@@ -190,7 +190,14 @@ func main() {
|
||||
processLogger := l.With().Str("logger", "process").Logger()
|
||||
processService := processRpc.Handle(m, &processLogger, defaults, cgroupManager)
|
||||
|
||||
service := api.New(&envLogger, defaults, mmdsChan, isNotFC)
|
||||
// Port scanner and forwarder are managed by PortSubsystem, which
|
||||
// supports stop/restart across Firecracker snapshot/restore cycles.
|
||||
portLogger := l.With().Str("logger", "port-forwarder").Logger()
|
||||
portSubsystem := publicport.NewPortSubsystem(&portLogger, cgroupManager, portScannerInterval)
|
||||
portSubsystem.Start(ctx)
|
||||
defer portSubsystem.Stop()
|
||||
|
||||
service := api.New(&envLogger, defaults, mmdsChan, isNotFC, ctx, portSubsystem)
|
||||
handler := api.HandlerFromMux(service, m)
|
||||
middleware := authn.NewMiddleware(permissions.AuthenticateUsername)
|
||||
|
||||
@@ -229,16 +236,6 @@ func main() {
|
||||
}
|
||||
}
|
||||
|
||||
// Bind all open ports on 127.0.0.1 and localhost to the eth0 interface
|
||||
portScanner := publicport.NewScanner(portScannerInterval)
|
||||
defer portScanner.Destroy()
|
||||
|
||||
portLogger := l.With().Str("logger", "port-forwarder").Logger()
|
||||
portForwarder := publicport.NewForwarder(&portLogger, portScanner, cgroupManager)
|
||||
go portForwarder.StartForwarding(ctx)
|
||||
|
||||
go portScanner.ScanAndBroadcast()
|
||||
|
||||
err := s.ListenAndServe()
|
||||
if err != nil {
|
||||
log.Fatalf("error starting server: %v", err)
|
||||
|
||||
Reference in New Issue
Block a user