1
0
forked from wrenn/wrenn

Pre-pause snapshot signal to prevent Go runtime crash on restore

envd crashes with "fatal error: bad summary data" after Firecracker
snapshot/restore because the page allocator radix tree is inconsistent
when vCPUs are frozen mid-allocation. The port scanner goroutine
allocates heavily every second, making it the primary trigger.

Add POST /snapshot/prepare to envd — the host agent calls it before
vm.Pause to quiesce continuous goroutines and force GC. On restore,
PostInit restarts the port subsystem via the existing /init endpoint.

- New PortSubsystem abstraction with Start/Stop/Restart lifecycle
- Context-based goroutine cancellation (replaces irreversible channel close)
- Context-aware Signal to prevent scanner/forwarder deadlock
- Fix forwarder goroutine leak (was spinning forever on closed channel)
- Kill socat children on stop to prevent orphans across snapshots
- Fix double cmd.Wait panic (exec.Command instead of CommandContext)
This commit is contained in:
2026-04-13 05:21:10 +06:00
parent 117c46a386
commit 962860ba74
15 changed files with 317 additions and 59 deletions

View File

@ -1,8 +1,11 @@
// SPDX-License-Identifier: Apache-2.0
// Modifications by M/S Omukk
package port
import (
"context"
"github.com/rs/zerolog"
)
@ -33,19 +36,26 @@ func (ss *ScannerSubscriber) Destroy() {
close(ss.Messages)
}
func (ss *ScannerSubscriber) Signal(conns []ConnStat) {
// Filter isn't specified. Accept everything.
// Signal sends the (filtered) connection list to the subscriber. It respects
// ctx cancellation so the scanner goroutine is never stuck waiting for a
// consumer that has already exited.
func (ss *ScannerSubscriber) Signal(ctx context.Context, conns []ConnStat) {
var payload []ConnStat
if ss.filter == nil {
ss.Messages <- conns
payload = conns
} else {
filtered := []ConnStat{}
for i := range conns {
// We need to access the list directly otherwise there will be implicit memory aliasing
// If the filter matched a connection, we will send it to a channel.
if ss.filter.Match(&conns[i]) {
filtered = append(filtered, conns[i])
}
}
ss.Messages <- filtered
payload = filtered
}
select {
case ss.Messages <- payload:
case <-ctx.Done():
}
}