forked from wrenn/wrenn
Replace gopsutil port scanner with direct /proc/net/tcp reading
The envd port scanner used gopsutil's net.Connections(), which walks
/proc/{pid}/fd to enumerate socket inodes. This corrupts Go runtime
semaphore state when the VM is paused mid-operation and restored from
a Firecracker snapshot.
Replace with a direct /proc/net/tcp + /proc/net/tcp6 parser that reads
a single file per address family — no /proc/{pid}/fd walk, no goroutines,
no WaitGroups. Also replace concurrent-map (smap) in the scanner with a
plain sync.RWMutex-protected map, since concurrent-map's Items() spawns
goroutines with a WaitGroup internally, which is equally unsafe across
snapshot boundaries.
Use the socket inode instead of the PID as the port forwarding map key, since
the inode is available directly from /proc/net/tcp without the fd walk.
This commit is contained in:
@ -3,19 +3,21 @@
|
||||
package port
|
||||
|
||||
import (
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/rs/zerolog"
|
||||
"github.com/shirou/gopsutil/v4/net"
|
||||
|
||||
"git.omukk.dev/wrenn/sandbox/envd/internal/shared/smap"
|
||||
)
|
||||
|
||||
type Scanner struct {
|
||||
Processes chan net.ConnectionStat
|
||||
scanExit chan struct{}
|
||||
subs *smap.Map[*ScannerSubscriber]
|
||||
period time.Duration
|
||||
scanExit chan struct{}
|
||||
period time.Duration
|
||||
|
||||
// Plain mutex-protected map instead of concurrent-map. The concurrent-map
|
||||
// library's Items() spawns goroutines and uses a WaitGroup internally,
|
||||
// which corrupts Go runtime semaphore state across Firecracker snapshot/restore.
|
||||
mu sync.RWMutex
|
||||
subs map[string]*ScannerSubscriber
|
||||
}
|
||||
|
||||
func (s *Scanner) Destroy() {
|
||||
@ -24,33 +26,44 @@ func (s *Scanner) Destroy() {
|
||||
|
||||
func NewScanner(period time.Duration) *Scanner {
|
||||
return &Scanner{
|
||||
period: period,
|
||||
subs: smap.New[*ScannerSubscriber](),
|
||||
scanExit: make(chan struct{}),
|
||||
Processes: make(chan net.ConnectionStat),
|
||||
period: period,
|
||||
subs: make(map[string]*ScannerSubscriber),
|
||||
scanExit: make(chan struct{}),
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Scanner) AddSubscriber(logger *zerolog.Logger, id string, filter *ScannerFilter) *ScannerSubscriber {
|
||||
subscriber := NewScannerSubscriber(logger, id, filter)
|
||||
s.subs.Insert(id, subscriber)
|
||||
|
||||
s.mu.Lock()
|
||||
s.subs[id] = subscriber
|
||||
s.mu.Unlock()
|
||||
|
||||
return subscriber
|
||||
}
|
||||
|
||||
func (s *Scanner) Unsubscribe(sub *ScannerSubscriber) {
|
||||
s.subs.Remove(sub.ID())
|
||||
s.mu.Lock()
|
||||
delete(s.subs, sub.ID())
|
||||
s.mu.Unlock()
|
||||
|
||||
sub.Destroy()
|
||||
}
|
||||
|
||||
// ScanAndBroadcast starts scanning open TCP ports and broadcasts every open port to all subscribers.
|
||||
func (s *Scanner) ScanAndBroadcast() {
|
||||
for {
|
||||
// tcp monitors both ipv4 and ipv6 connections.
|
||||
processes, _ := net.Connections("tcp")
|
||||
for _, sub := range s.subs.Items() {
|
||||
sub.Signal(processes)
|
||||
// Read directly from /proc/net/tcp and /proc/net/tcp6 instead of
|
||||
// using gopsutil's net.Connections(), which walks /proc/{pid}/fd
|
||||
// and causes Go runtime corruption after Firecracker snapshot/restore.
|
||||
conns, _ := ReadTCPConnections()
|
||||
|
||||
s.mu.RLock()
|
||||
for _, sub := range s.subs {
|
||||
sub.Signal(conns)
|
||||
}
|
||||
s.mu.RUnlock()
|
||||
|
||||
select {
|
||||
case <-s.scanExit:
|
||||
return
|
||||
|
||||
Reference in New Issue
Block a user