From a5425969edfdb895021a287e62f25350d86ca720 Mon Sep 17 00:00:00 2001 From: pptx704 Date: Sun, 17 May 2026 01:47:56 +0600 Subject: [PATCH] fix: assorted bug fixes for CH migration Fix resource leaks, race conditions, and error handling across host agent and control plane: proper sparse file cleanup on close error, connect error wrapping for MakeDir, CoW file cleanup on pause failure, per-sandbox VM directories, deferred map deletion to avoid race in VM destroy, and goroutine launch for extension background workers. --- internal/devicemapper/devicemapper.go | 6 +++++- internal/hostagent/server.go | 6 +++++- internal/sandbox/manager.go | 1 + internal/vm/config.go | 2 +- internal/vm/manager.go | 5 ++++- pkg/cpserver/run.go | 2 +- 6 files changed, 17 insertions(+), 5 deletions(-) diff --git a/internal/devicemapper/devicemapper.go b/internal/devicemapper/devicemapper.go index f65ec6a..f53b109 100644 --- a/internal/devicemapper/devicemapper.go +++ b/internal/devicemapper/devicemapper.go @@ -379,5 +379,9 @@ func createSparseFile(path string, sizeBytes int64) error { os.Remove(path) return err } - return f.Close() + if err := f.Close(); err != nil { + os.Remove(path) + return err + } + return nil } diff --git a/internal/hostagent/server.go b/internal/hostagent/server.go index 816a99f..faf6424 100644 --- a/internal/hostagent/server.go +++ b/internal/hostagent/server.go @@ -301,7 +301,7 @@ func (s *Server) MakeDir( resp, err := client.MakeDir(ctx, msg.Path) if err != nil { - return nil, fmt.Errorf("make dir: %w", err) + return nil, connect.NewError(connect.CodeInternal, fmt.Errorf("make dir: %w", err)) } return connect.NewResponse(&pb.MakeDirResponse{ @@ -373,6 +373,8 @@ func (s *Server) ExecStream( Error: ev.Error, }, } + default: + continue } if err := stream.Send(&resp); err != nil { return err @@ -889,6 +891,8 @@ func (s *Server) ConnectProcess( Error: ev.Error, }, } + default: + continue } if err := stream.Send(&resp); err != nil { return err diff --git a/internal/sandbox/manager.go b/internal/sandbox/manager.go index 8b3f85d..9aed83f 100644 --- a/internal/sandbox/manager.go +++ b/internal/sandbox/manager.go @@ -541,6 +541,7 @@ func (m *Manager) Pause(ctx context.Context, sandboxID string) error { warnErr("snapshot dir cleanup error", sandboxID, os.RemoveAll(pauseDir)) warnErr("network cleanup error during pause", sandboxID, network.RemoveNetwork(sb.slot)) m.slots.Release(sb.SlotIndex) + os.Remove(sb.dmDevice.CowPath) if sb.baseImagePath != "" { m.loops.Release(sb.baseImagePath) } diff --git a/internal/vm/config.go b/internal/vm/config.go index f3c2341..565bacb 100644 --- a/internal/vm/config.go +++ b/internal/vm/config.go @@ -77,7 +77,7 @@ func (c *VMConfig) applyDefaults() { c.SocketPath = fmt.Sprintf("/tmp/ch-%s.sock", c.SandboxID) } if c.SandboxDir == "" { - c.SandboxDir = "/tmp/ch-vm" + c.SandboxDir = fmt.Sprintf("/tmp/ch-vm-%s", c.SandboxID) } if c.TapDevice == "" { c.TapDevice = "tap0" diff --git a/internal/vm/manager.go b/internal/vm/manager.go index 5f8539e..3233a49 100644 --- a/internal/vm/manager.go +++ b/internal/vm/manager.go @@ -143,7 +143,6 @@ func (m *Manager) Destroy(ctx context.Context, sandboxID string) error { m.mu.Unlock() return fmt.Errorf("VM not found: %s", sandboxID) } - delete(m.vms, sandboxID) m.mu.Unlock() slog.Info("destroying VM", "sandbox", sandboxID) @@ -161,6 +160,10 @@ func (m *Manager) Destroy(ctx context.Context, sandboxID string) error { os.Remove(vm.Config.SocketPath) + m.mu.Lock() + delete(m.vms, sandboxID) + m.mu.Unlock() + slog.Info("VM destroyed", "sandbox", sandboxID) return nil } diff --git a/pkg/cpserver/run.go b/pkg/cpserver/run.go index 1fbf6eb..2b7a7a1 100644 --- a/pkg/cpserver/run.go +++ b/pkg/cpserver/run.go @@ -255,7 +255,7 @@ func Run(opts ...Option) { // Start extension background workers. for _, ext := range o.extensions { for _, worker := range ext.BackgroundWorkers(sctx) { - worker(ctx) + go worker(ctx) } }