1
0
forked from wrenn/wrenn
Files
wrenn-releases/internal/vm/cleanup.go
Rafeed M. Bhuiyan 05ddf62399 v0.2.0 (#50)
Co-authored-by: Tasnim Kabir Sadik <tksadik@omukk.dev>

Reviewed-on: wrenn/wrenn#50
2026-05-24 21:10:37 +00:00

105 lines
2.6 KiB
Go

package vm
import (
"log/slog"
"os"
"path/filepath"
"regexp"
"strconv"
"strings"
"syscall"
"time"
)
// CleanupStaleProcesses kills any cloud-hypervisor processes left behind by a
// previous agent that crashed without graceful shutdown. Must run at agent
// startup before devicemapper.CleanupStaleDevices — a still-running CH process
// holds the dm-snapshot open and would cause "Device or resource busy" on
// dmsetup remove.
//
// Matches processes by argv containing the wrenn CH API socket path
// (/tmp/ch-<sandboxID>.sock) so we don't kill unrelated cloud-hypervisor VMs
// the operator may be running.
//
// Also removes stale /tmp/ch-*.sock files once the owning process is gone.
func CleanupStaleProcesses() {
socketPattern := regexp.MustCompile(`/tmp/ch-[A-Za-z0-9-]+\.sock`)
pids, err := scanProcs()
if err != nil {
slog.Debug("scan procs failed", "error", err)
return
}
killed := 0
for _, pid := range pids {
cmdline, err := readCmdline(pid)
if err != nil {
continue
}
if !strings.Contains(cmdline, "cloud-hypervisor") {
continue
}
if !socketPattern.MatchString(cmdline) {
continue
}
slog.Warn("killing stale cloud-hypervisor process", "pid", pid, "cmdline", cmdline)
if err := syscall.Kill(pid, syscall.SIGTERM); err != nil {
slog.Warn("SIGTERM stale CH failed", "pid", pid, "error", err)
}
killed++
}
// Give SIGTERM'd processes a brief window to exit so subsequent dm/loop
// teardown sees no open fd, then SIGKILL anything still alive.
if killed > 0 {
time.Sleep(500 * time.Millisecond)
for _, pid := range pids {
cmdline, err := readCmdline(pid)
if err != nil {
continue
}
if !strings.Contains(cmdline, "cloud-hypervisor") || !socketPattern.MatchString(cmdline) {
continue
}
_ = syscall.Kill(pid, syscall.SIGKILL)
}
time.Sleep(200 * time.Millisecond)
}
matches, _ := filepath.Glob("/tmp/ch-*.sock")
for _, sock := range matches {
if err := os.Remove(sock); err == nil {
slog.Info("removed stale CH socket", "path", sock)
}
}
}
// scanProcs lists the numeric directory names found directly under /proc and
// returns them as integers, in the order os.ReadDir reports them. Entries that
// are not directories or whose names are not valid integers are skipped. The
// error from reading /proc is returned unchanged, with a nil slice.
func scanProcs() ([]int, error) {
	dirents, err := os.ReadDir("/proc")
	if err != nil {
		return nil, err
	}

	var found []int
	for _, d := range dirents {
		// Keep only directories whose name parses as an integer.
		if n, convErr := strconv.Atoi(d.Name()); convErr == nil && d.IsDir() {
			found = append(found, n)
		}
	}
	return found, nil
}
// readCmdline returns the contents of /proc/<pid>/cmdline with every NUL byte
// replaced by a single space, which makes the argument vector usable for plain
// substring and regexp matching. On a read failure it returns an empty string
// and the underlying error.
func readCmdline(pid int) (string, error) {
	procPath := strings.Join([]string{"/proc", strconv.Itoa(pid), "cmdline"}, "/")
	raw, err := os.ReadFile(procPath)
	if err != nil {
		return "", err
	}
	// The kernel separates (and terminates) arguments with NUL bytes; splitting
	// on NUL and re-joining with spaces swaps each separator one-for-one.
	args := strings.Split(string(raw), "\x00")
	return strings.Join(args, " "), nil
}