1
0
forked from wrenn/wrenn

Fix review issues: detached contexts, loop device leak, timer leak, size_bytes

- Use context.Background() with timeout in destroySandbox/failBuild so
  cleanup and DB writes survive parent context cancellation on shutdown
- Fix loop device refcount leak in FlattenRootfs when dmDevice is nil
- Replace time.After with time.NewTimer in healthcheck polling to avoid
  goroutine leak when healthcheck passes early
- Capture size_bytes from CreateSnapshot/FlattenRootfs RPC responses
  instead of hardcoding 0 in the templates table insert
- Avoid leaking internal error details to API clients in build handler
This commit is contained in:
2026-03-26 15:31:38 +06:00
parent 1ce62934b3
commit cdd89a7cee
3 changed files with 26 additions and 10 deletions

View File

@ -3,6 +3,7 @@ package api
import ( import (
"encoding/json" "encoding/json"
"fmt" "fmt"
"log/slog"
"net/http" "net/http"
"time" "time"
@ -119,7 +120,8 @@ func (h *buildHandler) Create(w http.ResponseWriter, r *http.Request) {
MemoryMB: req.MemoryMB, MemoryMB: req.MemoryMB,
}) })
if err != nil { if err != nil {
writeError(w, http.StatusInternalServerError, "build_error", err.Error()) slog.Error("failed to create build", "error", err)
writeError(w, http.StatusInternalServerError, "build_error", "failed to create build")
return return
} }

View File

@ -839,6 +839,8 @@ func (m *Manager) FlattenRootfs(ctx context.Context, sandboxID, name string) (in
outputPath := snapshot.RootfsPath(m.cfg.ImagesDir, name) outputPath := snapshot.RootfsPath(m.cfg.ImagesDir, name)
if sb.dmDevice == nil { if sb.dmDevice == nil {
m.cleanupDM(sb)
warnErr("template dir cleanup error", name, snapshot.Remove(m.cfg.ImagesDir, name))
return 0, fmt.Errorf("sandbox %s has no dm device", sandboxID) return 0, fmt.Errorf("sandbox %s has no dm device", sandboxID)
} }

View File

@ -255,6 +255,7 @@ func (s *BuildService) executeBuild(ctx context.Context, buildID string) {
} }
// Healthcheck or direct snapshot. // Healthcheck or direct snapshot.
var sizeBytes int64
if build.Healthcheck.Valid && build.Healthcheck.String != "" { if build.Healthcheck.Valid && build.Healthcheck.String != "" {
log.Info("running healthcheck", "cmd", build.Healthcheck.String) log.Info("running healthcheck", "cmd", build.Healthcheck.String)
if err := s.waitForHealthcheck(ctx, agent, sandboxID, build.Healthcheck.String); err != nil { if err := s.waitForHealthcheck(ctx, agent, sandboxID, build.Healthcheck.String); err != nil {
@ -265,25 +266,29 @@ func (s *BuildService) executeBuild(ctx context.Context, buildID string) {
// Healthcheck passed → full snapshot (with memory/CPU state). // Healthcheck passed → full snapshot (with memory/CPU state).
log.Info("healthcheck passed, creating snapshot") log.Info("healthcheck passed, creating snapshot")
if _, err := agent.CreateSnapshot(ctx, connect.NewRequest(&pb.CreateSnapshotRequest{ snapResp, err := agent.CreateSnapshot(ctx, connect.NewRequest(&pb.CreateSnapshotRequest{
SandboxId: sandboxID, SandboxId: sandboxID,
Name: build.Name, Name: build.Name,
})); err != nil { }))
if err != nil {
s.destroySandbox(ctx, agent, sandboxID) s.destroySandbox(ctx, agent, sandboxID)
s.failBuild(ctx, buildID, fmt.Sprintf("create snapshot failed: %v", err)) s.failBuild(ctx, buildID, fmt.Sprintf("create snapshot failed: %v", err))
return return
} }
sizeBytes = snapResp.Msg.SizeBytes
} else { } else {
// No healthcheck → image-only template (rootfs only). // No healthcheck → image-only template (rootfs only).
log.Info("no healthcheck, flattening rootfs") log.Info("no healthcheck, flattening rootfs")
if _, err := agent.FlattenRootfs(ctx, connect.NewRequest(&pb.FlattenRootfsRequest{ flatResp, err := agent.FlattenRootfs(ctx, connect.NewRequest(&pb.FlattenRootfsRequest{
SandboxId: sandboxID, SandboxId: sandboxID,
Name: build.Name, Name: build.Name,
})); err != nil { }))
if err != nil {
s.destroySandbox(ctx, agent, sandboxID) s.destroySandbox(ctx, agent, sandboxID)
s.failBuild(ctx, buildID, fmt.Sprintf("flatten rootfs failed: %v", err)) s.failBuild(ctx, buildID, fmt.Sprintf("flatten rootfs failed: %v", err))
return return
} }
sizeBytes = flatResp.Msg.SizeBytes
} }
// Insert into templates table as a global (platform) template. // Insert into templates table as a global (platform) template.
@ -297,7 +302,7 @@ func (s *BuildService) executeBuild(ctx context.Context, buildID string) {
Type: templateType, Type: templateType,
Vcpus: pgtype.Int4{Int32: build.Vcpus, Valid: true}, Vcpus: pgtype.Int4{Int32: build.Vcpus, Valid: true},
MemoryMb: pgtype.Int4{Int32: build.MemoryMb, Valid: true}, MemoryMb: pgtype.Int4{Int32: build.MemoryMb, Valid: true},
SizeBytes: 0, // Could query the host, but the template is created. SizeBytes: sizeBytes,
TeamID: platformTeamID, TeamID: platformTeamID,
}); err != nil { }); err != nil {
log.Error("failed to insert template record", "error", err) log.Error("failed to insert template record", "error", err)
@ -319,7 +324,8 @@ func (s *BuildService) executeBuild(ctx context.Context, buildID string) {
} }
func (s *BuildService) waitForHealthcheck(ctx context.Context, agent buildAgentClient, sandboxID, cmd string) error { func (s *BuildService) waitForHealthcheck(ctx context.Context, agent buildAgentClient, sandboxID, cmd string) error {
deadline := time.After(healthcheckTimeout) deadline := time.NewTimer(healthcheckTimeout)
defer deadline.Stop()
ticker := time.NewTicker(healthcheckInterval) ticker := time.NewTicker(healthcheckInterval)
defer ticker.Stop() defer ticker.Stop()
@ -327,7 +333,7 @@ func (s *BuildService) waitForHealthcheck(ctx context.Context, agent buildAgentC
select { select {
case <-ctx.Done(): case <-ctx.Done():
return ctx.Err() return ctx.Err()
case <-deadline: case <-deadline.C:
return fmt.Errorf("healthcheck timed out after %s", healthcheckTimeout) return fmt.Errorf("healthcheck timed out after %s", healthcheckTimeout)
case <-ticker.C: case <-ticker.C:
execCtx, cancel := context.WithTimeout(ctx, 10*time.Second) execCtx, cancel := context.WithTimeout(ctx, 10*time.Second)
@ -366,8 +372,11 @@ func (s *BuildService) updateLogs(ctx context.Context, buildID string, step int,
} }
} }
func (s *BuildService) failBuild(ctx context.Context, buildID, errMsg string) { func (s *BuildService) failBuild(_ context.Context, buildID, errMsg string) {
slog.Error("build failed", "build_id", buildID, "error", errMsg) slog.Error("build failed", "build_id", buildID, "error", errMsg)
// Use a detached context so DB writes survive parent context cancellation (e.g. shutdown).
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
if err := s.DB.UpdateBuildError(ctx, db.UpdateBuildErrorParams{ if err := s.DB.UpdateBuildError(ctx, db.UpdateBuildErrorParams{
ID: buildID, ID: buildID,
Error: pgtype.Text{String: errMsg, Valid: true}, Error: pgtype.Text{String: errMsg, Valid: true},
@ -376,7 +385,10 @@ func (s *BuildService) failBuild(ctx context.Context, buildID, errMsg string) {
} }
} }
func (s *BuildService) destroySandbox(ctx context.Context, agent buildAgentClient, sandboxID string) { func (s *BuildService) destroySandbox(_ context.Context, agent buildAgentClient, sandboxID string) {
// Use a detached context so cleanup succeeds even during shutdown.
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
defer cancel()
if _, err := agent.DestroySandbox(ctx, connect.NewRequest(&pb.DestroySandboxRequest{ if _, err := agent.DestroySandbox(ctx, connect.NewRequest(&pb.DestroySandboxRequest{
SandboxId: sandboxID, SandboxId: sandboxID,
})); err != nil { })); err != nil {