forked from wrenn/wrenn
Fix review issues: detached contexts, loop device leak, timer leak, size_bytes
- Use context.Background() with timeout in destroySandbox/failBuild so cleanup and DB writes survive parent context cancellation on shutdown - Fix loop device refcount leak in FlattenRootfs when dmDevice is nil - Replace time.After with time.NewTimer in healthcheck polling to avoid goroutine leak when healthcheck passes early - Capture size_bytes from CreateSnapshot/FlattenRootfs RPC responses instead of hardcoding 0 in the templates table insert - Avoid leaking internal error details to API clients in build handler
This commit is contained in:
@ -3,6 +3,7 @@ package api
|
|||||||
import (
|
import (
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"log/slog"
|
||||||
"net/http"
|
"net/http"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
@ -119,7 +120,8 @@ func (h *buildHandler) Create(w http.ResponseWriter, r *http.Request) {
|
|||||||
MemoryMB: req.MemoryMB,
|
MemoryMB: req.MemoryMB,
|
||||||
})
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
writeError(w, http.StatusInternalServerError, "build_error", err.Error())
|
slog.Error("failed to create build", "error", err)
|
||||||
|
writeError(w, http.StatusInternalServerError, "build_error", "failed to create build")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -839,6 +839,8 @@ func (m *Manager) FlattenRootfs(ctx context.Context, sandboxID, name string) (in
|
|||||||
|
|
||||||
outputPath := snapshot.RootfsPath(m.cfg.ImagesDir, name)
|
outputPath := snapshot.RootfsPath(m.cfg.ImagesDir, name)
|
||||||
if sb.dmDevice == nil {
|
if sb.dmDevice == nil {
|
||||||
|
m.cleanupDM(sb)
|
||||||
|
warnErr("template dir cleanup error", name, snapshot.Remove(m.cfg.ImagesDir, name))
|
||||||
return 0, fmt.Errorf("sandbox %s has no dm device", sandboxID)
|
return 0, fmt.Errorf("sandbox %s has no dm device", sandboxID)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -255,6 +255,7 @@ func (s *BuildService) executeBuild(ctx context.Context, buildID string) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Healthcheck or direct snapshot.
|
// Healthcheck or direct snapshot.
|
||||||
|
var sizeBytes int64
|
||||||
if build.Healthcheck.Valid && build.Healthcheck.String != "" {
|
if build.Healthcheck.Valid && build.Healthcheck.String != "" {
|
||||||
log.Info("running healthcheck", "cmd", build.Healthcheck.String)
|
log.Info("running healthcheck", "cmd", build.Healthcheck.String)
|
||||||
if err := s.waitForHealthcheck(ctx, agent, sandboxID, build.Healthcheck.String); err != nil {
|
if err := s.waitForHealthcheck(ctx, agent, sandboxID, build.Healthcheck.String); err != nil {
|
||||||
@ -265,25 +266,29 @@ func (s *BuildService) executeBuild(ctx context.Context, buildID string) {
|
|||||||
|
|
||||||
// Healthcheck passed → full snapshot (with memory/CPU state).
|
// Healthcheck passed → full snapshot (with memory/CPU state).
|
||||||
log.Info("healthcheck passed, creating snapshot")
|
log.Info("healthcheck passed, creating snapshot")
|
||||||
if _, err := agent.CreateSnapshot(ctx, connect.NewRequest(&pb.CreateSnapshotRequest{
|
snapResp, err := agent.CreateSnapshot(ctx, connect.NewRequest(&pb.CreateSnapshotRequest{
|
||||||
SandboxId: sandboxID,
|
SandboxId: sandboxID,
|
||||||
Name: build.Name,
|
Name: build.Name,
|
||||||
})); err != nil {
|
}))
|
||||||
|
if err != nil {
|
||||||
s.destroySandbox(ctx, agent, sandboxID)
|
s.destroySandbox(ctx, agent, sandboxID)
|
||||||
s.failBuild(ctx, buildID, fmt.Sprintf("create snapshot failed: %v", err))
|
s.failBuild(ctx, buildID, fmt.Sprintf("create snapshot failed: %v", err))
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
sizeBytes = snapResp.Msg.SizeBytes
|
||||||
} else {
|
} else {
|
||||||
// No healthcheck → image-only template (rootfs only).
|
// No healthcheck → image-only template (rootfs only).
|
||||||
log.Info("no healthcheck, flattening rootfs")
|
log.Info("no healthcheck, flattening rootfs")
|
||||||
if _, err := agent.FlattenRootfs(ctx, connect.NewRequest(&pb.FlattenRootfsRequest{
|
flatResp, err := agent.FlattenRootfs(ctx, connect.NewRequest(&pb.FlattenRootfsRequest{
|
||||||
SandboxId: sandboxID,
|
SandboxId: sandboxID,
|
||||||
Name: build.Name,
|
Name: build.Name,
|
||||||
})); err != nil {
|
}))
|
||||||
|
if err != nil {
|
||||||
s.destroySandbox(ctx, agent, sandboxID)
|
s.destroySandbox(ctx, agent, sandboxID)
|
||||||
s.failBuild(ctx, buildID, fmt.Sprintf("flatten rootfs failed: %v", err))
|
s.failBuild(ctx, buildID, fmt.Sprintf("flatten rootfs failed: %v", err))
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
sizeBytes = flatResp.Msg.SizeBytes
|
||||||
}
|
}
|
||||||
|
|
||||||
// Insert into templates table as a global (platform) template.
|
// Insert into templates table as a global (platform) template.
|
||||||
@ -297,7 +302,7 @@ func (s *BuildService) executeBuild(ctx context.Context, buildID string) {
|
|||||||
Type: templateType,
|
Type: templateType,
|
||||||
Vcpus: pgtype.Int4{Int32: build.Vcpus, Valid: true},
|
Vcpus: pgtype.Int4{Int32: build.Vcpus, Valid: true},
|
||||||
MemoryMb: pgtype.Int4{Int32: build.MemoryMb, Valid: true},
|
MemoryMb: pgtype.Int4{Int32: build.MemoryMb, Valid: true},
|
||||||
SizeBytes: 0, // Could query the host, but the template is created.
|
SizeBytes: sizeBytes,
|
||||||
TeamID: platformTeamID,
|
TeamID: platformTeamID,
|
||||||
}); err != nil {
|
}); err != nil {
|
||||||
log.Error("failed to insert template record", "error", err)
|
log.Error("failed to insert template record", "error", err)
|
||||||
@ -319,7 +324,8 @@ func (s *BuildService) executeBuild(ctx context.Context, buildID string) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (s *BuildService) waitForHealthcheck(ctx context.Context, agent buildAgentClient, sandboxID, cmd string) error {
|
func (s *BuildService) waitForHealthcheck(ctx context.Context, agent buildAgentClient, sandboxID, cmd string) error {
|
||||||
deadline := time.After(healthcheckTimeout)
|
deadline := time.NewTimer(healthcheckTimeout)
|
||||||
|
defer deadline.Stop()
|
||||||
ticker := time.NewTicker(healthcheckInterval)
|
ticker := time.NewTicker(healthcheckInterval)
|
||||||
defer ticker.Stop()
|
defer ticker.Stop()
|
||||||
|
|
||||||
@ -327,7 +333,7 @@ func (s *BuildService) waitForHealthcheck(ctx context.Context, agent buildAgentC
|
|||||||
select {
|
select {
|
||||||
case <-ctx.Done():
|
case <-ctx.Done():
|
||||||
return ctx.Err()
|
return ctx.Err()
|
||||||
case <-deadline:
|
case <-deadline.C:
|
||||||
return fmt.Errorf("healthcheck timed out after %s", healthcheckTimeout)
|
return fmt.Errorf("healthcheck timed out after %s", healthcheckTimeout)
|
||||||
case <-ticker.C:
|
case <-ticker.C:
|
||||||
execCtx, cancel := context.WithTimeout(ctx, 10*time.Second)
|
execCtx, cancel := context.WithTimeout(ctx, 10*time.Second)
|
||||||
@ -366,8 +372,11 @@ func (s *BuildService) updateLogs(ctx context.Context, buildID string, step int,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *BuildService) failBuild(ctx context.Context, buildID, errMsg string) {
|
func (s *BuildService) failBuild(_ context.Context, buildID, errMsg string) {
|
||||||
slog.Error("build failed", "build_id", buildID, "error", errMsg)
|
slog.Error("build failed", "build_id", buildID, "error", errMsg)
|
||||||
|
// Use a detached context so DB writes survive parent context cancellation (e.g. shutdown).
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||||
|
defer cancel()
|
||||||
if err := s.DB.UpdateBuildError(ctx, db.UpdateBuildErrorParams{
|
if err := s.DB.UpdateBuildError(ctx, db.UpdateBuildErrorParams{
|
||||||
ID: buildID,
|
ID: buildID,
|
||||||
Error: pgtype.Text{String: errMsg, Valid: true},
|
Error: pgtype.Text{String: errMsg, Valid: true},
|
||||||
@ -376,7 +385,10 @@ func (s *BuildService) failBuild(ctx context.Context, buildID, errMsg string) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *BuildService) destroySandbox(ctx context.Context, agent buildAgentClient, sandboxID string) {
|
func (s *BuildService) destroySandbox(_ context.Context, agent buildAgentClient, sandboxID string) {
|
||||||
|
// Use a detached context so cleanup succeeds even during shutdown.
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
|
||||||
|
defer cancel()
|
||||||
if _, err := agent.DestroySandbox(ctx, connect.NewRequest(&pb.DestroySandboxRequest{
|
if _, err := agent.DestroySandbox(ctx, connect.NewRequest(&pb.DestroySandboxRequest{
|
||||||
SandboxId: sandboxID,
|
SandboxId: sandboxID,
|
||||||
})); err != nil {
|
})); err != nil {
|
||||||
|
|||||||
Reference in New Issue
Block a user