1
0
forked from wrenn/wrenn
Files
wrenn-releases/internal/api/handlers_sandbox_events.go
Rafeed M. Bhuiyan 05ddf62399 v0.2.0 (#50)
Co-authored-by: Tasnim Kabir Sadik <tksadik@omukk.dev>

Reviewed-on: wrenn/wrenn#50
2026-05-24 21:10:37 +00:00

170 lines
5.2 KiB
Go

package api
import (
"context"
"encoding/json"
"log/slog"
"net/http"
"time"
"github.com/jackc/pgx/v5/pgtype"
"git.omukk.dev/wrenn/wrenn/pkg/auth"
"git.omukk.dev/wrenn/wrenn/pkg/channels"
"git.omukk.dev/wrenn/wrenn/pkg/db"
"git.omukk.dev/wrenn/wrenn/pkg/events"
"git.omukk.dev/wrenn/wrenn/pkg/id"
)
// sandboxEventHandler serves the host-agent sandbox event callback endpoint.
type sandboxEventHandler struct {
	// db is used to look up the sandbox row (team, current status) for an
	// incoming event.
	db *db.Queries
	// eventPub publishes the translated canonical event.
	eventPub *channels.Publisher
}
// newSandboxEventHandler builds a sandboxEventHandler backed by the given
// query layer and event publisher.
func newSandboxEventHandler(queries *db.Queries, eventPub *channels.Publisher) *sandboxEventHandler {
	handler := new(sandboxEventHandler)
	handler.db = queries
	handler.eventPub = eventPub
	return handler
}
// sandboxEventRequest is the JSON body a host agent posts to report a
// sandbox lifecycle event.
type sandboxEventRequest struct {
	// Event is the raw host event name, e.g. "sandbox.started". Required.
	Event string `json:"event"`
	// SandboxID is the formatted ID of the sandbox the event is about. Required.
	SandboxID string `json:"sandbox_id"`
	// HostID is the formatted ID of the reporting host; Handle rejects the
	// request unless it matches the authenticated caller. Required.
	HostID string `json:"host_id"`
	// HostIP is optional and is copied into the metadata of started/resumed
	// events when present.
	HostIP string `json:"host_ip,omitempty"`
	// Error is the optional failure description carried on failure events.
	Error string `json:"error,omitempty"`
	// Timestamp is the event time in Unix seconds. Handle substitutes the
	// receive time when it is zero.
	Timestamp int64 `json:"timestamp"`
}
// Handle is the HTTP endpoint host agents call to report sandbox lifecycle
// events. It validates the payload, checks that the reported host_id belongs
// to the authenticated caller, maps the raw host event onto the canonical
// events.Event taxonomy, and publishes it once to the unified Redis stream.
// Downstream, the SandboxEventConsumer (an independent consumer group)
// drives DB reconciliation, the channels dispatcher delivers to subscribed
// channels, and the SSE relay mirrors via Pub/Sub.
//
// Responses: 400 for a malformed or incomplete body, 403 when host_id does
// not match the authenticated host, and 204 otherwise — including for events
// that could not be translated.
func (h *sandboxEventHandler) Handle(w http.ResponseWriter, r *http.Request) {
	var payload sandboxEventRequest
	if decodeErr := json.NewDecoder(r.Body).Decode(&payload); decodeErr != nil {
		writeError(w, http.StatusBadRequest, "invalid_request", "invalid JSON body")
		return
	}

	hasRequired := payload.Event != "" && payload.SandboxID != "" && payload.HostID != ""
	if !hasRequired {
		writeError(w, http.StatusBadRequest, "invalid_request", "event, sandbox_id, and host_id are required")
		return
	}

	// A host may only report events on its own behalf.
	caller := auth.MustHostFromContext(r.Context())
	if id.FormatHostID(caller.HostID) != payload.HostID {
		writeError(w, http.StatusForbidden, "forbidden", "host_id does not match authenticated host")
		return
	}

	// Fall back to the receive time when the host did not supply one.
	if payload.Timestamp == 0 {
		payload.Timestamp = time.Now().Unix()
	}

	if evt, translated := h.translate(r.Context(), payload); translated {
		h.eventPub.Publish(r.Context(), evt)
	} else {
		// Log and still accept so the host agent doesn't retry.
		slog.Warn("sandbox event callback: untranslatable event", "event", payload.Event, "sandbox_id", payload.SandboxID)
	}
	w.WriteHeader(http.StatusNoContent)
}
// translate converts a raw host-agent callback into the canonical event.
//
// The boolean result is false when the sandbox ID cannot be parsed or the
// event name is not recognised; in both cases the zero Event is returned and
// no database access is made.
//
// For failure events without an in-flight verb (sandbox.failed,
// sandbox.error), the current DB status is consulted via verbForFailure to
// pick the appropriate verb. The owning team is resolved from the sandbox row
// on a best-effort basis: a failed lookup is logged and the event is still
// produced, carrying whatever id.FormatTeamID yields for the zero UUID.
func (h *sandboxEventHandler) translate(ctx context.Context, req sandboxEventRequest) (events.Event, bool) {
	sandboxUUID, err := id.ParseSandboxID(req.SandboxID)
	if err != nil {
		return events.Event{}, false
	}

	evt := events.Event{
		Timestamp: time.Unix(req.Timestamp, 0).UTC().Format(time.RFC3339),
		Actor:     events.SystemActor(),
		Resource:  events.Resource{ID: req.SandboxID, Type: "sandbox"},
	}

	// hostMeta builds the metadata shared by events that place the sandbox
	// on a host: always host_id, plus host_ip when the agent reported one.
	hostMeta := func() map[string]string {
		meta := map[string]string{"host_id": req.HostID}
		if req.HostIP != "" {
			meta["host_ip"] = req.HostIP
		}
		return meta
	}

	// fail fills in the fields common to every host-reported failure.
	fail := func(verb string) {
		evt.Event = verb
		evt.Outcome = events.OutcomeError
		evt.Error = req.Error
		evt.Metadata = map[string]string{"reason": "host_failure"}
	}

	switch req.Event {
	case "sandbox.started":
		evt.Event = events.CapsuleCreate
		evt.Outcome = events.OutcomeSuccess
		evt.Metadata = hostMeta()
	case "sandbox.resumed":
		evt.Event = events.CapsuleResume
		evt.Outcome = events.OutcomeSuccess
		evt.Metadata = hostMeta()
	case "sandbox.paused":
		evt.Event = events.CapsulePause
		evt.Outcome = events.OutcomeSuccess
	case "sandbox.auto_paused":
		evt.Event = events.CapsulePause
		evt.Outcome = events.OutcomeSuccess
		evt.Metadata = map[string]string{"reason": "ttl_expired"}
	case "sandbox.stopped":
		evt.Event = events.CapsuleDestroy
		evt.Outcome = events.OutcomeSuccess
	case "sandbox.pause_failed":
		fail(events.CapsulePause)
	case "sandbox.resume_failed":
		fail(events.CapsuleResume)
	case "sandbox.failed", "sandbox.error":
		// Pick a verb based on the sandbox's current DB status.
		fail(h.verbForFailure(ctx, sandboxUUID))
	default:
		return events.Event{}, false
	}

	// Resolve the owning team only once the event is known to be
	// translatable, so unrecognised events never cost a DB read.
	var teamID pgtype.UUID
	if sb, lookupErr := h.db.GetSandbox(ctx, sandboxUUID); lookupErr != nil {
		// Best-effort: still emit the event, but leave a trace that it
		// carries no real team.
		slog.Warn("sandbox event callback: sandbox lookup failed", "event", req.Event, "sandbox_id", req.SandboxID, "error", lookupErr)
	} else {
		teamID = sb.TeamID
	}
	evt.TeamID = id.FormatTeamID(teamID)

	return evt, true
}
// verbForFailure chooses the canonical verb for a generic host failure by
// looking at the sandbox's current status in the database. It returns
// events.CapsuleDestroy when the sandbox cannot be loaded or its status is
// not one of the in-flight states handled below.
func (h *sandboxEventHandler) verbForFailure(ctx context.Context, sandboxID pgtype.UUID) string {
	verb := events.CapsuleDestroy

	row, lookupErr := h.db.GetSandbox(ctx, sandboxID)
	if lookupErr != nil {
		return verb
	}

	switch row.Status {
	case "starting", "snapshotting":
		// "snapshotting": a snapshot pauses then resumes the VM; a host-side
		// failure leaves the sandbox errored, not destroyed. Route through
		// CapsuleCreate so the consumer's handleFailed marks it "error"
		// rather than removing the row.
		verb = events.CapsuleCreate
	case "resuming":
		verb = events.CapsuleResume
	case "pausing":
		verb = events.CapsulePause
	}
	return verb
}