forked from wrenn/wrenn
Replaces the hardcoded CP_HOST_AGENT_ADDR single-agent setup with a DB-driven registration system supporting multiple host agents (BYOC). Key changes: - Host agents register via one-time token, receive a 7-day JWT + 60-day refresh token; heartbeat loop auto-refreshes on 401/403 and pauses all sandboxes if refresh fails - HostClientPool: lazy Connect RPC client cache keyed by host ID, replacing the single static agent client throughout the API and service layers - RoundRobinScheduler: picks an online host for each new sandbox via ListActiveHosts; extensible for future scheduling strategies - HostMonitor (replaces Reconciler): passive heartbeat staleness check marks hosts unreachable and sandboxes missing after 90s; active reconciliation per online host restores missing-but-alive sandboxes and stops orphans - Graceful host delete: returns 409 with affected sandbox list without ?force=true; force-delete destroys sandboxes then evicts pool client - Snapshot delete broadcasts to all online hosts (templates have no host_id) - sandbox.Manager.PauseAll: pauses all running VMs on CP connectivity loss - New migration: host_refresh_tokens table with token rotation (issue-then-revoke ordering to prevent lockout on mid-rotation crash) - New sandbox status 'missing' (reversible, unlike 'stopped') and host status 'unreachable'; both reflected in OpenAPI spec - Fix: refresh token auth failure now returns 401 (was 400 via generic 'invalid' substring match in serviceErrToHTTP)
136 lines
3.4 KiB
Go
136 lines
3.4 KiB
Go
package api
|
|
|
|
import (
|
|
"encoding/base64"
|
|
"encoding/json"
|
|
"log/slog"
|
|
"net/http"
|
|
"time"
|
|
"unicode/utf8"
|
|
|
|
"connectrpc.com/connect"
|
|
"github.com/go-chi/chi/v5"
|
|
"github.com/jackc/pgx/v5/pgtype"
|
|
|
|
"git.omukk.dev/wrenn/sandbox/internal/auth"
|
|
"git.omukk.dev/wrenn/sandbox/internal/db"
|
|
"git.omukk.dev/wrenn/sandbox/internal/lifecycle"
|
|
pb "git.omukk.dev/wrenn/sandbox/proto/hostagent/gen"
|
|
)
|
|
|
|
type execHandler struct {
|
|
db *db.Queries
|
|
pool *lifecycle.HostClientPool
|
|
}
|
|
|
|
func newExecHandler(db *db.Queries, pool *lifecycle.HostClientPool) *execHandler {
|
|
return &execHandler{db: db, pool: pool}
|
|
}
|
|
|
|
// execRequest is the JSON body accepted by the exec endpoint.
type execRequest struct {
	// Cmd is the command to run. The handler rejects an empty value.
	Cmd string `json:"cmd"`
	// Args are the arguments passed along with Cmd to the host agent.
	Args []string `json:"args"`
	// TimeoutSec is forwarded to the host agent unchanged.
	// NOTE(review): the meaning of zero/negative values is decided by the
	// agent and is not visible here - confirm before relying on it.
	TimeoutSec int32 `json:"timeout_sec"`
}
|
|
|
|
// execResponse is the JSON body returned by the exec endpoint after the
// command has run.
type execResponse struct {
	// SandboxID is the sandbox the command ran in.
	SandboxID string `json:"sandbox_id"`
	// Cmd echoes the command from the request.
	Cmd string `json:"cmd"`
	// Stdout and Stderr hold the captured output, represented as described
	// by Encoding.
	Stdout string `json:"stdout"`
	Stderr string `json:"stderr"`
	// ExitCode is the exit code reported by the host agent.
	ExitCode int32 `json:"exit_code"`
	// DurationMs is the elapsed time measured by the handler, in milliseconds.
	DurationMs int64 `json:"duration_ms"`
	// Encoding is "utf-8" for text output, "base64" for binary output.
	Encoding string `json:"encoding"`
}
|
|
|
|
// Exec handles POST /v1/sandboxes/{id}/exec.
|
|
func (h *execHandler) Exec(w http.ResponseWriter, r *http.Request) {
|
|
sandboxID := chi.URLParam(r, "id")
|
|
ctx := r.Context()
|
|
ac := auth.MustFromContext(ctx)
|
|
|
|
sb, err := h.db.GetSandboxByTeam(ctx, db.GetSandboxByTeamParams{ID: sandboxID, TeamID: ac.TeamID})
|
|
if err != nil {
|
|
writeError(w, http.StatusNotFound, "not_found", "sandbox not found")
|
|
return
|
|
}
|
|
if sb.Status != "running" {
|
|
writeError(w, http.StatusConflict, "invalid_state", "sandbox is not running (status: "+sb.Status+")")
|
|
return
|
|
}
|
|
|
|
var req execRequest
|
|
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
|
writeError(w, http.StatusBadRequest, "invalid_request", "invalid JSON body")
|
|
return
|
|
}
|
|
|
|
if req.Cmd == "" {
|
|
writeError(w, http.StatusBadRequest, "invalid_request", "cmd is required")
|
|
return
|
|
}
|
|
|
|
start := time.Now()
|
|
|
|
agent, err := agentForHost(ctx, h.db, h.pool, sb.HostID)
|
|
if err != nil {
|
|
writeError(w, http.StatusServiceUnavailable, "host_unavailable", "sandbox host is not reachable")
|
|
return
|
|
}
|
|
|
|
resp, err := agent.Exec(ctx, connect.NewRequest(&pb.ExecRequest{
|
|
SandboxId: sandboxID,
|
|
Cmd: req.Cmd,
|
|
Args: req.Args,
|
|
TimeoutSec: req.TimeoutSec,
|
|
}))
|
|
if err != nil {
|
|
status, code, msg := agentErrToHTTP(err)
|
|
writeError(w, status, code, msg)
|
|
return
|
|
}
|
|
|
|
duration := time.Since(start)
|
|
|
|
// Update last active.
|
|
if err := h.db.UpdateLastActive(ctx, db.UpdateLastActiveParams{
|
|
ID: sandboxID,
|
|
LastActiveAt: pgtype.Timestamptz{
|
|
Time: time.Now(),
|
|
Valid: true,
|
|
},
|
|
}); err != nil {
|
|
slog.Warn("failed to update last_active_at", "id", sandboxID, "error", err)
|
|
}
|
|
|
|
// Use base64 encoding if output contains non-UTF-8 bytes.
|
|
stdout := resp.Msg.Stdout
|
|
stderr := resp.Msg.Stderr
|
|
encoding := "utf-8"
|
|
|
|
if !utf8.Valid(stdout) || !utf8.Valid(stderr) {
|
|
encoding = "base64"
|
|
writeJSON(w, http.StatusOK, execResponse{
|
|
SandboxID: sandboxID,
|
|
Cmd: req.Cmd,
|
|
Stdout: base64.StdEncoding.EncodeToString(stdout),
|
|
Stderr: base64.StdEncoding.EncodeToString(stderr),
|
|
ExitCode: resp.Msg.ExitCode,
|
|
DurationMs: duration.Milliseconds(),
|
|
Encoding: encoding,
|
|
})
|
|
return
|
|
}
|
|
|
|
writeJSON(w, http.StatusOK, execResponse{
|
|
SandboxID: sandboxID,
|
|
Cmd: req.Cmd,
|
|
Stdout: string(stdout),
|
|
Stderr: string(stderr),
|
|
ExitCode: resp.Msg.ExitCode,
|
|
DurationMs: duration.Milliseconds(),
|
|
Encoding: encoding,
|
|
})
|
|
}
|