diff --git a/cmd/control-plane/main.go b/cmd/control-plane/main.go index 2b22e65..7562e3b 100644 --- a/cmd/control-plane/main.go +++ b/cmd/control-plane/main.go @@ -59,7 +59,7 @@ func main() { srv := api.New(queries, agentClient, pool, []byte(cfg.JWTSecret)) // Start reconciler. - reconciler := api.NewReconciler(queries, agentClient, "default", 30*time.Second) + reconciler := api.NewReconciler(queries, agentClient, "default", 5*time.Second) reconciler.Start(ctx) httpServer := &http.Server{ diff --git a/db/migrations/20260310094104_initial.sql b/db/migrations/20260310094104_initial.sql index 017b12e..c291815 100644 --- a/db/migrations/20260310094104_initial.sql +++ b/db/migrations/20260310094104_initial.sql @@ -8,7 +8,7 @@ CREATE TABLE sandboxes ( status TEXT NOT NULL DEFAULT 'pending', vcpus INTEGER NOT NULL DEFAULT 1, memory_mb INTEGER NOT NULL DEFAULT 512, - timeout_sec INTEGER NOT NULL DEFAULT 300, + timeout_sec INTEGER NOT NULL DEFAULT 0, guest_ip TEXT NOT NULL DEFAULT '', host_ip TEXT NOT NULL DEFAULT '', created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), diff --git a/go.mod b/go.mod index 82bd17a..109b6ad 100644 --- a/go.mod +++ b/go.mod @@ -9,6 +9,7 @@ require ( github.com/google/uuid v1.6.0 github.com/gorilla/websocket v1.5.3 github.com/jackc/pgx/v5 v5.8.0 + github.com/joho/godotenv v1.5.1 github.com/vishvananda/netlink v1.1.1-0.20210330154013-f5de75959ad5 github.com/vishvananda/netns v0.0.0-20210104183010-2eb08e3e575f golang.org/x/crypto v0.49.0 @@ -20,7 +21,6 @@ require ( github.com/jackc/pgpassfile v1.0.0 // indirect github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect github.com/jackc/puddle/v2 v2.2.2 // indirect - github.com/joho/godotenv v1.5.1 // indirect golang.org/x/sync v0.20.0 // indirect golang.org/x/text v0.35.0 // indirect ) diff --git a/internal/api/handlers_sandbox.go b/internal/api/handlers_sandbox.go index 5ffd008..50a629b 100644 --- a/internal/api/handlers_sandbox.go +++ b/internal/api/handlers_sandbox.go @@ -99,9 +99,7 @@ func (h *sandboxHandler) Create(w http.ResponseWriter, r *http.Request) { if req.MemoryMB <= 0 { req.MemoryMB = 512 } - if req.TimeoutSec <= 0 { - req.TimeoutSec = 300 - } + // timeout_sec = 0 means no auto-pause; only set if explicitly requested. ctx := r.Context() ac := auth.MustFromContext(ctx) @@ -259,7 +257,8 @@ func (h *sandboxHandler) Resume(w http.ResponseWriter, r *http.Request) { } resp, err := h.agent.ResumeSandbox(ctx, connect.NewRequest(&pb.ResumeSandboxRequest{ - SandboxId: sandboxID, + SandboxId: sandboxID, + TimeoutSec: sb.TimeoutSec, })) if err != nil { status, code, msg := agentErrToHTTP(err) @@ -285,6 +284,44 @@ func (h *sandboxHandler) Resume(w http.ResponseWriter, r *http.Request) { writeJSON(w, http.StatusOK, sandboxToResponse(sb)) } +// Ping handles POST /v1/sandboxes/{id}/ping. +// Resets the inactivity timer for a running sandbox. +func (h *sandboxHandler) Ping(w http.ResponseWriter, r *http.Request) { + sandboxID := chi.URLParam(r, "id") + ctx := r.Context() + ac := auth.MustFromContext(ctx) + + sb, err := h.db.GetSandboxByTeam(ctx, db.GetSandboxByTeamParams{ID: sandboxID, TeamID: ac.TeamID}) + if err != nil { + writeError(w, http.StatusNotFound, "not_found", "sandbox not found") + return + } + if sb.Status != "running" { + writeError(w, http.StatusConflict, "invalid_state", "sandbox is not running") + return + } + + if _, err := h.agent.PingSandbox(ctx, connect.NewRequest(&pb.PingSandboxRequest{ + SandboxId: sandboxID, + })); err != nil { + status, code, msg := agentErrToHTTP(err) + writeError(w, status, code, msg) + return + } + + if err := h.db.UpdateLastActive(ctx, db.UpdateLastActiveParams{ + ID: sandboxID, + LastActiveAt: pgtype.Timestamptz{ + Time: time.Now(), + Valid: true, + }, + }); err != nil { + slog.Warn("ping: failed to update last_active_at in DB", "sandbox_id", sandboxID, "error", err) + } + + w.WriteHeader(http.StatusNoContent) +} + // Destroy handles DELETE /v1/sandboxes/{id}. func (h *sandboxHandler) Destroy(w http.ResponseWriter, r *http.Request) { sandboxID := chi.URLParam(r, "id") diff --git a/internal/api/handlers_test_ui.go b/internal/api/handlers_test_ui.go index 1161866..f40f183 100644 --- a/internal/api/handlers_test_ui.go +++ b/internal/api/handlers_test_ui.go @@ -175,8 +175,8 @@ const testUIHTML = ` - - + +
@@ -417,7 +417,7 @@ function renderSandboxes(sandboxes) { document.getElementById('sandboxes-table').innerHTML = '

No sandboxes

'; return; } - let html = ''; + let html = '
IDStatusTemplatevCPUsMemHost IPCreatedActions
'; for (const sb of sandboxes) { html += ''; html += ''; @@ -425,10 +425,12 @@ function renderSandboxes(sandboxes) { html += ''; html += ''; html += ''; + html += ''; html += ''; html += ''; html += '
IDStatusTemplatevCPUsMemTTLHost IPCreatedActions
' + sb.id + '' + esc(sb.template) + '' + sb.vcpus + '' + sb.memory_mb + 'MB' + (sb.timeout_sec ? sb.timeout_sec + 's' : '-') + '' + (sb.host_ip || '-') + '' + new Date(sb.created_at).toLocaleTimeString() + '
'; if (sb.status === 'running') { + html += ''; html += ''; html += ''; } else if (sb.status === 'paused') { @@ -497,6 +499,16 @@ async function destroySandbox(id) { } } +async function pingSandbox(id) { + log('Pinging ' + id + '...', 'info'); + try { + await api('POST', '/v1/sandboxes/' + id + '/ping', null, 'apikey'); + log('Pinged ' + id + ' — inactivity timer reset', 'ok'); + } catch (e) { + log('Ping failed: ' + e.message, 'err'); + } +} + // --- Exec --- async function execCmd() { diff --git a/internal/api/openapi.yaml b/internal/api/openapi.yaml index 6ca4ff1..689ce70 100644 --- a/internal/api/openapi.yaml +++ b/internal/api/openapi.yaml @@ -249,6 +249,40 @@ paths: schema: $ref: "#/components/schemas/Error" + /v1/sandboxes/{id}/ping: + parameters: + - name: id + in: path + required: true + schema: + type: string + + post: + summary: Reset sandbox inactivity timer + operationId: pingSandbox + tags: [sandboxes] + security: + - apiKeyAuth: [] + description: | + Resets the last_active_at timestamp for a running sandbox, preventing + the auto-pause TTL from expiring. Use this as a keepalive for sandboxes + that are idle but should remain running. + responses: + "204": + description: Ping acknowledged, inactivity timer reset + "404": + description: Sandbox not found + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + "409": + description: Sandbox not running + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + /v1/sandboxes/{id}/pause: parameters: - name: id @@ -721,7 +755,11 @@ components: default: 512 timeout_sec: type: integer - default: 300 + default: 0 + description: > + Auto-pause TTL in seconds. The sandbox is automatically paused + after this duration of inactivity (no exec or ping). 0 means + no auto-pause. Sandbox: type: object diff --git a/internal/api/reconciler.go b/internal/api/reconciler.go index cd792c7..fcc2388 100644 --- a/internal/api/reconciler.go +++ b/internal/api/reconciler.go @@ -49,7 +49,8 @@ func (rc *Reconciler) Start(ctx context.Context) { } func (rc *Reconciler) reconcile(ctx context.Context) { - // Get all sandboxes the host agent knows about. + // Single RPC returns both the running sandbox list and any IDs that + // were auto-paused by the TTL reaper since the last call. resp, err := rc.agent.ListSandboxes(ctx, connect.NewRequest(&pb.ListSandboxesRequest{})) if err != nil { slog.Warn("reconciler: failed to list sandboxes from host agent", "error", err) @@ -62,6 +63,12 @@ func (rc *Reconciler) reconcile(ctx context.Context) { alive[sb.SandboxId] = struct{}{} } + // Build auto-paused set from the same response. + autoPausedSet := make(map[string]struct{}, len(resp.Msg.AutoPausedSandboxIds)) + for _, id := range resp.Msg.AutoPausedSandboxIds { + autoPausedSet[id] = struct{}{} + } + // Get all DB sandboxes for this host that are running. // Paused sandboxes are excluded: they are expected to not exist on the // host agent because pause = snapshot + destroy resources. @@ -86,12 +93,34 @@ func (rc *Reconciler) reconcile(ctx context.Context) { return } - slog.Info("reconciler: marking stale sandboxes as stopped", "count", len(stale), "ids", stale) + // Split stale sandboxes into those auto-paused by the TTL reaper vs + // those that crashed/were orphaned. + var toPause, toStop []string + for _, id := range stale { + if _, ok := autoPausedSet[id]; ok { + toPause = append(toPause, id) + } else { + toStop = append(toStop, id) + } + } - if err := rc.db.BulkUpdateStatusByIDs(ctx, db.BulkUpdateStatusByIDsParams{ - Column1: stale, - Status: "stopped", - }); err != nil { - slog.Warn("reconciler: failed to update stale sandboxes", "error", err) + if len(toPause) > 0 { + slog.Info("reconciler: marking auto-paused sandboxes", "count", len(toPause), "ids", toPause) + if err := rc.db.BulkUpdateStatusByIDs(ctx, db.BulkUpdateStatusByIDsParams{ + Column1: toPause, + Status: "paused", + }); err != nil { + slog.Warn("reconciler: failed to mark auto-paused sandboxes", "error", err) + } + } + + if len(toStop) > 0 { + slog.Info("reconciler: marking stale sandboxes as stopped", "count", len(toStop), "ids", toStop) + if err := rc.db.BulkUpdateStatusByIDs(ctx, db.BulkUpdateStatusByIDsParams{ + Column1: toStop, + Status: "stopped", + }); err != nil { + slog.Warn("reconciler: failed to update stale sandboxes", "error", err) + } } } diff --git a/internal/api/server.go b/internal/api/server.go index 286bf3e..af3a81d 100644 --- a/internal/api/server.go +++ b/internal/api/server.go @@ -64,6 +64,7 @@ func New(queries *db.Queries, agent hostagentv1connect.HostAgentServiceClient, p r.Delete("/", sandbox.Destroy) r.Post("/exec", exec.Exec) r.Get("/exec/stream", execStream.ExecStream) + r.Post("/ping", sandbox.Ping) r.Post("/pause", sandbox.Pause) r.Post("/resume", sandbox.Resume) r.Post("/files/write", files.Upload) diff --git a/internal/hostagent/server.go b/internal/hostagent/server.go index b0524d2..d545e59 100644 --- a/internal/hostagent/server.go +++ b/internal/hostagent/server.go @@ -8,6 +8,7 @@ import ( "mime/multipart" "net/http" "net/url" + "strings" "time" "connectrpc.com/connect" @@ -71,7 +72,7 @@ func (s *Server) ResumeSandbox( ctx context.Context, req *connect.Request[pb.ResumeSandboxRequest], ) (*connect.Response[pb.ResumeSandboxResponse], error) { - sb, err := s.mgr.Resume(ctx, req.Msg.SandboxId) + sb, err := s.mgr.Resume(ctx, req.Msg.SandboxId, int(req.Msg.TimeoutSec)) if err != nil { return nil, connect.NewError(connect.CodeInternal, err) } @@ -106,6 +107,19 @@ func (s *Server) DeleteSnapshot( return connect.NewResponse(&pb.DeleteSnapshotResponse{}), nil } +func (s *Server) PingSandbox( + ctx context.Context, + req *connect.Request[pb.PingSandboxRequest], +) (*connect.Response[pb.PingSandboxResponse], error) { + if err := s.mgr.Ping(req.Msg.SandboxId); err != nil { + if strings.Contains(err.Error(), "not found") { + return nil, connect.NewError(connect.CodeNotFound, err) + } + return nil, connect.NewError(connect.CodeFailedPrecondition, err) + } + return connect.NewResponse(&pb.PingSandboxResponse{}), nil +} + func (s *Server) Exec( ctx context.Context, req *connect.Request[pb.ExecRequest], @@ -394,6 +408,7 @@ func (s *Server) ListSandboxes( } return connect.NewResponse(&pb.ListSandboxesResponse{ - Sandboxes: infos, + Sandboxes: infos, + AutoPausedSandboxIds: s.mgr.DrainAutoPausedIDs(), }), nil } diff --git a/internal/sandbox/manager.go b/internal/sandbox/manager.go index 62fe1bc..bd90e0a 100644 --- a/internal/sandbox/manager.go +++ b/internal/sandbox/manager.go @@ -40,6 +40,9 @@ type Manager struct { mu sync.RWMutex boxes map[string]*sandboxState stopCh chan struct{} + + autoPausedMu sync.Mutex + autoPausedIDs []string } // sandboxState holds the runtime state for a single sandbox. @@ -459,7 +462,7 @@ func (m *Manager) Pause(ctx context.Context, sandboxID string) error { // Resume restores a paused sandbox from its snapshot using UFFD for // lazy memory loading. The sandbox gets a new network slot. -func (m *Manager) Resume(ctx context.Context, sandboxID string) (*models.Sandbox, error) { +func (m *Manager) Resume(ctx context.Context, sandboxID string, timeoutSec int) (*models.Sandbox, error) { snapDir := m.cfg.SnapshotsDir if !snapshot.Exists(snapDir, sandboxID) { return nil, fmt.Errorf("no snapshot found for sandbox %s", sandboxID) @@ -575,7 +578,7 @@ func (m *Manager) Resume(ctx context.Context, sandboxID string) (*models.Sandbox SandboxID: sandboxID, KernelPath: m.cfg.KernelPath, RootfsPath: dmDev.DevicePath, - VCPUs: 1, // Placeholder; overridden by snapshot. + VCPUs: 1, // Placeholder; overridden by snapshot. MemoryMB: int(header.Metadata.Size / (1024 * 1024)), // Placeholder; overridden by snapshot. NetworkNamespace: slot.NamespaceID, TapDevice: slot.TapName, @@ -622,7 +625,7 @@ func (m *Manager) Resume(ctx context.Context, sandboxID string) (*models.Sandbox Template: "", VCPUs: vmCfg.VCPUs, MemoryMB: vmCfg.MemoryMB, - TimeoutSec: 0, + TimeoutSec: timeoutSec, SlotIndex: slotIdx, HostIP: slot.HostIP, RootfsPath: dmDev.DevicePath, @@ -1033,6 +1036,33 @@ func (m *Manager) GetClient(sandboxID string) (*envdclient.Client, error) { return sb.client, nil } +// Ping resets the inactivity timer for a running sandbox. +func (m *Manager) Ping(sandboxID string) error { + m.mu.Lock() + defer m.mu.Unlock() + + sb, ok := m.boxes[sandboxID] + if !ok { + return fmt.Errorf("sandbox not found: %s", sandboxID) + } + if sb.Status != models.StatusRunning { + return fmt.Errorf("sandbox %s is not running (status: %s)", sandboxID, sb.Status) + } + sb.LastActiveAt = time.Now() + return nil +} + +// DrainAutoPausedIDs returns and clears the list of sandbox IDs that were +// automatically paused by the TTL reaper since the last call. +func (m *Manager) DrainAutoPausedIDs() []string { + m.autoPausedMu.Lock() + defer m.autoPausedMu.Unlock() + + ids := m.autoPausedIDs + m.autoPausedIDs = nil + return ids +} + func (m *Manager) get(sandboxID string) (*sandboxState, error) { m.mu.RLock() defer m.mu.RUnlock() @@ -1048,7 +1078,7 @@ func (m *Manager) get(sandboxID string) (*sandboxState, error) { // that have exceeded their TTL (timeout_sec of inactivity). func (m *Manager) StartTTLReaper(ctx context.Context) { go func() { - ticker := time.NewTicker(10 * time.Second) + ticker := time.NewTicker(2 * time.Second) defer ticker.Stop() for { @@ -1064,7 +1094,7 @@ func (m *Manager) StartTTLReaper(ctx context.Context) { }() } -func (m *Manager) reapExpired(ctx context.Context) { +func (m *Manager) reapExpired(_ context.Context) { m.mu.RLock() var expired []string now := time.Now() @@ -1072,7 +1102,7 @@ func (m *Manager) reapExpired(ctx context.Context) { if sb.TimeoutSec <= 0 { continue } - if sb.Status != models.StatusRunning && sb.Status != models.StatusPaused { + if sb.Status != models.StatusRunning { continue } if now.Sub(sb.LastActiveAt) > time.Duration(sb.TimeoutSec)*time.Second { @@ -1082,10 +1112,23 @@ func (m *Manager) reapExpired(ctx context.Context) { m.mu.RUnlock() for _, id := range expired { - slog.Info("TTL expired, destroying sandbox", "id", id) - if err := m.Destroy(ctx, id); err != nil { - slog.Warn("TTL reap failed", "id", id, "error", err) + slog.Info("TTL expired, auto-pausing sandbox", "id", id) + // Use a detached context so that an app shutdown does not cancel + // a pause mid-flight, which would leave the VM frozen without a + // valid snapshot. + pauseCtx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) + err := m.Pause(pauseCtx, id) + cancel() + if err != nil { + slog.Warn("TTL auto-pause failed, destroying sandbox", "id", id, "error", err) + if destroyErr := m.Destroy(context.Background(), id); destroyErr != nil { + slog.Warn("TTL destroy after failed pause also failed", "id", id, "error", destroyErr) + } + continue } + m.autoPausedMu.Lock() + m.autoPausedIDs = append(m.autoPausedIDs, id) + m.autoPausedMu.Unlock() } } diff --git a/proto/hostagent/gen/hostagent.pb.go b/proto/hostagent/gen/hostagent.pb.go index 824cfb6..447f1f7 100644 --- a/proto/hostagent/gen/hostagent.pb.go +++ b/proto/hostagent/gen/hostagent.pb.go @@ -31,8 +31,8 @@ type CreateSandboxRequest struct { Vcpus int32 `protobuf:"varint,2,opt,name=vcpus,proto3" json:"vcpus,omitempty"` // Memory in MB (default: 512). MemoryMb int32 `protobuf:"varint,3,opt,name=memory_mb,json=memoryMb,proto3" json:"memory_mb,omitempty"` - // TTL in seconds. Sandbox is auto-destroyed after this duration of - // inactivity. 0 means no auto-destroy. + // TTL in seconds. Sandbox is auto-paused after this duration of + // inactivity. 0 means no auto-pause. TimeoutSec int32 `protobuf:"varint,4,opt,name=timeout_sec,json=timeoutSec,proto3" json:"timeout_sec,omitempty"` unknownFields protoimpl.UnknownFields sizeCache protoimpl.SizeCache @@ -324,8 +324,11 @@ func (*PauseSandboxResponse) Descriptor() ([]byte, []int) { } type ResumeSandboxRequest struct { - state protoimpl.MessageState `protogen:"open.v1"` - SandboxId string `protobuf:"bytes,1,opt,name=sandbox_id,json=sandboxId,proto3" json:"sandbox_id,omitempty"` + state protoimpl.MessageState `protogen:"open.v1"` + SandboxId string `protobuf:"bytes,1,opt,name=sandbox_id,json=sandboxId,proto3" json:"sandbox_id,omitempty"` + // TTL in seconds restored from the DB so the reaper can auto-pause + // the sandbox again after inactivity. 0 means no auto-pause. + TimeoutSec int32 `protobuf:"varint,2,opt,name=timeout_sec,json=timeoutSec,proto3" json:"timeout_sec,omitempty"` unknownFields protoimpl.UnknownFields sizeCache protoimpl.SizeCache } @@ -367,6 +370,13 @@ func (x *ResumeSandboxRequest) GetSandboxId() string { return "" } +func (x *ResumeSandboxRequest) GetTimeoutSec() int32 { + if x != nil { + return x.TimeoutSec + } + return 0 +} + type ResumeSandboxResponse struct { state protoimpl.MessageState `protogen:"open.v1"` SandboxId string `protobuf:"bytes,1,opt,name=sandbox_id,json=sandboxId,proto3" json:"sandbox_id,omitempty"` @@ -777,10 +787,13 @@ func (*ListSandboxesRequest) Descriptor() ([]byte, []int) { } type ListSandboxesResponse struct { - state protoimpl.MessageState `protogen:"open.v1"` - Sandboxes []*SandboxInfo `protobuf:"bytes,1,rep,name=sandboxes,proto3" json:"sandboxes,omitempty"` - unknownFields protoimpl.UnknownFields - sizeCache protoimpl.SizeCache + state protoimpl.MessageState `protogen:"open.v1"` + Sandboxes []*SandboxInfo `protobuf:"bytes,1,rep,name=sandboxes,proto3" json:"sandboxes,omitempty"` + // IDs of sandboxes that were automatically paused by the TTL reaper + // since the last call. Drained on read. + AutoPausedSandboxIds []string `protobuf:"bytes,2,rep,name=auto_paused_sandbox_ids,json=autoPausedSandboxIds,proto3" json:"auto_paused_sandbox_ids,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache } func (x *ListSandboxesResponse) Reset() { @@ -820,6 +833,13 @@ func (x *ListSandboxesResponse) GetSandboxes() []*SandboxInfo { return nil } +func (x *ListSandboxesResponse) GetAutoPausedSandboxIds() []string { + if x != nil { + return x.AutoPausedSandboxIds + } + return nil +} + type SandboxInfo struct { state protoimpl.MessageState `protogen:"open.v1"` SandboxId string `protobuf:"bytes,1,opt,name=sandbox_id,json=sandboxId,proto3" json:"sandbox_id,omitempty"` @@ -1730,6 +1750,86 @@ func (x *ReadFileStreamResponse) GetChunk() []byte { return nil } +type PingSandboxRequest struct { + state protoimpl.MessageState `protogen:"open.v1"` + SandboxId string `protobuf:"bytes,1,opt,name=sandbox_id,json=sandboxId,proto3" json:"sandbox_id,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *PingSandboxRequest) Reset() { + *x = PingSandboxRequest{} + mi := &file_hostagent_proto_msgTypes[31] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *PingSandboxRequest) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*PingSandboxRequest) ProtoMessage() {} + +func (x *PingSandboxRequest) ProtoReflect() protoreflect.Message { + mi := &file_hostagent_proto_msgTypes[31] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use PingSandboxRequest.ProtoReflect.Descriptor instead. +func (*PingSandboxRequest) Descriptor() ([]byte, []int) { + return file_hostagent_proto_rawDescGZIP(), []int{31} +} + +func (x *PingSandboxRequest) GetSandboxId() string { + if x != nil { + return x.SandboxId + } + return "" +} + +type PingSandboxResponse struct { + state protoimpl.MessageState `protogen:"open.v1"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *PingSandboxResponse) Reset() { + *x = PingSandboxResponse{} + mi := &file_hostagent_proto_msgTypes[32] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *PingSandboxResponse) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*PingSandboxResponse) ProtoMessage() {} + +func (x *PingSandboxResponse) ProtoReflect() protoreflect.Message { + mi := &file_hostagent_proto_msgTypes[32] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use PingSandboxResponse.ProtoReflect.Descriptor instead. +func (*PingSandboxResponse) Descriptor() ([]byte, []int) { + return file_hostagent_proto_rawDescGZIP(), []int{32} +} + var File_hostagent_proto protoreflect.FileDescriptor const file_hostagent_proto_rawDesc = "" + @@ -1755,10 +1855,12 @@ const file_hostagent_proto_rawDesc = "" + "\x13PauseSandboxRequest\x12\x1d\n" + "\n" + "sandbox_id\x18\x01 \x01(\tR\tsandboxId\"\x16\n" + - "\x14PauseSandboxResponse\"5\n" + + "\x14PauseSandboxResponse\"V\n" + "\x14ResumeSandboxRequest\x12\x1d\n" + "\n" + - "sandbox_id\x18\x01 \x01(\tR\tsandboxId\"g\n" + + "sandbox_id\x18\x01 \x01(\tR\tsandboxId\x12\x1f\n" + + "\vtimeout_sec\x18\x02 \x01(\x05R\n" + + "timeoutSec\"g\n" + "\x15ResumeSandboxResponse\x12\x1d\n" + "\n" + "sandbox_id\x18\x01 \x01(\tR\tsandboxId\x12\x16\n" + @@ -1786,9 +1888,10 @@ const file_hostagent_proto_rawDesc = "" + "\x06stdout\x18\x01 \x01(\fR\x06stdout\x12\x16\n" + "\x06stderr\x18\x02 \x01(\fR\x06stderr\x12\x1b\n" + "\texit_code\x18\x03 \x01(\x05R\bexitCode\"\x16\n" + - "\x14ListSandboxesRequest\"P\n" + + "\x14ListSandboxesRequest\"\x87\x01\n" + "\x15ListSandboxesResponse\x127\n" + - "\tsandboxes\x18\x01 \x03(\v2\x19.hostagent.v1.SandboxInfoR\tsandboxes\"\xa4\x02\n" + + "\tsandboxes\x18\x01 \x03(\v2\x19.hostagent.v1.SandboxInfoR\tsandboxes\x125\n" + + "\x17auto_paused_sandbox_ids\x18\x02 \x03(\tR\x14autoPausedSandboxIds\"\xa4\x02\n" + "\vSandboxInfo\x12\x1d\n" + "\n" + "sandbox_id\x18\x01 \x01(\tR\tsandboxId\x12\x16\n" + @@ -1848,7 +1951,11 @@ const file_hostagent_proto_rawDesc = "" + "sandbox_id\x18\x01 \x01(\tR\tsandboxId\x12\x12\n" + "\x04path\x18\x02 \x01(\tR\x04path\".\n" + "\x16ReadFileStreamResponse\x12\x14\n" + - "\x05chunk\x18\x01 \x01(\fR\x05chunk2\xfa\b\n" + + "\x05chunk\x18\x01 \x01(\fR\x05chunk\"3\n" + + "\x12PingSandboxRequest\x12\x1d\n" + + "\n" + + "sandbox_id\x18\x01 \x01(\tR\tsandboxId\"\x15\n" + + "\x13PingSandboxResponse2\xce\t\n" + "\x10HostAgentService\x12X\n" + "\rCreateSandbox\x12\".hostagent.v1.CreateSandboxRequest\x1a#.hostagent.v1.CreateSandboxResponse\x12[\n" + "\x0eDestroySandbox\x12#.hostagent.v1.DestroySandboxRequest\x1a$.hostagent.v1.DestroySandboxResponse\x12U\n" + @@ -1863,7 +1970,8 @@ const file_hostagent_proto_rawDesc = "" + "\n" + "ExecStream\x12\x1f.hostagent.v1.ExecStreamRequest\x1a .hostagent.v1.ExecStreamResponse0\x01\x12`\n" + "\x0fWriteFileStream\x12$.hostagent.v1.WriteFileStreamRequest\x1a%.hostagent.v1.WriteFileStreamResponse(\x01\x12]\n" + - "\x0eReadFileStream\x12#.hostagent.v1.ReadFileStreamRequest\x1a$.hostagent.v1.ReadFileStreamResponse0\x01B\xb0\x01\n" + + "\x0eReadFileStream\x12#.hostagent.v1.ReadFileStreamRequest\x1a$.hostagent.v1.ReadFileStreamResponse0\x01\x12R\n" + + "\vPingSandbox\x12 .hostagent.v1.PingSandboxRequest\x1a!.hostagent.v1.PingSandboxResponseB\xb0\x01\n" + "\x10com.hostagent.v1B\x0eHostagentProtoP\x01Z;git.omukk.dev/wrenn/sandbox/proto/hostagent/gen;hostagentv1\xa2\x02\x03HXX\xaa\x02\fHostagent.V1\xca\x02\fHostagent\\V1\xe2\x02\x18Hostagent\\V1\\GPBMetadata\xea\x02\rHostagent::V1b\x06proto3" var ( @@ -1878,7 +1986,7 @@ func file_hostagent_proto_rawDescGZIP() []byte { return file_hostagent_proto_rawDescData } -var file_hostagent_proto_msgTypes = make([]protoimpl.MessageInfo, 31) +var file_hostagent_proto_msgTypes = make([]protoimpl.MessageInfo, 33) var file_hostagent_proto_goTypes = []any{ (*CreateSandboxRequest)(nil), // 0: hostagent.v1.CreateSandboxRequest (*CreateSandboxResponse)(nil), // 1: hostagent.v1.CreateSandboxResponse @@ -1911,6 +2019,8 @@ var file_hostagent_proto_goTypes = []any{ (*WriteFileStreamResponse)(nil), // 28: hostagent.v1.WriteFileStreamResponse (*ReadFileStreamRequest)(nil), // 29: hostagent.v1.ReadFileStreamRequest (*ReadFileStreamResponse)(nil), // 30: hostagent.v1.ReadFileStreamResponse + (*PingSandboxRequest)(nil), // 31: hostagent.v1.PingSandboxRequest + (*PingSandboxResponse)(nil), // 32: hostagent.v1.PingSandboxResponse } var file_hostagent_proto_depIdxs = []int32{ 16, // 0: hostagent.v1.ListSandboxesResponse.sandboxes:type_name -> hostagent.v1.SandboxInfo @@ -1931,21 +2041,23 @@ var file_hostagent_proto_depIdxs = []int32{ 21, // 15: hostagent.v1.HostAgentService.ExecStream:input_type -> hostagent.v1.ExecStreamRequest 26, // 16: hostagent.v1.HostAgentService.WriteFileStream:input_type -> hostagent.v1.WriteFileStreamRequest 29, // 17: hostagent.v1.HostAgentService.ReadFileStream:input_type -> hostagent.v1.ReadFileStreamRequest - 1, // 18: hostagent.v1.HostAgentService.CreateSandbox:output_type -> hostagent.v1.CreateSandboxResponse - 3, // 19: hostagent.v1.HostAgentService.DestroySandbox:output_type -> hostagent.v1.DestroySandboxResponse - 5, // 20: hostagent.v1.HostAgentService.PauseSandbox:output_type -> hostagent.v1.PauseSandboxResponse - 7, // 21: hostagent.v1.HostAgentService.ResumeSandbox:output_type -> hostagent.v1.ResumeSandboxResponse - 13, // 22: hostagent.v1.HostAgentService.Exec:output_type -> hostagent.v1.ExecResponse - 15, // 23: hostagent.v1.HostAgentService.ListSandboxes:output_type -> hostagent.v1.ListSandboxesResponse - 18, // 24: hostagent.v1.HostAgentService.WriteFile:output_type -> hostagent.v1.WriteFileResponse - 20, // 25: hostagent.v1.HostAgentService.ReadFile:output_type -> hostagent.v1.ReadFileResponse - 9, // 26: hostagent.v1.HostAgentService.CreateSnapshot:output_type -> hostagent.v1.CreateSnapshotResponse - 11, // 27: hostagent.v1.HostAgentService.DeleteSnapshot:output_type -> hostagent.v1.DeleteSnapshotResponse - 22, // 28: hostagent.v1.HostAgentService.ExecStream:output_type -> hostagent.v1.ExecStreamResponse - 28, // 29: hostagent.v1.HostAgentService.WriteFileStream:output_type -> hostagent.v1.WriteFileStreamResponse - 30, // 30: hostagent.v1.HostAgentService.ReadFileStream:output_type -> hostagent.v1.ReadFileStreamResponse - 18, // [18:31] is the sub-list for method output_type - 5, // [5:18] is the sub-list for method input_type + 31, // 18: hostagent.v1.HostAgentService.PingSandbox:input_type -> hostagent.v1.PingSandboxRequest + 1, // 19: hostagent.v1.HostAgentService.CreateSandbox:output_type -> hostagent.v1.CreateSandboxResponse + 3, // 20: hostagent.v1.HostAgentService.DestroySandbox:output_type -> hostagent.v1.DestroySandboxResponse + 5, // 21: hostagent.v1.HostAgentService.PauseSandbox:output_type -> hostagent.v1.PauseSandboxResponse + 7, // 22: hostagent.v1.HostAgentService.ResumeSandbox:output_type -> hostagent.v1.ResumeSandboxResponse + 13, // 23: hostagent.v1.HostAgentService.Exec:output_type -> hostagent.v1.ExecResponse + 15, // 24: hostagent.v1.HostAgentService.ListSandboxes:output_type -> hostagent.v1.ListSandboxesResponse + 18, // 25: hostagent.v1.HostAgentService.WriteFile:output_type -> hostagent.v1.WriteFileResponse + 20, // 26: hostagent.v1.HostAgentService.ReadFile:output_type -> hostagent.v1.ReadFileResponse + 9, // 27: hostagent.v1.HostAgentService.CreateSnapshot:output_type -> hostagent.v1.CreateSnapshotResponse + 11, // 28: hostagent.v1.HostAgentService.DeleteSnapshot:output_type -> hostagent.v1.DeleteSnapshotResponse + 22, // 29: hostagent.v1.HostAgentService.ExecStream:output_type -> hostagent.v1.ExecStreamResponse + 28, // 30: hostagent.v1.HostAgentService.WriteFileStream:output_type -> hostagent.v1.WriteFileStreamResponse + 30, // 31: hostagent.v1.HostAgentService.ReadFileStream:output_type -> hostagent.v1.ReadFileStreamResponse + 32, // 32: hostagent.v1.HostAgentService.PingSandbox:output_type -> hostagent.v1.PingSandboxResponse + 19, // [19:33] is the sub-list for method output_type + 5, // [5:19] is the sub-list for method input_type 5, // [5:5] is the sub-list for extension type_name 5, // [5:5] is the sub-list for extension extendee 0, // [0:5] is the sub-list for field type_name @@ -1975,7 +2087,7 @@ func file_hostagent_proto_init() { GoPackagePath: reflect.TypeOf(x{}).PkgPath(), RawDescriptor: unsafe.Slice(unsafe.StringData(file_hostagent_proto_rawDesc), len(file_hostagent_proto_rawDesc)), NumEnums: 0, - NumMessages: 31, + NumMessages: 33, NumExtensions: 0, NumServices: 1, }, diff --git a/proto/hostagent/gen/hostagentv1connect/hostagent.connect.go b/proto/hostagent/gen/hostagentv1connect/hostagent.connect.go index f82f777..6eb5d45 100644 --- a/proto/hostagent/gen/hostagentv1connect/hostagent.connect.go +++ b/proto/hostagent/gen/hostagentv1connect/hostagent.connect.go @@ -71,6 +71,9 @@ const ( // HostAgentServiceReadFileStreamProcedure is the fully-qualified name of the HostAgentService's // ReadFileStream RPC. HostAgentServiceReadFileStreamProcedure = "/hostagent.v1.HostAgentService/ReadFileStream" + // HostAgentServicePingSandboxProcedure is the fully-qualified name of the HostAgentService's + // PingSandbox RPC. + HostAgentServicePingSandboxProcedure = "/hostagent.v1.HostAgentService/PingSandbox" ) // HostAgentServiceClient is a client for the hostagent.v1.HostAgentService service. @@ -103,6 +106,8 @@ type HostAgentServiceClient interface { WriteFileStream(context.Context) *connect.ClientStreamForClient[gen.WriteFileStreamRequest, gen.WriteFileStreamResponse] // ReadFileStream reads a file from a sandbox and streams it back in chunks. ReadFileStream(context.Context, *connect.Request[gen.ReadFileStreamRequest]) (*connect.ServerStreamForClient[gen.ReadFileStreamResponse], error) + // PingSandbox resets the inactivity timer for a running sandbox. + PingSandbox(context.Context, *connect.Request[gen.PingSandboxRequest]) (*connect.Response[gen.PingSandboxResponse], error) } // NewHostAgentServiceClient constructs a client for the hostagent.v1.HostAgentService service. By @@ -194,6 +199,12 @@ func NewHostAgentServiceClient(httpClient connect.HTTPClient, baseURL string, op connect.WithSchema(hostAgentServiceMethods.ByName("ReadFileStream")), connect.WithClientOptions(opts...), ), + pingSandbox: connect.NewClient[gen.PingSandboxRequest, gen.PingSandboxResponse]( + httpClient, + baseURL+HostAgentServicePingSandboxProcedure, + connect.WithSchema(hostAgentServiceMethods.ByName("PingSandbox")), + connect.WithClientOptions(opts...), + ), } } @@ -212,6 +223,7 @@ type hostAgentServiceClient struct { execStream *connect.Client[gen.ExecStreamRequest, gen.ExecStreamResponse] writeFileStream *connect.Client[gen.WriteFileStreamRequest, gen.WriteFileStreamResponse] readFileStream *connect.Client[gen.ReadFileStreamRequest, gen.ReadFileStreamResponse] + pingSandbox *connect.Client[gen.PingSandboxRequest, gen.PingSandboxResponse] } // CreateSandbox calls hostagent.v1.HostAgentService.CreateSandbox. @@ -279,6 +291,11 @@ func (c *hostAgentServiceClient) ReadFileStream(ctx context.Context, req *connec return c.readFileStream.CallServerStream(ctx, req) } +// PingSandbox calls hostagent.v1.HostAgentService.PingSandbox. +func (c *hostAgentServiceClient) PingSandbox(ctx context.Context, req *connect.Request[gen.PingSandboxRequest]) (*connect.Response[gen.PingSandboxResponse], error) { + return c.pingSandbox.CallUnary(ctx, req) +} + // HostAgentServiceHandler is an implementation of the hostagent.v1.HostAgentService service. type HostAgentServiceHandler interface { // CreateSandbox boots a new microVM with the given configuration. @@ -309,6 +326,8 @@ type HostAgentServiceHandler interface { WriteFileStream(context.Context, *connect.ClientStream[gen.WriteFileStreamRequest]) (*connect.Response[gen.WriteFileStreamResponse], error) // ReadFileStream reads a file from a sandbox and streams it back in chunks. ReadFileStream(context.Context, *connect.Request[gen.ReadFileStreamRequest], *connect.ServerStream[gen.ReadFileStreamResponse]) error + // PingSandbox resets the inactivity timer for a running sandbox. + PingSandbox(context.Context, *connect.Request[gen.PingSandboxRequest]) (*connect.Response[gen.PingSandboxResponse], error) } // NewHostAgentServiceHandler builds an HTTP handler from the service implementation. It returns the @@ -396,6 +415,12 @@ func NewHostAgentServiceHandler(svc HostAgentServiceHandler, opts ...connect.Han connect.WithSchema(hostAgentServiceMethods.ByName("ReadFileStream")), connect.WithHandlerOptions(opts...), ) + hostAgentServicePingSandboxHandler := connect.NewUnaryHandler( + HostAgentServicePingSandboxProcedure, + svc.PingSandbox, + connect.WithSchema(hostAgentServiceMethods.ByName("PingSandbox")), + connect.WithHandlerOptions(opts...), + ) return "/hostagent.v1.HostAgentService/", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { switch r.URL.Path { case HostAgentServiceCreateSandboxProcedure: @@ -424,6 +449,8 @@ func NewHostAgentServiceHandler(svc HostAgentServiceHandler, opts ...connect.Han hostAgentServiceWriteFileStreamHandler.ServeHTTP(w, r) case HostAgentServiceReadFileStreamProcedure: hostAgentServiceReadFileStreamHandler.ServeHTTP(w, r) + case HostAgentServicePingSandboxProcedure: + hostAgentServicePingSandboxHandler.ServeHTTP(w, r) default: http.NotFound(w, r) } @@ -484,3 +511,7 @@ func (UnimplementedHostAgentServiceHandler) WriteFileStream(context.Context, *co func (UnimplementedHostAgentServiceHandler) ReadFileStream(context.Context, *connect.Request[gen.ReadFileStreamRequest], *connect.ServerStream[gen.ReadFileStreamResponse]) error { return connect.NewError(connect.CodeUnimplemented, errors.New("hostagent.v1.HostAgentService.ReadFileStream is not implemented")) } + +func (UnimplementedHostAgentServiceHandler) PingSandbox(context.Context, *connect.Request[gen.PingSandboxRequest]) (*connect.Response[gen.PingSandboxResponse], error) { + return nil, connect.NewError(connect.CodeUnimplemented, errors.New("hostagent.v1.HostAgentService.PingSandbox is not implemented")) +} diff --git a/proto/hostagent/hostagent.proto b/proto/hostagent/hostagent.proto index 78c671a..b9ceccf 100644 --- a/proto/hostagent/hostagent.proto +++ b/proto/hostagent/hostagent.proto @@ -45,6 +45,10 @@ service HostAgentService { // ReadFileStream reads a file from a sandbox and streams it back in chunks. rpc ReadFileStream(ReadFileStreamRequest) returns (stream ReadFileStreamResponse); + + // PingSandbox resets the inactivity timer for a running sandbox. + rpc PingSandbox(PingSandboxRequest) returns (PingSandboxResponse); + } message CreateSandboxRequest { @@ -60,8 +64,8 @@ message CreateSandboxRequest { // Memory in MB (default: 512). int32 memory_mb = 3; - // TTL in seconds. Sandbox is auto-destroyed after this duration of - // inactivity. 0 means no auto-destroy. + // TTL in seconds. Sandbox is auto-paused after this duration of + // inactivity. 0 means no auto-pause. int32 timeout_sec = 4; } @@ -85,6 +89,10 @@ message PauseSandboxResponse {} message ResumeSandboxRequest { string sandbox_id = 1; + + // TTL in seconds restored from the DB so the reaper can auto-pause + // the sandbox again after inactivity. 0 means no auto-pause. + int32 timeout_sec = 2; } message ResumeSandboxResponse { @@ -127,6 +135,10 @@ message ListSandboxesRequest {} message ListSandboxesResponse { repeated SandboxInfo sandboxes = 1; + + // IDs of sandboxes that were automatically paused by the TTL reaper + // since the last call. Drained on read. + repeated string auto_paused_sandbox_ids = 2; } message SandboxInfo { @@ -215,3 +227,12 @@ message ReadFileStreamRequest { message ReadFileStreamResponse { bytes chunk = 1; } + +// ── Ping ──────────────────────────────────────────────────────────── + +message PingSandboxRequest { + string sandbox_id = 1; +} + +message PingSandboxResponse {} +