diff --git a/cmd/control-plane/main.go b/cmd/control-plane/main.go index 2b22e65..7562e3b 100644 --- a/cmd/control-plane/main.go +++ b/cmd/control-plane/main.go @@ -59,7 +59,7 @@ func main() { srv := api.New(queries, agentClient, pool, []byte(cfg.JWTSecret)) // Start reconciler. - reconciler := api.NewReconciler(queries, agentClient, "default", 30*time.Second) + reconciler := api.NewReconciler(queries, agentClient, "default", 5*time.Second) reconciler.Start(ctx) httpServer := &http.Server{ diff --git a/db/migrations/20260310094104_initial.sql b/db/migrations/20260310094104_initial.sql index 017b12e..c291815 100644 --- a/db/migrations/20260310094104_initial.sql +++ b/db/migrations/20260310094104_initial.sql @@ -8,7 +8,7 @@ CREATE TABLE sandboxes ( status TEXT NOT NULL DEFAULT 'pending', vcpus INTEGER NOT NULL DEFAULT 1, memory_mb INTEGER NOT NULL DEFAULT 512, - timeout_sec INTEGER NOT NULL DEFAULT 300, + timeout_sec INTEGER NOT NULL DEFAULT 0, guest_ip TEXT NOT NULL DEFAULT '', host_ip TEXT NOT NULL DEFAULT '', created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), diff --git a/go.mod b/go.mod index 82bd17a..109b6ad 100644 --- a/go.mod +++ b/go.mod @@ -9,6 +9,7 @@ require ( github.com/google/uuid v1.6.0 github.com/gorilla/websocket v1.5.3 github.com/jackc/pgx/v5 v5.8.0 + github.com/joho/godotenv v1.5.1 github.com/vishvananda/netlink v1.1.1-0.20210330154013-f5de75959ad5 github.com/vishvananda/netns v0.0.0-20210104183010-2eb08e3e575f golang.org/x/crypto v0.49.0 @@ -20,7 +21,6 @@ require ( github.com/jackc/pgpassfile v1.0.0 // indirect github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect github.com/jackc/puddle/v2 v2.2.2 // indirect - github.com/joho/godotenv v1.5.1 // indirect golang.org/x/sync v0.20.0 // indirect golang.org/x/text v0.35.0 // indirect ) diff --git a/internal/api/handlers_sandbox.go b/internal/api/handlers_sandbox.go index 5ffd008..50a629b 100644 --- a/internal/api/handlers_sandbox.go +++ b/internal/api/handlers_sandbox.go @@ -99,9 
+99,7 @@ func (h *sandboxHandler) Create(w http.ResponseWriter, r *http.Request) { if req.MemoryMB <= 0 { req.MemoryMB = 512 } - if req.TimeoutSec <= 0 { - req.TimeoutSec = 300 - } + // timeout_sec = 0 means no auto-pause; only set if explicitly requested. ctx := r.Context() ac := auth.MustFromContext(ctx) @@ -259,7 +257,8 @@ func (h *sandboxHandler) Resume(w http.ResponseWriter, r *http.Request) { } resp, err := h.agent.ResumeSandbox(ctx, connect.NewRequest(&pb.ResumeSandboxRequest{ - SandboxId: sandboxID, + SandboxId: sandboxID, + TimeoutSec: sb.TimeoutSec, })) if err != nil { status, code, msg := agentErrToHTTP(err) @@ -285,6 +284,44 @@ func (h *sandboxHandler) Resume(w http.ResponseWriter, r *http.Request) { writeJSON(w, http.StatusOK, sandboxToResponse(sb)) } +// Ping handles POST /v1/sandboxes/{id}/ping. +// Resets the inactivity timer for a running sandbox. +func (h *sandboxHandler) Ping(w http.ResponseWriter, r *http.Request) { + sandboxID := chi.URLParam(r, "id") + ctx := r.Context() + ac := auth.MustFromContext(ctx) + + sb, err := h.db.GetSandboxByTeam(ctx, db.GetSandboxByTeamParams{ID: sandboxID, TeamID: ac.TeamID}) + if err != nil { + writeError(w, http.StatusNotFound, "not_found", "sandbox not found") + return + } + if sb.Status != "running" { + writeError(w, http.StatusConflict, "invalid_state", "sandbox is not running") + return + } + + if _, err := h.agent.PingSandbox(ctx, connect.NewRequest(&pb.PingSandboxRequest{ + SandboxId: sandboxID, + })); err != nil { + status, code, msg := agentErrToHTTP(err) + writeError(w, status, code, msg) + return + } + + if err := h.db.UpdateLastActive(ctx, db.UpdateLastActiveParams{ + ID: sandboxID, + LastActiveAt: pgtype.Timestamptz{ + Time: time.Now(), + Valid: true, + }, + }); err != nil { + slog.Warn("ping: failed to update last_active_at in DB", "sandbox_id", sandboxID, "error", err) + } + + w.WriteHeader(http.StatusNoContent) +} + // Destroy handles DELETE /v1/sandboxes/{id}. 
func (h *sandboxHandler) Destroy(w http.ResponseWriter, r *http.Request) { sandboxID := chi.URLParam(r, "id") diff --git a/internal/api/handlers_test_ui.go b/internal/api/handlers_test_ui.go index 1161866..f40f183 100644 --- a/internal/api/handlers_test_ui.go +++ b/internal/api/handlers_test_ui.go @@ -175,8 +175,8 @@ const testUIHTML = ` - - + +
No sandboxes
'; return; } - let html = '| ID | Status | Template | vCPUs | Mem | Host IP | Created | Actions |
|---|
| ID | Status | Template | vCPUs | Mem | TTL | Host IP | Created | Actions |
|---|---|---|---|---|---|---|---|---|
| ' + sb.id + ' | '; @@ -425,10 +425,12 @@ function renderSandboxes(sandboxes) { html += '' + esc(sb.template) + ' | '; html += '' + sb.vcpus + ' | '; html += '' + sb.memory_mb + 'MB | '; + html += '' + (sb.timeout_sec ? sb.timeout_sec + 's' : '-') + ' | '; html += '' + (sb.host_ip || '-') + ' | '; html += '' + new Date(sb.created_at).toLocaleTimeString() + ' | '; html += '';
if (sb.status === 'running') {
+ html += '';
html += '';
html += '';
} else if (sb.status === 'paused') {
@@ -497,6 +499,16 @@ async function destroySandbox(id) {
}
}
+async function pingSandbox(id) {
+ log('Pinging ' + id + '...', 'info');
+ try {
+ await api('POST', '/v1/sandboxes/' + id + '/ping', null, 'apikey');
+ log('Pinged ' + id + ' — inactivity timer reset', 'ok');
+ } catch (e) {
+ log('Ping failed: ' + e.message, 'err');
+ }
+}
+
// --- Exec ---
async function execCmd() {
diff --git a/internal/api/openapi.yaml b/internal/api/openapi.yaml
index 6ca4ff1..689ce70 100644
--- a/internal/api/openapi.yaml
+++ b/internal/api/openapi.yaml
@@ -249,6 +249,40 @@ paths:
schema:
$ref: "#/components/schemas/Error"
+ /v1/sandboxes/{id}/ping:
+ parameters:
+ - name: id
+ in: path
+ required: true
+ schema:
+ type: string
+
+ post:
+ summary: Reset sandbox inactivity timer
+ operationId: pingSandbox
+ tags: [sandboxes]
+ security:
+ - apiKeyAuth: []
+ description: |
+ Resets the last_active_at timestamp for a running sandbox, preventing
+ the auto-pause TTL from expiring. Use this as a keepalive for sandboxes
+ that are idle but should remain running.
+ responses:
+ "204":
+ description: Ping acknowledged, inactivity timer reset
+ "404":
+ description: Sandbox not found
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/Error"
+ "409":
+ description: Sandbox not running
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/Error"
+
/v1/sandboxes/{id}/pause:
parameters:
- name: id
@@ -721,7 +755,11 @@ components:
default: 512
timeout_sec:
type: integer
- default: 300
+ default: 0
+ description: >
+ Auto-pause TTL in seconds. The sandbox is automatically paused
+ after this duration of inactivity (no exec or ping). 0 means
+ no auto-pause.
Sandbox:
type: object
diff --git a/internal/api/reconciler.go b/internal/api/reconciler.go
index cd792c7..fcc2388 100644
--- a/internal/api/reconciler.go
+++ b/internal/api/reconciler.go
@@ -49,7 +49,8 @@ func (rc *Reconciler) Start(ctx context.Context) {
}
func (rc *Reconciler) reconcile(ctx context.Context) {
- // Get all sandboxes the host agent knows about.
+ // Single RPC returns both the running sandbox list and any IDs that
+ // were auto-paused by the TTL reaper since the last call.
resp, err := rc.agent.ListSandboxes(ctx, connect.NewRequest(&pb.ListSandboxesRequest{}))
if err != nil {
slog.Warn("reconciler: failed to list sandboxes from host agent", "error", err)
@@ -62,6 +63,12 @@ func (rc *Reconciler) reconcile(ctx context.Context) {
alive[sb.SandboxId] = struct{}{}
}
+ // Build auto-paused set from the same response.
+ autoPausedSet := make(map[string]struct{}, len(resp.Msg.AutoPausedSandboxIds))
+ for _, id := range resp.Msg.AutoPausedSandboxIds {
+ autoPausedSet[id] = struct{}{}
+ }
+
// Get all DB sandboxes for this host that are running.
// Paused sandboxes are excluded: they are expected to not exist on the
// host agent because pause = snapshot + destroy resources.
@@ -86,12 +93,34 @@ func (rc *Reconciler) reconcile(ctx context.Context) {
return
}
- slog.Info("reconciler: marking stale sandboxes as stopped", "count", len(stale), "ids", stale)
+ // Split stale sandboxes into those auto-paused by the TTL reaper vs
+ // those that crashed/were orphaned.
+ var toPause, toStop []string
+ for _, id := range stale {
+ if _, ok := autoPausedSet[id]; ok {
+ toPause = append(toPause, id)
+ } else {
+ toStop = append(toStop, id)
+ }
+ }
- if err := rc.db.BulkUpdateStatusByIDs(ctx, db.BulkUpdateStatusByIDsParams{
- Column1: stale,
- Status: "stopped",
- }); err != nil {
- slog.Warn("reconciler: failed to update stale sandboxes", "error", err)
+ if len(toPause) > 0 {
+ slog.Info("reconciler: marking auto-paused sandboxes", "count", len(toPause), "ids", toPause)
+ if err := rc.db.BulkUpdateStatusByIDs(ctx, db.BulkUpdateStatusByIDsParams{
+ Column1: toPause,
+ Status: "paused",
+ }); err != nil {
+ slog.Warn("reconciler: failed to mark auto-paused sandboxes", "error", err)
+ }
+ }
+
+ if len(toStop) > 0 {
+ slog.Info("reconciler: marking stale sandboxes as stopped", "count", len(toStop), "ids", toStop)
+ if err := rc.db.BulkUpdateStatusByIDs(ctx, db.BulkUpdateStatusByIDsParams{
+ Column1: toStop,
+ Status: "stopped",
+ }); err != nil {
+ slog.Warn("reconciler: failed to update stale sandboxes", "error", err)
+ }
}
}
diff --git a/internal/api/server.go b/internal/api/server.go
index 286bf3e..af3a81d 100644
--- a/internal/api/server.go
+++ b/internal/api/server.go
@@ -64,6 +64,7 @@ func New(queries *db.Queries, agent hostagentv1connect.HostAgentServiceClient, p
r.Delete("/", sandbox.Destroy)
r.Post("/exec", exec.Exec)
r.Get("/exec/stream", execStream.ExecStream)
+ r.Post("/ping", sandbox.Ping)
r.Post("/pause", sandbox.Pause)
r.Post("/resume", sandbox.Resume)
r.Post("/files/write", files.Upload)
diff --git a/internal/hostagent/server.go b/internal/hostagent/server.go
index b0524d2..d545e59 100644
--- a/internal/hostagent/server.go
+++ b/internal/hostagent/server.go
@@ -8,6 +8,7 @@ import (
"mime/multipart"
"net/http"
"net/url"
+ "strings"
"time"
"connectrpc.com/connect"
@@ -71,7 +72,7 @@ func (s *Server) ResumeSandbox(
ctx context.Context,
req *connect.Request[pb.ResumeSandboxRequest],
) (*connect.Response[pb.ResumeSandboxResponse], error) {
- sb, err := s.mgr.Resume(ctx, req.Msg.SandboxId)
+ sb, err := s.mgr.Resume(ctx, req.Msg.SandboxId, int(req.Msg.TimeoutSec))
if err != nil {
return nil, connect.NewError(connect.CodeInternal, err)
}
@@ -106,6 +107,19 @@ func (s *Server) DeleteSnapshot(
return connect.NewResponse(&pb.DeleteSnapshotResponse{}), nil
}
+func (s *Server) PingSandbox(
+ ctx context.Context,
+ req *connect.Request[pb.PingSandboxRequest],
+) (*connect.Response[pb.PingSandboxResponse], error) {
+ if err := s.mgr.Ping(req.Msg.SandboxId); err != nil {
+ if strings.Contains(err.Error(), "not found") {
+ return nil, connect.NewError(connect.CodeNotFound, err)
+ }
+ return nil, connect.NewError(connect.CodeFailedPrecondition, err)
+ }
+ return connect.NewResponse(&pb.PingSandboxResponse{}), nil
+}
+
func (s *Server) Exec(
ctx context.Context,
req *connect.Request[pb.ExecRequest],
@@ -394,6 +408,7 @@ func (s *Server) ListSandboxes(
}
return connect.NewResponse(&pb.ListSandboxesResponse{
- Sandboxes: infos,
+ Sandboxes: infos,
+ AutoPausedSandboxIds: s.mgr.DrainAutoPausedIDs(),
}), nil
}
diff --git a/internal/sandbox/manager.go b/internal/sandbox/manager.go
index 62fe1bc..bd90e0a 100644
--- a/internal/sandbox/manager.go
+++ b/internal/sandbox/manager.go
@@ -40,6 +40,9 @@ type Manager struct {
mu sync.RWMutex
boxes map[string]*sandboxState
stopCh chan struct{}
+
+ autoPausedMu sync.Mutex
+ autoPausedIDs []string
}
// sandboxState holds the runtime state for a single sandbox.
@@ -459,7 +462,7 @@ func (m *Manager) Pause(ctx context.Context, sandboxID string) error {
// Resume restores a paused sandbox from its snapshot using UFFD for
// lazy memory loading. The sandbox gets a new network slot.
-func (m *Manager) Resume(ctx context.Context, sandboxID string) (*models.Sandbox, error) {
+func (m *Manager) Resume(ctx context.Context, sandboxID string, timeoutSec int) (*models.Sandbox, error) {
snapDir := m.cfg.SnapshotsDir
if !snapshot.Exists(snapDir, sandboxID) {
return nil, fmt.Errorf("no snapshot found for sandbox %s", sandboxID)
@@ -575,7 +578,7 @@ func (m *Manager) Resume(ctx context.Context, sandboxID string) (*models.Sandbox
SandboxID: sandboxID,
KernelPath: m.cfg.KernelPath,
RootfsPath: dmDev.DevicePath,
- VCPUs: 1, // Placeholder; overridden by snapshot.
+ VCPUs: 1, // Placeholder; overridden by snapshot.
MemoryMB: int(header.Metadata.Size / (1024 * 1024)), // Placeholder; overridden by snapshot.
NetworkNamespace: slot.NamespaceID,
TapDevice: slot.TapName,
@@ -622,7 +625,7 @@ func (m *Manager) Resume(ctx context.Context, sandboxID string) (*models.Sandbox
Template: "",
VCPUs: vmCfg.VCPUs,
MemoryMB: vmCfg.MemoryMB,
- TimeoutSec: 0,
+ TimeoutSec: timeoutSec,
SlotIndex: slotIdx,
HostIP: slot.HostIP,
RootfsPath: dmDev.DevicePath,
@@ -1033,6 +1036,33 @@ func (m *Manager) GetClient(sandboxID string) (*envdclient.Client, error) {
return sb.client, nil
}
+// Ping resets the inactivity timer for a running sandbox.
+func (m *Manager) Ping(sandboxID string) error {
+ m.mu.Lock()
+ defer m.mu.Unlock()
+
+ sb, ok := m.boxes[sandboxID]
+ if !ok {
+ return fmt.Errorf("sandbox not found: %s", sandboxID)
+ }
+ if sb.Status != models.StatusRunning {
+ return fmt.Errorf("sandbox %s is not running (status: %s)", sandboxID, sb.Status)
+ }
+ sb.LastActiveAt = time.Now()
+ return nil
+}
+
+// DrainAutoPausedIDs returns and clears the list of sandbox IDs that were
+// automatically paused by the TTL reaper since the last call.
+func (m *Manager) DrainAutoPausedIDs() []string {
+ m.autoPausedMu.Lock()
+ defer m.autoPausedMu.Unlock()
+
+ ids := m.autoPausedIDs
+ m.autoPausedIDs = nil
+ return ids
+}
+
func (m *Manager) get(sandboxID string) (*sandboxState, error) {
m.mu.RLock()
defer m.mu.RUnlock()
@@ -1048,7 +1078,7 @@ func (m *Manager) get(sandboxID string) (*sandboxState, error) {
// that have exceeded their TTL (timeout_sec of inactivity).
func (m *Manager) StartTTLReaper(ctx context.Context) {
go func() {
- ticker := time.NewTicker(10 * time.Second)
+ ticker := time.NewTicker(2 * time.Second)
defer ticker.Stop()
for {
@@ -1064,7 +1094,7 @@ func (m *Manager) StartTTLReaper(ctx context.Context) {
}()
}
-func (m *Manager) reapExpired(ctx context.Context) {
+func (m *Manager) reapExpired(_ context.Context) {
m.mu.RLock()
var expired []string
now := time.Now()
@@ -1072,7 +1102,7 @@ func (m *Manager) reapExpired(ctx context.Context) {
if sb.TimeoutSec <= 0 {
continue
}
- if sb.Status != models.StatusRunning && sb.Status != models.StatusPaused {
+ if sb.Status != models.StatusRunning {
continue
}
if now.Sub(sb.LastActiveAt) > time.Duration(sb.TimeoutSec)*time.Second {
@@ -1082,10 +1112,23 @@ func (m *Manager) reapExpired(ctx context.Context) {
m.mu.RUnlock()
for _, id := range expired {
- slog.Info("TTL expired, destroying sandbox", "id", id)
- if err := m.Destroy(ctx, id); err != nil {
- slog.Warn("TTL reap failed", "id", id, "error", err)
+ slog.Info("TTL expired, auto-pausing sandbox", "id", id)
+ // Use a detached context so that an app shutdown does not cancel
+ // a pause mid-flight, which would leave the VM frozen without a
+ // valid snapshot.
+ pauseCtx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
+ err := m.Pause(pauseCtx, id)
+ cancel()
+ if err != nil {
+ slog.Warn("TTL auto-pause failed, destroying sandbox", "id", id, "error", err)
+ if destroyErr := m.Destroy(context.Background(), id); destroyErr != nil {
+ slog.Warn("TTL destroy after failed pause also failed", "id", id, "error", destroyErr)
+ }
+ continue
}
+ m.autoPausedMu.Lock()
+ m.autoPausedIDs = append(m.autoPausedIDs, id)
+ m.autoPausedMu.Unlock()
}
}
diff --git a/proto/hostagent/gen/hostagent.pb.go b/proto/hostagent/gen/hostagent.pb.go
index 824cfb6..447f1f7 100644
--- a/proto/hostagent/gen/hostagent.pb.go
+++ b/proto/hostagent/gen/hostagent.pb.go
@@ -31,8 +31,8 @@ type CreateSandboxRequest struct {
Vcpus int32 `protobuf:"varint,2,opt,name=vcpus,proto3" json:"vcpus,omitempty"`
// Memory in MB (default: 512).
MemoryMb int32 `protobuf:"varint,3,opt,name=memory_mb,json=memoryMb,proto3" json:"memory_mb,omitempty"`
- // TTL in seconds. Sandbox is auto-destroyed after this duration of
- // inactivity. 0 means no auto-destroy.
+ // TTL in seconds. Sandbox is auto-paused after this duration of
+ // inactivity. 0 means no auto-pause.
TimeoutSec int32 `protobuf:"varint,4,opt,name=timeout_sec,json=timeoutSec,proto3" json:"timeout_sec,omitempty"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
@@ -324,8 +324,11 @@ func (*PauseSandboxResponse) Descriptor() ([]byte, []int) {
}
type ResumeSandboxRequest struct {
- state protoimpl.MessageState `protogen:"open.v1"`
- SandboxId string `protobuf:"bytes,1,opt,name=sandbox_id,json=sandboxId,proto3" json:"sandbox_id,omitempty"`
+ state protoimpl.MessageState `protogen:"open.v1"`
+ SandboxId string `protobuf:"bytes,1,opt,name=sandbox_id,json=sandboxId,proto3" json:"sandbox_id,omitempty"`
+ // TTL in seconds restored from the DB so the reaper can auto-pause
+ // the sandbox again after inactivity. 0 means no auto-pause.
+ TimeoutSec int32 `protobuf:"varint,2,opt,name=timeout_sec,json=timeoutSec,proto3" json:"timeout_sec,omitempty"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
@@ -367,6 +370,13 @@ func (x *ResumeSandboxRequest) GetSandboxId() string {
return ""
}
+func (x *ResumeSandboxRequest) GetTimeoutSec() int32 {
+ if x != nil {
+ return x.TimeoutSec
+ }
+ return 0
+}
+
type ResumeSandboxResponse struct {
state protoimpl.MessageState `protogen:"open.v1"`
SandboxId string `protobuf:"bytes,1,opt,name=sandbox_id,json=sandboxId,proto3" json:"sandbox_id,omitempty"`
@@ -777,10 +787,13 @@ func (*ListSandboxesRequest) Descriptor() ([]byte, []int) {
}
type ListSandboxesResponse struct {
- state protoimpl.MessageState `protogen:"open.v1"`
- Sandboxes []*SandboxInfo `protobuf:"bytes,1,rep,name=sandboxes,proto3" json:"sandboxes,omitempty"`
- unknownFields protoimpl.UnknownFields
- sizeCache protoimpl.SizeCache
+ state protoimpl.MessageState `protogen:"open.v1"`
+ Sandboxes []*SandboxInfo `protobuf:"bytes,1,rep,name=sandboxes,proto3" json:"sandboxes,omitempty"`
+ // IDs of sandboxes that were automatically paused by the TTL reaper
+ // since the last call. Drained on read.
+ AutoPausedSandboxIds []string `protobuf:"bytes,2,rep,name=auto_paused_sandbox_ids,json=autoPausedSandboxIds,proto3" json:"auto_paused_sandbox_ids,omitempty"`
+ unknownFields protoimpl.UnknownFields
+ sizeCache protoimpl.SizeCache
}
func (x *ListSandboxesResponse) Reset() {
@@ -820,6 +833,13 @@ func (x *ListSandboxesResponse) GetSandboxes() []*SandboxInfo {
return nil
}
+func (x *ListSandboxesResponse) GetAutoPausedSandboxIds() []string {
+ if x != nil {
+ return x.AutoPausedSandboxIds
+ }
+ return nil
+}
+
type SandboxInfo struct {
state protoimpl.MessageState `protogen:"open.v1"`
SandboxId string `protobuf:"bytes,1,opt,name=sandbox_id,json=sandboxId,proto3" json:"sandbox_id,omitempty"`
@@ -1730,6 +1750,86 @@ func (x *ReadFileStreamResponse) GetChunk() []byte {
return nil
}
+type PingSandboxRequest struct {
+ state protoimpl.MessageState `protogen:"open.v1"`
+ SandboxId string `protobuf:"bytes,1,opt,name=sandbox_id,json=sandboxId,proto3" json:"sandbox_id,omitempty"`
+ unknownFields protoimpl.UnknownFields
+ sizeCache protoimpl.SizeCache
+}
+
+func (x *PingSandboxRequest) Reset() {
+ *x = PingSandboxRequest{}
+ mi := &file_hostagent_proto_msgTypes[31]
+ ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+ ms.StoreMessageInfo(mi)
+}
+
+func (x *PingSandboxRequest) String() string {
+ return protoimpl.X.MessageStringOf(x)
+}
+
+func (*PingSandboxRequest) ProtoMessage() {}
+
+func (x *PingSandboxRequest) ProtoReflect() protoreflect.Message {
+ mi := &file_hostagent_proto_msgTypes[31]
+ if x != nil {
+ ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+ if ms.LoadMessageInfo() == nil {
+ ms.StoreMessageInfo(mi)
+ }
+ return ms
+ }
+ return mi.MessageOf(x)
+}
+
+// Deprecated: Use PingSandboxRequest.ProtoReflect.Descriptor instead.
+func (*PingSandboxRequest) Descriptor() ([]byte, []int) {
+ return file_hostagent_proto_rawDescGZIP(), []int{31}
+}
+
+func (x *PingSandboxRequest) GetSandboxId() string {
+ if x != nil {
+ return x.SandboxId
+ }
+ return ""
+}
+
+type PingSandboxResponse struct {
+ state protoimpl.MessageState `protogen:"open.v1"`
+ unknownFields protoimpl.UnknownFields
+ sizeCache protoimpl.SizeCache
+}
+
+func (x *PingSandboxResponse) Reset() {
+ *x = PingSandboxResponse{}
+ mi := &file_hostagent_proto_msgTypes[32]
+ ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+ ms.StoreMessageInfo(mi)
+}
+
+func (x *PingSandboxResponse) String() string {
+ return protoimpl.X.MessageStringOf(x)
+}
+
+func (*PingSandboxResponse) ProtoMessage() {}
+
+func (x *PingSandboxResponse) ProtoReflect() protoreflect.Message {
+ mi := &file_hostagent_proto_msgTypes[32]
+ if x != nil {
+ ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+ if ms.LoadMessageInfo() == nil {
+ ms.StoreMessageInfo(mi)
+ }
+ return ms
+ }
+ return mi.MessageOf(x)
+}
+
+// Deprecated: Use PingSandboxResponse.ProtoReflect.Descriptor instead.
+func (*PingSandboxResponse) Descriptor() ([]byte, []int) {
+ return file_hostagent_proto_rawDescGZIP(), []int{32}
+}
+
var File_hostagent_proto protoreflect.FileDescriptor
const file_hostagent_proto_rawDesc = "" +
@@ -1755,10 +1855,12 @@ const file_hostagent_proto_rawDesc = "" +
"\x13PauseSandboxRequest\x12\x1d\n" +
"\n" +
"sandbox_id\x18\x01 \x01(\tR\tsandboxId\"\x16\n" +
- "\x14PauseSandboxResponse\"5\n" +
+ "\x14PauseSandboxResponse\"V\n" +
"\x14ResumeSandboxRequest\x12\x1d\n" +
"\n" +
- "sandbox_id\x18\x01 \x01(\tR\tsandboxId\"g\n" +
+ "sandbox_id\x18\x01 \x01(\tR\tsandboxId\x12\x1f\n" +
+ "\vtimeout_sec\x18\x02 \x01(\x05R\n" +
+ "timeoutSec\"g\n" +
"\x15ResumeSandboxResponse\x12\x1d\n" +
"\n" +
"sandbox_id\x18\x01 \x01(\tR\tsandboxId\x12\x16\n" +
@@ -1786,9 +1888,10 @@ const file_hostagent_proto_rawDesc = "" +
"\x06stdout\x18\x01 \x01(\fR\x06stdout\x12\x16\n" +
"\x06stderr\x18\x02 \x01(\fR\x06stderr\x12\x1b\n" +
"\texit_code\x18\x03 \x01(\x05R\bexitCode\"\x16\n" +
- "\x14ListSandboxesRequest\"P\n" +
+ "\x14ListSandboxesRequest\"\x87\x01\n" +
"\x15ListSandboxesResponse\x127\n" +
- "\tsandboxes\x18\x01 \x03(\v2\x19.hostagent.v1.SandboxInfoR\tsandboxes\"\xa4\x02\n" +
+ "\tsandboxes\x18\x01 \x03(\v2\x19.hostagent.v1.SandboxInfoR\tsandboxes\x125\n" +
+ "\x17auto_paused_sandbox_ids\x18\x02 \x03(\tR\x14autoPausedSandboxIds\"\xa4\x02\n" +
"\vSandboxInfo\x12\x1d\n" +
"\n" +
"sandbox_id\x18\x01 \x01(\tR\tsandboxId\x12\x16\n" +
@@ -1848,7 +1951,11 @@ const file_hostagent_proto_rawDesc = "" +
"sandbox_id\x18\x01 \x01(\tR\tsandboxId\x12\x12\n" +
"\x04path\x18\x02 \x01(\tR\x04path\".\n" +
"\x16ReadFileStreamResponse\x12\x14\n" +
- "\x05chunk\x18\x01 \x01(\fR\x05chunk2\xfa\b\n" +
+ "\x05chunk\x18\x01 \x01(\fR\x05chunk\"3\n" +
+ "\x12PingSandboxRequest\x12\x1d\n" +
+ "\n" +
+ "sandbox_id\x18\x01 \x01(\tR\tsandboxId\"\x15\n" +
+ "\x13PingSandboxResponse2\xce\t\n" +
"\x10HostAgentService\x12X\n" +
"\rCreateSandbox\x12\".hostagent.v1.CreateSandboxRequest\x1a#.hostagent.v1.CreateSandboxResponse\x12[\n" +
"\x0eDestroySandbox\x12#.hostagent.v1.DestroySandboxRequest\x1a$.hostagent.v1.DestroySandboxResponse\x12U\n" +
@@ -1863,7 +1970,8 @@ const file_hostagent_proto_rawDesc = "" +
"\n" +
"ExecStream\x12\x1f.hostagent.v1.ExecStreamRequest\x1a .hostagent.v1.ExecStreamResponse0\x01\x12`\n" +
"\x0fWriteFileStream\x12$.hostagent.v1.WriteFileStreamRequest\x1a%.hostagent.v1.WriteFileStreamResponse(\x01\x12]\n" +
- "\x0eReadFileStream\x12#.hostagent.v1.ReadFileStreamRequest\x1a$.hostagent.v1.ReadFileStreamResponse0\x01B\xb0\x01\n" +
+ "\x0eReadFileStream\x12#.hostagent.v1.ReadFileStreamRequest\x1a$.hostagent.v1.ReadFileStreamResponse0\x01\x12R\n" +
+ "\vPingSandbox\x12 .hostagent.v1.PingSandboxRequest\x1a!.hostagent.v1.PingSandboxResponseB\xb0\x01\n" +
"\x10com.hostagent.v1B\x0eHostagentProtoP\x01Z;git.omukk.dev/wrenn/sandbox/proto/hostagent/gen;hostagentv1\xa2\x02\x03HXX\xaa\x02\fHostagent.V1\xca\x02\fHostagent\\V1\xe2\x02\x18Hostagent\\V1\\GPBMetadata\xea\x02\rHostagent::V1b\x06proto3"
var (
@@ -1878,7 +1986,7 @@ func file_hostagent_proto_rawDescGZIP() []byte {
return file_hostagent_proto_rawDescData
}
-var file_hostagent_proto_msgTypes = make([]protoimpl.MessageInfo, 31)
+var file_hostagent_proto_msgTypes = make([]protoimpl.MessageInfo, 33)
var file_hostagent_proto_goTypes = []any{
(*CreateSandboxRequest)(nil), // 0: hostagent.v1.CreateSandboxRequest
(*CreateSandboxResponse)(nil), // 1: hostagent.v1.CreateSandboxResponse
@@ -1911,6 +2019,8 @@ var file_hostagent_proto_goTypes = []any{
(*WriteFileStreamResponse)(nil), // 28: hostagent.v1.WriteFileStreamResponse
(*ReadFileStreamRequest)(nil), // 29: hostagent.v1.ReadFileStreamRequest
(*ReadFileStreamResponse)(nil), // 30: hostagent.v1.ReadFileStreamResponse
+ (*PingSandboxRequest)(nil), // 31: hostagent.v1.PingSandboxRequest
+ (*PingSandboxResponse)(nil), // 32: hostagent.v1.PingSandboxResponse
}
var file_hostagent_proto_depIdxs = []int32{
16, // 0: hostagent.v1.ListSandboxesResponse.sandboxes:type_name -> hostagent.v1.SandboxInfo
@@ -1931,21 +2041,23 @@ var file_hostagent_proto_depIdxs = []int32{
21, // 15: hostagent.v1.HostAgentService.ExecStream:input_type -> hostagent.v1.ExecStreamRequest
26, // 16: hostagent.v1.HostAgentService.WriteFileStream:input_type -> hostagent.v1.WriteFileStreamRequest
29, // 17: hostagent.v1.HostAgentService.ReadFileStream:input_type -> hostagent.v1.ReadFileStreamRequest
- 1, // 18: hostagent.v1.HostAgentService.CreateSandbox:output_type -> hostagent.v1.CreateSandboxResponse
- 3, // 19: hostagent.v1.HostAgentService.DestroySandbox:output_type -> hostagent.v1.DestroySandboxResponse
- 5, // 20: hostagent.v1.HostAgentService.PauseSandbox:output_type -> hostagent.v1.PauseSandboxResponse
- 7, // 21: hostagent.v1.HostAgentService.ResumeSandbox:output_type -> hostagent.v1.ResumeSandboxResponse
- 13, // 22: hostagent.v1.HostAgentService.Exec:output_type -> hostagent.v1.ExecResponse
- 15, // 23: hostagent.v1.HostAgentService.ListSandboxes:output_type -> hostagent.v1.ListSandboxesResponse
- 18, // 24: hostagent.v1.HostAgentService.WriteFile:output_type -> hostagent.v1.WriteFileResponse
- 20, // 25: hostagent.v1.HostAgentService.ReadFile:output_type -> hostagent.v1.ReadFileResponse
- 9, // 26: hostagent.v1.HostAgentService.CreateSnapshot:output_type -> hostagent.v1.CreateSnapshotResponse
- 11, // 27: hostagent.v1.HostAgentService.DeleteSnapshot:output_type -> hostagent.v1.DeleteSnapshotResponse
- 22, // 28: hostagent.v1.HostAgentService.ExecStream:output_type -> hostagent.v1.ExecStreamResponse
- 28, // 29: hostagent.v1.HostAgentService.WriteFileStream:output_type -> hostagent.v1.WriteFileStreamResponse
- 30, // 30: hostagent.v1.HostAgentService.ReadFileStream:output_type -> hostagent.v1.ReadFileStreamResponse
- 18, // [18:31] is the sub-list for method output_type
- 5, // [5:18] is the sub-list for method input_type
+ 31, // 18: hostagent.v1.HostAgentService.PingSandbox:input_type -> hostagent.v1.PingSandboxRequest
+ 1, // 19: hostagent.v1.HostAgentService.CreateSandbox:output_type -> hostagent.v1.CreateSandboxResponse
+ 3, // 20: hostagent.v1.HostAgentService.DestroySandbox:output_type -> hostagent.v1.DestroySandboxResponse
+ 5, // 21: hostagent.v1.HostAgentService.PauseSandbox:output_type -> hostagent.v1.PauseSandboxResponse
+ 7, // 22: hostagent.v1.HostAgentService.ResumeSandbox:output_type -> hostagent.v1.ResumeSandboxResponse
+ 13, // 23: hostagent.v1.HostAgentService.Exec:output_type -> hostagent.v1.ExecResponse
+ 15, // 24: hostagent.v1.HostAgentService.ListSandboxes:output_type -> hostagent.v1.ListSandboxesResponse
+ 18, // 25: hostagent.v1.HostAgentService.WriteFile:output_type -> hostagent.v1.WriteFileResponse
+ 20, // 26: hostagent.v1.HostAgentService.ReadFile:output_type -> hostagent.v1.ReadFileResponse
+ 9, // 27: hostagent.v1.HostAgentService.CreateSnapshot:output_type -> hostagent.v1.CreateSnapshotResponse
+ 11, // 28: hostagent.v1.HostAgentService.DeleteSnapshot:output_type -> hostagent.v1.DeleteSnapshotResponse
+ 22, // 29: hostagent.v1.HostAgentService.ExecStream:output_type -> hostagent.v1.ExecStreamResponse
+ 28, // 30: hostagent.v1.HostAgentService.WriteFileStream:output_type -> hostagent.v1.WriteFileStreamResponse
+ 30, // 31: hostagent.v1.HostAgentService.ReadFileStream:output_type -> hostagent.v1.ReadFileStreamResponse
+ 32, // 32: hostagent.v1.HostAgentService.PingSandbox:output_type -> hostagent.v1.PingSandboxResponse
+ 19, // [19:33] is the sub-list for method output_type
+ 5, // [5:19] is the sub-list for method input_type
5, // [5:5] is the sub-list for extension type_name
5, // [5:5] is the sub-list for extension extendee
0, // [0:5] is the sub-list for field type_name
@@ -1975,7 +2087,7 @@ func file_hostagent_proto_init() {
GoPackagePath: reflect.TypeOf(x{}).PkgPath(),
RawDescriptor: unsafe.Slice(unsafe.StringData(file_hostagent_proto_rawDesc), len(file_hostagent_proto_rawDesc)),
NumEnums: 0,
- NumMessages: 31,
+ NumMessages: 33,
NumExtensions: 0,
NumServices: 1,
},
diff --git a/proto/hostagent/gen/hostagentv1connect/hostagent.connect.go b/proto/hostagent/gen/hostagentv1connect/hostagent.connect.go
index f82f777..6eb5d45 100644
--- a/proto/hostagent/gen/hostagentv1connect/hostagent.connect.go
+++ b/proto/hostagent/gen/hostagentv1connect/hostagent.connect.go
@@ -71,6 +71,9 @@ const (
// HostAgentServiceReadFileStreamProcedure is the fully-qualified name of the HostAgentService's
// ReadFileStream RPC.
HostAgentServiceReadFileStreamProcedure = "/hostagent.v1.HostAgentService/ReadFileStream"
+ // HostAgentServicePingSandboxProcedure is the fully-qualified name of the HostAgentService's
+ // PingSandbox RPC.
+ HostAgentServicePingSandboxProcedure = "/hostagent.v1.HostAgentService/PingSandbox"
)
// HostAgentServiceClient is a client for the hostagent.v1.HostAgentService service.
@@ -103,6 +106,8 @@ type HostAgentServiceClient interface {
WriteFileStream(context.Context) *connect.ClientStreamForClient[gen.WriteFileStreamRequest, gen.WriteFileStreamResponse]
// ReadFileStream reads a file from a sandbox and streams it back in chunks.
ReadFileStream(context.Context, *connect.Request[gen.ReadFileStreamRequest]) (*connect.ServerStreamForClient[gen.ReadFileStreamResponse], error)
+ // PingSandbox resets the inactivity timer for a running sandbox.
+ PingSandbox(context.Context, *connect.Request[gen.PingSandboxRequest]) (*connect.Response[gen.PingSandboxResponse], error)
}
// NewHostAgentServiceClient constructs a client for the hostagent.v1.HostAgentService service. By
@@ -194,6 +199,12 @@ func NewHostAgentServiceClient(httpClient connect.HTTPClient, baseURL string, op
connect.WithSchema(hostAgentServiceMethods.ByName("ReadFileStream")),
connect.WithClientOptions(opts...),
),
+ pingSandbox: connect.NewClient[gen.PingSandboxRequest, gen.PingSandboxResponse](
+ httpClient,
+ baseURL+HostAgentServicePingSandboxProcedure,
+ connect.WithSchema(hostAgentServiceMethods.ByName("PingSandbox")),
+ connect.WithClientOptions(opts...),
+ ),
}
}
@@ -212,6 +223,7 @@ type hostAgentServiceClient struct {
execStream *connect.Client[gen.ExecStreamRequest, gen.ExecStreamResponse]
writeFileStream *connect.Client[gen.WriteFileStreamRequest, gen.WriteFileStreamResponse]
readFileStream *connect.Client[gen.ReadFileStreamRequest, gen.ReadFileStreamResponse]
+ pingSandbox *connect.Client[gen.PingSandboxRequest, gen.PingSandboxResponse]
}
// CreateSandbox calls hostagent.v1.HostAgentService.CreateSandbox.
@@ -279,6 +291,11 @@ func (c *hostAgentServiceClient) ReadFileStream(ctx context.Context, req *connec
return c.readFileStream.CallServerStream(ctx, req)
}
+// PingSandbox calls hostagent.v1.HostAgentService.PingSandbox.
+func (c *hostAgentServiceClient) PingSandbox(ctx context.Context, req *connect.Request[gen.PingSandboxRequest]) (*connect.Response[gen.PingSandboxResponse], error) {
+ return c.pingSandbox.CallUnary(ctx, req)
+}
+
// HostAgentServiceHandler is an implementation of the hostagent.v1.HostAgentService service.
type HostAgentServiceHandler interface {
// CreateSandbox boots a new microVM with the given configuration.
@@ -309,6 +326,8 @@ type HostAgentServiceHandler interface {
WriteFileStream(context.Context, *connect.ClientStream[gen.WriteFileStreamRequest]) (*connect.Response[gen.WriteFileStreamResponse], error)
// ReadFileStream reads a file from a sandbox and streams it back in chunks.
ReadFileStream(context.Context, *connect.Request[gen.ReadFileStreamRequest], *connect.ServerStream[gen.ReadFileStreamResponse]) error
+ // PingSandbox resets the inactivity timer for a running sandbox.
+ PingSandbox(context.Context, *connect.Request[gen.PingSandboxRequest]) (*connect.Response[gen.PingSandboxResponse], error)
}
// NewHostAgentServiceHandler builds an HTTP handler from the service implementation. It returns the
@@ -396,6 +415,12 @@ func NewHostAgentServiceHandler(svc HostAgentServiceHandler, opts ...connect.Han
connect.WithSchema(hostAgentServiceMethods.ByName("ReadFileStream")),
connect.WithHandlerOptions(opts...),
)
+ hostAgentServicePingSandboxHandler := connect.NewUnaryHandler(
+ HostAgentServicePingSandboxProcedure,
+ svc.PingSandbox,
+ connect.WithSchema(hostAgentServiceMethods.ByName("PingSandbox")),
+ connect.WithHandlerOptions(opts...),
+ )
return "/hostagent.v1.HostAgentService/", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
switch r.URL.Path {
case HostAgentServiceCreateSandboxProcedure:
@@ -424,6 +449,8 @@ func NewHostAgentServiceHandler(svc HostAgentServiceHandler, opts ...connect.Han
hostAgentServiceWriteFileStreamHandler.ServeHTTP(w, r)
case HostAgentServiceReadFileStreamProcedure:
hostAgentServiceReadFileStreamHandler.ServeHTTP(w, r)
+ case HostAgentServicePingSandboxProcedure:
+ hostAgentServicePingSandboxHandler.ServeHTTP(w, r)
default:
http.NotFound(w, r)
}
@@ -484,3 +511,7 @@ func (UnimplementedHostAgentServiceHandler) WriteFileStream(context.Context, *co
func (UnimplementedHostAgentServiceHandler) ReadFileStream(context.Context, *connect.Request[gen.ReadFileStreamRequest], *connect.ServerStream[gen.ReadFileStreamResponse]) error {
return connect.NewError(connect.CodeUnimplemented, errors.New("hostagent.v1.HostAgentService.ReadFileStream is not implemented"))
}
+
+func (UnimplementedHostAgentServiceHandler) PingSandbox(context.Context, *connect.Request[gen.PingSandboxRequest]) (*connect.Response[gen.PingSandboxResponse], error) {
+ return nil, connect.NewError(connect.CodeUnimplemented, errors.New("hostagent.v1.HostAgentService.PingSandbox is not implemented"))
+}
diff --git a/proto/hostagent/hostagent.proto b/proto/hostagent/hostagent.proto
index 78c671a..b9ceccf 100644
--- a/proto/hostagent/hostagent.proto
+++ b/proto/hostagent/hostagent.proto
@@ -45,6 +45,10 @@ service HostAgentService {
// ReadFileStream reads a file from a sandbox and streams it back in chunks.
rpc ReadFileStream(ReadFileStreamRequest) returns (stream ReadFileStreamResponse);
+
+ // PingSandbox resets the inactivity timer for a running sandbox.
+ rpc PingSandbox(PingSandboxRequest) returns (PingSandboxResponse);
+
}
message CreateSandboxRequest {
@@ -60,8 +64,8 @@ message CreateSandboxRequest {
// Memory in MB (default: 512).
int32 memory_mb = 3;
- // TTL in seconds. Sandbox is auto-destroyed after this duration of
- // inactivity. 0 means no auto-destroy.
+ // TTL in seconds. Sandbox is auto-paused after this duration of
+ // inactivity. 0 means no auto-pause.
int32 timeout_sec = 4;
}
@@ -85,6 +89,10 @@ message PauseSandboxResponse {}
message ResumeSandboxRequest {
string sandbox_id = 1;
+
+ // TTL in seconds restored from the DB so the reaper can auto-pause
+ // the sandbox again after inactivity. 0 means no auto-pause.
+ int32 timeout_sec = 2;
}
message ResumeSandboxResponse {
@@ -127,6 +135,10 @@ message ListSandboxesRequest {}
message ListSandboxesResponse {
repeated SandboxInfo sandboxes = 1;
+
+ // IDs of sandboxes that were automatically paused by the TTL reaper
+ // since the last call. Drained on read.
+ repeated string auto_paused_sandbox_ids = 2;
}
message SandboxInfo {
@@ -215,3 +227,12 @@ message ReadFileStreamRequest {
message ReadFileStreamResponse {
bytes chunk = 1;
}
+
+// ── Ping ────────────────────────────────────────────────────────────
+
+message PingSandboxRequest {
+ string sandbox_id = 1;
+}
+
+message PingSandboxResponse {}
+