Add auto-pause TTL and ping endpoint for sandbox inactivity management
Replace the existing auto-destroy TTL behavior with auto-pause: when a
sandbox exceeds its timeout_sec of inactivity, the TTL reaper now pauses
it (snapshot + teardown) instead of destroying it, preserving the ability
to resume later.
Key changes:
- TTL reaper calls Pause instead of Destroy, falling back to Destroy if
Pause fails (e.g. the Firecracker process is already gone)
- New PingSandbox RPC resets the in-memory LastActiveAt timer
- New POST /v1/sandboxes/{id}/ping REST endpoint resets both the agent's
in-memory LastActiveAt timer and the DB last_active_at
- ListSandboxes RPC now includes auto_paused_sandbox_ids so the reconciler
can distinguish auto-paused sandboxes from crashed ones in a single call
- Reconciler polls every 5s (was 30s) and marks auto-paused sandboxes as
"paused" and orphaned ones as "stopped"
- Resume RPC accepts timeout_sec from DB so TTL survives pause/resume cycles
- Reaper checks every 2s (was 10s) and uses a detached context to avoid
incomplete pauses on app shutdown
- Default timeout_sec changed from 300 to 0 (no auto-pause unless requested)
This commit is contained in:
@ -45,6 +45,10 @@ service HostAgentService {
|
||||
|
||||
// ReadFileStream reads a file from a sandbox and streams it back in chunks.
|
||||
rpc ReadFileStream(ReadFileStreamRequest) returns (stream ReadFileStreamResponse);
|
||||
|
||||
// PingSandbox resets the inactivity timer for a running sandbox.
|
||||
rpc PingSandbox(PingSandboxRequest) returns (PingSandboxResponse);
|
||||
|
||||
}
|
||||
|
||||
message CreateSandboxRequest {
|
||||
@ -60,8 +64,8 @@ message CreateSandboxRequest {
|
||||
// Memory in MB (default: 512).
|
||||
int32 memory_mb = 3;
|
||||
|
||||
// TTL in seconds. Sandbox is auto-destroyed after this duration of
|
||||
// inactivity. 0 means no auto-destroy.
|
||||
// TTL in seconds. Sandbox is auto-paused after this duration of
|
||||
// inactivity. 0 means no auto-pause.
|
||||
int32 timeout_sec = 4;
|
||||
}
|
||||
|
||||
@ -85,6 +89,10 @@ message PauseSandboxResponse {}
|
||||
|
||||
// Request to resume a sandbox. Besides identifying the sandbox, it carries
// the inactivity TTL to apply after the resume.
message ResumeSandboxRequest {
|
||||
// ID of the sandbox to resume.
string sandbox_id = 1;
|
||||
|
||||
// Inactivity TTL in seconds, restored from the DB so the reaper can
|
||||
// auto-pause the sandbox again after inactivity. 0 means no auto-pause.
int32 timeout_sec = 2;
|
||||
}
|
||||
|
||||
message ResumeSandboxResponse {
|
||||
@ -127,6 +135,10 @@ message ListSandboxesRequest {}
|
||||
|
||||
// Response for the ListSandboxes RPC.
message ListSandboxesResponse {
|
||||
// Sandboxes reported by the host agent.
// NOTE(review): confirm whether paused sandboxes are included here.
repeated SandboxInfo sandboxes = 1;
|
||||
|
||||
// IDs of sandboxes that were automatically paused by the TTL reaper
|
||||
// since the last call. Drained on read.
// NOTE(review): because the list is drained on read, each ID is presumably
// reported only once; a caller that loses a response may miss it. Confirm.
|
||||
repeated string auto_paused_sandbox_ids = 2;
|
||||
}
|
||||
|
||||
message SandboxInfo {
|
||||
@ -215,3 +227,12 @@ message ReadFileStreamRequest {
|
||||
// One message in the stream returned by the ReadFileStream RPC.
message ReadFileStreamResponse {
|
||||
// A chunk of the file's contents.
// NOTE(review): chunk size and ordering guarantees are not visible here.
bytes chunk = 1;
|
||||
}
|
||||
|
||||
// ── Ping ────────────────────────────────────────────────────────────
|
||||
|
||||
// Request for the PingSandbox RPC, which resets the inactivity timer for a
// running sandbox.
message PingSandboxRequest {
|
||||
// ID of the sandbox whose inactivity timer should be reset.
string sandbox_id = 1;
|
||||
}
|
||||
|
||||
// Response for the PingSandbox RPC. Currently carries no fields.
message PingSandboxResponse {}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user