1
0
forked from wrenn/wrenn
Co-authored-by: Tasnim Kabir Sadik <tksadik@omukk.dev>

Reviewed-on: wrenn/wrenn#55
Co-authored-by: pptx704 <rafeed@omukk.dev>
Co-committed-by: pptx704 <rafeed@omukk.dev>
This commit is contained in:
2026-06-20 22:45:08 +00:00
committed by Rafeed M. Bhuiyan
parent cfc0c52010
commit a08e755e53
53 changed files with 1675 additions and 577 deletions

View File

@ -17,6 +17,21 @@ WRENN_HOST_INTERFACE=eth0
WRENN_CP_URL=http://localhost:9725 WRENN_CP_URL=http://localhost:9725
WRENN_DEFAULT_ROOTFS_SIZE=5Gi WRENN_DEFAULT_ROOTFS_SIZE=5Gi
WRENN_CH_BIN=/usr/local/bin/cloud-hypervisor WRENN_CH_BIN=/usr/local/bin/cloud-hypervisor
# Public domain sandboxes are served under; injected into envd so `envd ports`
# can build {port}-{sandbox_id}.{domain} URLs.
WRENN_PROXY_DOMAIN=wrenn.dev
# Activity sampler for inactivity auto-pause (all optional; the values shown are the defaults).
# The host polls each running sandbox's guest liveness and refreshes that sandbox's
# inactivity TTL while the guest is doing real work, so a long-running but
# non-interactive job (build, download) is not auto-paused. A sandbox counts
# as busy when guest CPU ≥ threshold, or net/disk throughput ≥ the floor.
# Busy requires the threshold to hold for 2 consecutive samples (debounced),
# so isolated idle-noise spikes do not keep a sandbox alive.
WRENN_ACTIVITY_SAMPLE_INTERVAL=5s
WRENN_CPU_BUSY_THRESHOLD=5.0
WRENN_NET_FLOOR_BPS=16384
WRENN_DISK_FLOOR_BPS=32768
# Auth # Auth
JWT_SECRET= JWT_SECRET=

View File

@ -4,7 +4,7 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co
## Project Overview ## Project Overview
Wrenn Sandbox is a microVM-based code execution platform. Users create isolated sandboxes (Cloud Hypervisor microVMs), run code inside them, and get output back via SDKs. Think E2B but with persistent sandboxes, pool-based pricing, and a single-binary deployment story. Wrenn is an open-source, self-hosted dev environment platform. Users spin up isolated sandboxes (Cloud Hypervisor microVMs), run code inside them, and get output back via SDKs. Fast boot, persistent state, and a single agent binary on each host you own.
## Build & Development Commands ## Build & Development Commands
@ -28,7 +28,7 @@ make dev-envd # envd in debug mode (port 49983)
make check # fmt + vet + lint + test (CI order) make check # fmt + vet + lint + test (CI order)
make test # Unit tests: go test -race -v ./internal/... make test # Unit tests: go test -race -v ./internal/...
make test-integration # Integration tests (require host agent + Cloud Hypervisor) make test-integration # Integration tests (require host agent + Cloud Hypervisor)
make fmt # gofmt make fmt # gofmt and rust fmt
make vet # go vet make vet # go vet
make lint # golangci-lint make lint # golangci-lint

View File

@ -106,6 +106,7 @@ sqlc:
fmt: fmt:
gofmt -w . gofmt -w .
cargo fmt --manifest-path envd-rs/Cargo.toml
lint: lint:
golangci-lint run ./... golangci-lint run ./...

View File

@ -2,6 +2,8 @@
Secure infrastructure for AI Secure infrastructure for AI
Wrenn is an open-source, self-hosted dev environment platform. Each capsule is a fully isolated virtual machine — booted in seconds, persistent across sessions. Run the control plane anywhere, deploy a single agent binary on each compute host.
## Prerequisites ## Prerequisites
- Linux host with `/dev/kvm` access (bare metal or nested virt) - Linux host with `/dev/kvm` access (bare metal or nested virt)

View File

@ -1 +1 @@
0.2.0 0.2.1

View File

@ -1 +1 @@
0.2.0 0.2.1

View File

@ -148,6 +148,13 @@ func main() {
VMMBin: chBin, VMMBin: chBin,
VMMVersion: chVersion, VMMVersion: chVersion,
AgentVersion: version, AgentVersion: version,
ProxyDomain: envOrDefault("WRENN_PROXY_DOMAIN", "wrenn.dev"),
// Activity sampler tuning (all optional; zero → sandbox package default).
ActivitySampleInterval: envDuration("WRENN_ACTIVITY_SAMPLE_INTERVAL"),
CPUBusyPct: envFloat32("WRENN_CPU_BUSY_THRESHOLD"),
NetFloorBps: envUint64("WRENN_NET_FLOOR_BPS"),
DiskFloorBps: envUint64("WRENN_DISK_FLOOR_BPS"),
} }
// Remove any *.staging-* / *.trash-* directories left behind by a // Remove any *.staging-* / *.trash-* directories left behind by a
@ -171,6 +178,7 @@ func main() {
mgr.RestorePausedSandboxes() mgr.RestorePausedSandboxes()
mgr.StartTTLReaper(ctx) mgr.StartTTLReaper(ctx)
mgr.StartActivitySampler(ctx)
// httpServer is declared here so the shutdown func can reference it. // httpServer is declared here so the shutdown func can reference it.
// ReadTimeout/WriteTimeout are intentionally omitted — they would kill // ReadTimeout/WriteTimeout are intentionally omitted — they would kill
@ -311,6 +319,49 @@ func envOrDefault(key, def string) string {
return def return def
} }
// envDuration parses an optional duration env var (e.g. "5s").
//
// It returns zero when the variable is empty, cannot be parsed, or is
// negative, letting the sandbox package apply its default. Negative values
// need an explicit check because time.ParseDuration accepts a leading sign:
// "-5s" parses without error and would otherwise be handed to the consumer
// as a negative interval.
func envDuration(key string) time.Duration {
	v := os.Getenv(key)
	if v == "" {
		return 0
	}
	d, err := time.ParseDuration(v)
	if err != nil {
		// Include the parse error so the operator can tell a bad unit from a
		// malformed number.
		slog.Warn("invalid duration env var, using default", "key", key, "value", v, "error", err)
		return 0
	}
	if d < 0 {
		slog.Warn("negative duration env var, using default", "key", key, "value", v)
		return 0
	}
	return d
}
// envFloat32 parses an optional float env var.
//
// It returns 0 when the variable is empty, cannot be parsed, or parses to NaN.
// strconv.ParseFloat accepts the literal "NaN" without error; because every
// ordered comparison against NaN is false, a NaN threshold could never be met,
// so it is treated as invalid rather than passed through.
func envFloat32(key string) float32 {
	v := os.Getenv(key)
	if v == "" {
		return 0
	}
	f, err := strconv.ParseFloat(v, 32)
	if err != nil {
		// Include the parse error so syntax and range failures are
		// distinguishable in the log.
		slog.Warn("invalid float env var, using default", "key", key, "value", v, "error", err)
		return 0
	}
	// NaN is the only float value that is not equal to itself.
	if f != f {
		slog.Warn("invalid float env var, using default", "key", key, "value", v)
		return 0
	}
	return float32(f)
}
// envUint64 reads an optional base-10 unsigned integer from the environment.
// An unset, empty, or unparseable value yields 0; the unparseable case is
// also logged as a warning.
func envUint64(key string) uint64 {
	raw := os.Getenv(key)
	if raw != "" {
		if parsed, err := strconv.ParseUint(raw, 10, 64); err == nil {
			return parsed
		}
		slog.Warn("invalid uint env var, using default", "key", key, "value", raw)
	}
	return 0
}
// checkPrivileges verifies the process has the required Linux capabilities. // checkPrivileges verifies the process has the required Linux capabilities.
// Always reads CapEff — even for root — because a root process inside a // Always reads CapEff — even for root — because a root process inside a
// restricted container (e.g. docker --cap-drop=all) may not have all caps. // restricted container (e.g. docker --cap-drop=all) may not have all caps.

2
envd-rs/Cargo.lock generated
View File

@ -529,7 +529,7 @@ dependencies = [
[[package]] [[package]]
name = "envd" name = "envd"
version = "0.3.0" version = "0.4.0"
dependencies = [ dependencies = [
"async-stream", "async-stream",
"axum", "axum",

View File

@ -1,6 +1,6 @@
[package] [package]
name = "envd" name = "envd"
version = "0.3.0" version = "0.4.0"
edition = "2024" edition = "2024"
rust-version = "1.95" rust-version = "1.95"

View File

@ -14,6 +14,7 @@ const ACCESS_TOKEN_HEADER: &str = "x-access-token";
/// Format: "METHOD/path" /// Format: "METHOD/path"
const AUTH_EXCLUDED: &[&str] = &[ const AUTH_EXCLUDED: &[&str] = &[
"GET/health", "GET/health",
"GET/activity",
"GET/files", "GET/files",
"POST/files", "POST/files",
"POST/init", "POST/init",
@ -21,11 +22,7 @@ const AUTH_EXCLUDED: &[&str] = &[
]; ];
/// Axum middleware that checks X-Access-Token header. /// Axum middleware that checks X-Access-Token header.
pub async fn auth_layer( pub async fn auth_layer(request: Request, next: Next, access_token: Arc<SecureToken>) -> Response {
request: Request,
next: Next,
access_token: Arc<SecureToken>,
) -> Response {
if access_token.is_set() { if access_token.is_set() {
let method = request.method().as_str(); let method = request.method().as_str();
let path = request.uri().path(); let path = request.uri().path();

View File

@ -1,3 +1,3 @@
pub mod token;
pub mod signing;
pub mod middleware; pub mod middleware;
pub mod signing;
pub mod token;

View File

@ -140,13 +140,32 @@ mod tests {
#[test] #[test]
fn validate_correct_header_token() { fn validate_correct_header_token() {
let token = test_token(b"secret"); let token = test_token(b"secret");
assert!(validate_signing(&token, Some("secret"), None, None, "root", "/f", READ_OPERATION).is_ok()); assert!(
validate_signing(
&token,
Some("secret"),
None,
None,
"root",
"/f",
READ_OPERATION
)
.is_ok()
);
} }
#[test] #[test]
fn validate_wrong_header_token() { fn validate_wrong_header_token() {
let token = test_token(b"secret"); let token = test_token(b"secret");
let result = validate_signing(&token, Some("wrong"), None, None, "root", "/f", READ_OPERATION); let result = validate_signing(
&token,
Some("wrong"),
None,
None,
"root",
"/f",
READ_OPERATION,
);
assert!(result.is_err()); assert!(result.is_err());
assert!(result.unwrap_err().contains("does not match")); assert!(result.unwrap_err().contains("does not match"));
} }
@ -156,13 +175,32 @@ mod tests {
let token = test_token(b"secret"); let token = test_token(b"secret");
let exp = far_future(); let exp = far_future();
let sig = generate_signature(&token, "/file", "root", READ_OPERATION, Some(exp)).unwrap(); let sig = generate_signature(&token, "/file", "root", READ_OPERATION, Some(exp)).unwrap();
assert!(validate_signing(&token, None, Some(&sig), Some(exp), "root", "/file", READ_OPERATION).is_ok()); assert!(
validate_signing(
&token,
None,
Some(&sig),
Some(exp),
"root",
"/file",
READ_OPERATION
)
.is_ok()
);
} }
#[test] #[test]
fn validate_invalid_signature() { fn validate_invalid_signature() {
let token = test_token(b"secret"); let token = test_token(b"secret");
let result = validate_signing(&token, None, Some("v1_bad"), Some(far_future()), "root", "/f", READ_OPERATION); let result = validate_signing(
&token,
None,
Some("v1_bad"),
Some(far_future()),
"root",
"/f",
READ_OPERATION,
);
assert!(result.is_err()); assert!(result.is_err());
assert!(result.unwrap_err().contains("invalid signature")); assert!(result.unwrap_err().contains("invalid signature"));
} }
@ -172,7 +210,15 @@ mod tests {
let token = test_token(b"secret"); let token = test_token(b"secret");
let expired: i64 = 1_000_000; let expired: i64 = 1_000_000;
let sig = generate_signature(&token, "/f", "root", READ_OPERATION, Some(expired)).unwrap(); let sig = generate_signature(&token, "/f", "root", READ_OPERATION, Some(expired)).unwrap();
let result = validate_signing(&token, None, Some(&sig), Some(expired), "root", "/f", READ_OPERATION); let result = validate_signing(
&token,
None,
Some(&sig),
Some(expired),
"root",
"/f",
READ_OPERATION,
);
assert!(result.is_err()); assert!(result.is_err());
assert!(result.unwrap_err().contains("expired")); assert!(result.unwrap_err().contains("expired"));
} }
@ -197,7 +243,18 @@ mod tests {
fn validate_valid_signature_no_expiration() { fn validate_valid_signature_no_expiration() {
let token = test_token(b"secret"); let token = test_token(b"secret");
let sig = generate_signature(&token, "/file", "root", READ_OPERATION, None).unwrap(); let sig = generate_signature(&token, "/file", "root", READ_OPERATION, None).unwrap();
assert!(validate_signing(&token, None, Some(&sig), None, "root", "/file", READ_OPERATION).is_ok()); assert!(
validate_signing(
&token,
None,
Some(&sig),
None,
"root",
"/file",
READ_OPERATION
)
.is_ok()
);
} }
#[test] #[test]

View File

@ -19,20 +19,25 @@ pub struct Cgroup2Manager {
} }
impl Cgroup2Manager { impl Cgroup2Manager {
pub fn new(root: &str, configs: &[(ProcessType, &str, &[(&str, &str)])]) -> Result<Self, String> { pub fn new(
root: &str,
configs: &[(ProcessType, &str, &[(&str, &str)])],
) -> Result<Self, String> {
let mut fds = HashMap::new(); let mut fds = HashMap::new();
for (proc_type, sub_path, properties) in configs { for (proc_type, sub_path, properties) in configs {
let full_path = PathBuf::from(root).join(sub_path); let full_path = PathBuf::from(root).join(sub_path);
fs::create_dir_all(&full_path).map_err(|e| { fs::create_dir_all(&full_path)
format!("failed to create cgroup {}: {e}", full_path.display()) .map_err(|e| format!("failed to create cgroup {}: {e}", full_path.display()))?;
})?;
for (name, value) in *properties { for (name, value) in *properties {
let prop_path = full_path.join(name); let prop_path = full_path.join(name);
fs::write(&prop_path, value).map_err(|e| { fs::write(&prop_path, value).map_err(|e| {
format!("failed to write cgroup property {}: {e}", prop_path.display()) format!(
"failed to write cgroup property {}: {e}",
prop_path.display()
)
})?; })?;
} }

5
envd-rs/src/cmd/mod.rs Normal file
View File

@ -0,0 +1,5 @@
//! Client subcommands for the `envd` binary. These run as short-lived
//! invocations (e.g. `envd ports`) inside the guest, separate from the
//! long-running daemon, and exit when done.
pub mod ports;

164
envd-rs/src/cmd/ports.rs Normal file
View File

@ -0,0 +1,164 @@
//! `envd ports` — list the open ports inside the sandbox that are reachable
//! from outside, alongside the URL each is served at.
//!
//! Runs as a one-shot client (not the daemon): it scans `/proc/net/tcp[6]`
//! directly via the shared port helper and reads the sandbox identity that the
//! daemon recorded under /run/wrenn at /init time. It refuses to run outside a
//! wrenn sandbox.
use std::fs;
use std::path::Path;
use crate::config::{DEFAULT_PORT, DEFAULT_PROXY_DOMAIN, WRENN_RUN_DIR};
use crate::port::conn::reachable_listening_ports;
// NOTE: with clap's derive API the `///` comments on this struct and its
// fields are used as the generated help text, so rewording them changes
// user-visible `--help` output. Keep maintainer-only notes in `//` comments.
/// Arguments for the `envd ports` subcommand.
#[derive(clap::Args)]
pub struct PortsArgs {
// `--domain <DOMAIN>`: optional. `run` ignores an empty string here and falls
// back to the injected identity, then to DEFAULT_PROXY_DOMAIN.
/// Override the proxy domain used to build URLs (default: the domain
/// injected by the host, falling back to the built-in default).
#[arg(long)]
domain: Option<String>,
// `--json`: boolean flag; when set, `run` prints pretty-printed JSON and
// skips the table output entirely.
/// Emit JSON instead of a table.
#[arg(long)]
json: bool,
}
/// One row of `envd ports` output: a listening port paired with the URL built
/// for it. Serialized directly (field names `port` and `url`) for `--json`.
#[derive(serde::Serialize)]
struct PortEntry {
/// Port number as yielded by `reachable_listening_ports`.
port: u32,
/// URL produced by `build_url` for this port.
url: String,
}
/// Entry point for `envd ports`. The return value is the process exit code:
/// 1 when not inside a wrenn sandbox or when JSON encoding fails, 0 otherwise.
pub fn run(args: &PortsArgs) -> i32 {
    if !inside_sandbox() {
        eprintln!("envd ports: not running inside a wrenn sandbox");
        return 1;
    }

    let sandbox_id = read_identity("WRENN_SANDBOX_ID", ".WRENN_SANDBOX_ID");

    // Domain precedence: a non-empty --domain flag, then the recorded
    // identity (env var or run file), then the compiled-in default.
    let domain = match args.domain.as_deref() {
        Some(flag) if !flag.is_empty() => flag.to_string(),
        _ => read_identity("WRENN_PROXY_DOMAIN", ".WRENN_PROXY_DOMAIN")
            .unwrap_or_else(|| DEFAULT_PROXY_DOMAIN.to_string()),
    };

    let mut entries: Vec<PortEntry> = Vec::new();
    for port in reachable_listening_ports(DEFAULT_PORT as u32) {
        let url = build_url(port, sandbox_id.as_deref(), &domain);
        entries.push(PortEntry { port, url });
    }

    if args.json {
        // JSON mode always prints the array, even when it is empty.
        return match serde_json::to_string_pretty(&entries) {
            Ok(encoded) => {
                println!("{encoded}");
                0
            }
            Err(err) => {
                eprintln!("envd ports: failed to encode JSON: {err}");
                1
            }
        };
    }

    if entries.is_empty() {
        println!("No open ports.");
        return 0;
    }

    println!("{:<6} {}", "PORT", "URL");
    for entry in &entries {
        println!("{:<6} {}", entry.port, entry.url);
    }
    0
}
/// Reports whether this process appears to be inside a wrenn sandbox.
///
/// Two markers are accepted: the `WRENN_SANDBOX` env var being exactly
/// `"true"`, or the `.WRENN_SANDBOX` file existing under the wrenn run
/// directory. On an ordinary host neither is expected to be present, so
/// `envd ports` is refused there.
fn inside_sandbox() -> bool {
    let env_marked = matches!(std::env::var("WRENN_SANDBOX").as_deref(), Ok("true"));
    // The filesystem is only consulted when the env var does not match.
    env_marked || Path::new(WRENN_RUN_DIR).join(".WRENN_SANDBOX").exists()
}
/// Looks up an identity value, preferring the environment variable `env_key`
/// and falling back to the file `file_name` under the wrenn run directory.
///
/// Values are whitespace-trimmed. A blank value is treated as absent, and a
/// file that cannot be read is treated the same way. Returns `None` when no
/// source yields a non-empty value.
fn read_identity(env_key: &str, file_name: &str) -> Option<String> {
    let from_env = std::env::var(env_key)
        .ok()
        .map(|raw| raw.trim().to_string())
        .filter(|val| !val.is_empty());
    if from_env.is_some() {
        return from_env;
    }

    let contents = fs::read_to_string(Path::new(WRENN_RUN_DIR).join(file_name)).ok()?;
    let trimmed = contents.trim();
    if trimmed.is_empty() {
        None
    } else {
        Some(trimmed.to_string())
    }
}
/// Formats the URL for `port` as `https://{port}-{sandbox_id}.{domain}`.
///
/// When `sandbox_id` is `None` the ID segment is the literal placeholder
/// `<sandbox-id>`, so the output stays informative even though the URL is not
/// usable as-is.
fn build_url(port: u32, sandbox_id: Option<&str>, domain: &str) -> String {
    match sandbox_id {
        Some(id) => format!("https://{port}-{id}.{domain}"),
        None => format!("https://{port}-<sandbox-id>.{domain}"),
    }
}
// Unit tests for URL formatting (`build_url`) and identity lookup
// (`read_identity`). The identity tests pass a file name that is not expected
// to exist, so only the environment-variable path is exercised.
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn url_with_sandbox_id() {
assert_eq!(
build_url(8000, Some("cl-abcd1234"), "wrenn.dev"),
"https://8000-cl-abcd1234.wrenn.dev"
);
}
#[test]
fn url_without_sandbox_id_uses_placeholder() {
assert_eq!(
build_url(5173, None, "wrenn.dev"),
"https://5173-<sandbox-id>.wrenn.dev"
);
}
#[test]
fn url_honors_custom_domain() {
assert_eq!(
build_url(3000, Some("cl-deadbeef"), "sandbox.example.com"),
"https://3000-cl-deadbeef.sandbox.example.com"
);
}
#[test]
fn read_identity_prefers_env() {
// SAFETY: the key is used only by this test, and no other test in this
// module writes the environment.
// NOTE(review): the default test harness may run tests on parallel threads,
// so this relies on no other thread touching the process environment while
// the variable is set and removed — confirm, or serialize env-mutating tests.
// The surrounding spaces in the value check that the result is trimmed.
unsafe { std::env::set_var("ENVD_PORTS_TEST_ID", " cl-fromenv ") };
assert_eq!(
read_identity("ENVD_PORTS_TEST_ID", ".nonexistent-file"),
Some("cl-fromenv".to_string())
);
unsafe { std::env::remove_var("ENVD_PORTS_TEST_ID") };
}
#[test]
fn read_identity_none_when_unset() {
// Neither the env var nor the fallback file is expected to exist here.
assert_eq!(
read_identity("ENVD_PORTS_TEST_UNSET", ".nonexistent-file"),
None
);
}
}

View File

@ -7,5 +7,10 @@ pub const PORT_SCANNER_INTERVAL: Duration = Duration::from_millis(1000);
pub const DEFAULT_USER: &str = "root"; pub const DEFAULT_USER: &str = "root";
pub const WRENN_RUN_DIR: &str = "/run/wrenn"; pub const WRENN_RUN_DIR: &str = "/run/wrenn";
/// Fallback proxy domain used by `envd ports` to build URLs when the host has
/// not injected one via /init. Matches the host agent's WRENN_PROXY_DOMAIN
/// default.
pub const DEFAULT_PROXY_DOMAIN: &str = "wrenn.dev";
pub const KILOBYTE: u64 = 1024; pub const KILOBYTE: u64 = 1024;
pub const MEGABYTE: u64 = 1024 * KILOBYTE; pub const MEGABYTE: u64 = 1024 * KILOBYTE;

View File

@ -1,3 +1,3 @@
pub mod hmac_sha256;
pub mod sha256; pub mod sha256;
pub mod sha512; pub mod sha512;
pub mod hmac_sha256;

View File

@ -20,14 +20,22 @@ mod tests {
const VECTORS: &[(&[u8], &str)] = &[ const VECTORS: &[(&[u8], &str)] = &[
(b"", "47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU"), (b"", "47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU"),
(b"abc", "ungWv48Bz+pBQUDeXa4iI7ADYaOWF3qctBD/YfIAFa0"), (b"abc", "ungWv48Bz+pBQUDeXa4iI7ADYaOWF3qctBD/YfIAFa0"),
(b"abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq", "JI1qYdIGOLjlwCaTDD5gOaM85Flk/yFn9uzt1BnbBsE"), (
b"abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq",
"JI1qYdIGOLjlwCaTDD5gOaM85Flk/yFn9uzt1BnbBsE",
),
]; ];
#[test] #[test]
fn known_answer_with_prefix() { fn known_answer_with_prefix() {
for (input, expected_b64) in VECTORS { for (input, expected_b64) in VECTORS {
let result = hash(input); let result = hash(input);
assert_eq!(result, format!("$sha256${expected_b64}"), "input: {:?}", String::from_utf8_lossy(input)); assert_eq!(
result,
format!("$sha256${expected_b64}"),
"input: {:?}",
String::from_utf8_lossy(input)
);
} }
} }
@ -35,7 +43,12 @@ mod tests {
fn known_answer_without_prefix() { fn known_answer_without_prefix() {
for (input, expected_b64) in VECTORS { for (input, expected_b64) in VECTORS {
let result = hash_without_prefix(input); let result = hash_without_prefix(input);
assert_eq!(result, *expected_b64, "input: {:?}", String::from_utf8_lossy(input)); assert_eq!(
result,
*expected_b64,
"input: {:?}",
String::from_utf8_lossy(input)
);
} }
} }

View File

@ -15,9 +15,18 @@ mod tests {
use super::*; use super::*;
const VECTORS: &[(&str, &str)] = &[ const VECTORS: &[(&str, &str)] = &[
("", "cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e"), (
("abc", "ddaf35a193617abacc417349ae20413112e6fa4e89a97ea20a9eeee64b55d39a2192992a274fc1a836ba3c23a3feebbd454d4423643ce80e2a9ac94fa54ca49f"), "",
("abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq", "204a8fc6dda82f0a0ced7beb8e08a41657c16ef468b228a8279be331a703c33596fd15c13b1b07f9aa1d3bea57789ca031ad85c7a71dd70354ec631238ca3445"), "cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e",
),
(
"abc",
"ddaf35a193617abacc417349ae20413112e6fa4e89a97ea20a9eeee64b55d39a2192992a274fc1a836ba3c23a3feebbd454d4423643ce80e2a9ac94fa54ca49f",
),
(
"abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq",
"204a8fc6dda82f0a0ced7beb8e08a41657c16ef468b228a8279be331a703c33596fd15c13b1b07f9aa1d3bea57789ca031ad85c7a71dd70354ec631238ca3445",
),
]; ];
#[test] #[test]
@ -30,7 +39,10 @@ mod tests {
#[test] #[test]
fn str_and_bytes_agree() { fn str_and_bytes_agree() {
for (input, _) in VECTORS { for (input, _) in VECTORS {
assert_eq!(hash_access_token(input), hash_access_token_bytes(input.as_bytes())); assert_eq!(
hash_access_token(input),
hash_access_token_bytes(input.as_bytes())
);
} }
} }
@ -38,6 +50,9 @@ mod tests {
fn output_is_lowercase_hex_128_chars() { fn output_is_lowercase_hex_128_chars() {
let h = hash_access_token("anything"); let h = hash_access_token("anything");
assert_eq!(h.len(), 128); assert_eq!(h.len(), 128);
assert!(h.chars().all(|c| c.is_ascii_hexdigit() && !c.is_ascii_uppercase())); assert!(
h.chars()
.all(|c| c.is_ascii_hexdigit() && !c.is_ascii_uppercase())
);
} }
} }

View File

@ -62,7 +62,10 @@ mod tests {
#[test] #[test]
fn workdir_explicit_overrides_default() { fn workdir_explicit_overrides_default() {
assert_eq!(resolve_default_workdir("/explicit", Some("/default")), "/explicit"); assert_eq!(
resolve_default_workdir("/explicit", Some("/default")),
"/explicit"
);
} }
#[test] #[test]
@ -82,7 +85,10 @@ mod tests {
#[test] #[test]
fn username_explicit_returns_explicit() { fn username_explicit_returns_explicit() {
assert_eq!(resolve_default_username(Some("root"), "wrenn").unwrap(), "root"); assert_eq!(
resolve_default_username(Some("root"), "wrenn").unwrap(),
"root"
);
} }
#[test] #[test]

View File

@ -0,0 +1,37 @@
use std::sync::Arc;
use axum::Json;
use axum::extract::State;
use axum::http::header;
use axum::response::IntoResponse;
use serde::Serialize;
use crate::state::AppState;
/// Liveness snapshot the host activity sampler polls to decide whether a
/// sandbox is doing real work. All fields are served straight from atomics
/// updated by the 1s sampler thread — no syscalls per request, so the host
/// can poll cheaply at a few-second cadence.
///
/// Serialized to JSON with the field names below.
#[derive(Serialize)]
pub struct Activity {
/// Value of `AppState::cpu_count()` at request time.
cpu_count: u32,
/// Value of `AppState::cpu_used_pct()`; presumably a percentage — confirm
/// the scale (0–100 vs. per-core) against the sampler.
cpu_used_pct: f32,
/// Value of `AppState::net_bps()`. NOTE(review): unit not visible here
/// (bytes vs. bits per second) — confirm against the sampler.
net_bps: u64,
/// Value of `AppState::disk_bps()`. NOTE(review): unit not visible here —
/// confirm against the sampler.
disk_bps: u64,
}
/// Handler for the activity endpoint: reads the current CPU, network, and
/// disk figures from shared state and returns them as JSON with
/// `Cache-Control: no-store`.
pub async fn get_activity(State(state): State<Arc<AppState>>) -> impl IntoResponse {
    tracing::trace!("get activity");

    let snapshot = Activity {
        cpu_count: state.cpu_count(),
        cpu_used_pct: state.cpu_used_pct(),
        net_bps: state.net_bps(),
        disk_bps: state.disk_bps(),
    };
    let headers = [(header::CACHE_CONTROL, "no-store")];

    (headers, Json(snapshot))
}

View File

@ -20,7 +20,10 @@ fn parse_encoding_with_quality(value: &str) -> EncodingWithQuality {
let enc = value[..idx].trim(); let enc = value[..idx].trim();
for param in params.split(';') { for param in params.split(';') {
let param = param.trim(); let param = param.trim();
if let Some(stripped) = param.strip_prefix("q=").or_else(|| param.strip_prefix("Q=")) { if let Some(stripped) = param
.strip_prefix("q=")
.or_else(|| param.strip_prefix("Q="))
{
if let Ok(q) = stripped.parse::<f64>() { if let Ok(q) = stripped.parse::<f64>() {
quality = q; quality = q;
} }
@ -43,8 +46,10 @@ fn parse_accept_encoding_header(header: &str) -> (Vec<EncodingWithQuality>, bool
return (Vec::new(), false); return (Vec::new(), false);
} }
let encodings: Vec<EncodingWithQuality> = let encodings: Vec<EncodingWithQuality> = header
header.split(',').map(|v| parse_encoding_with_quality(v)).collect(); .split(',')
.map(|v| parse_encoding_with_quality(v))
.collect();
let mut identity_rejected = false; let mut identity_rejected = false;
let mut identity_explicitly_accepted = false; let mut identity_explicitly_accepted = false;
@ -97,7 +102,11 @@ pub fn parse_accept_encoding<B>(r: &Request<B>) -> Result<&'static str, String>
} }
let (mut encodings, identity_rejected) = parse_accept_encoding_header(header); let (mut encodings, identity_rejected) = parse_accept_encoding_header(header);
encodings.sort_by(|a, b| b.quality.partial_cmp(&a.quality).unwrap_or(std::cmp::Ordering::Equal)); encodings.sort_by(|a, b| {
b.quality
.partial_cmp(&a.quality)
.unwrap_or(std::cmp::Ordering::Equal)
});
for eq in &encodings { for eq in &encodings {
if eq.quality == 0.0 { if eq.quality == 0.0 {
@ -121,7 +130,9 @@ pub fn parse_accept_encoding<B>(r: &Request<B>) -> Result<&'static str, String>
return Ok(ENCODING_IDENTITY); return Ok(ENCODING_IDENTITY);
} }
Err(format!("no acceptable encoding found, supported: {SUPPORTED_ENCODINGS:?}")) Err(format!(
"no acceptable encoding found, supported: {SUPPORTED_ENCODINGS:?}"
))
} }
pub fn parse_content_encoding<B>(r: &Request<B>) -> Result<&'static str, String> { pub fn parse_content_encoding<B>(r: &Request<B>) -> Result<&'static str, String> {
@ -143,7 +154,9 @@ pub fn parse_content_encoding<B>(r: &Request<B>) -> Result<&'static str, String>
return Ok(ENCODING_GZIP); return Ok(ENCODING_GZIP);
} }
Err(format!("unsupported Content-Encoding: {header}, supported: {SUPPORTED_ENCODINGS:?}")) Err(format!(
"unsupported Content-Encoding: {header}, supported: {SUPPORTED_ENCODINGS:?}"
))
} }
#[cfg(test)] #[cfg(test)]
@ -236,17 +249,26 @@ mod tests {
#[test] #[test]
fn accept_encoding_no_header_returns_identity() { fn accept_encoding_no_header_returns_identity() {
assert_eq!(parse_accept_encoding(&req_no_headers()).unwrap(), "identity"); assert_eq!(
parse_accept_encoding(&req_no_headers()).unwrap(),
"identity"
);
} }
#[test] #[test]
fn accept_encoding_gzip() { fn accept_encoding_gzip() {
assert_eq!(parse_accept_encoding(&req_with_accept("gzip")).unwrap(), "gzip"); assert_eq!(
parse_accept_encoding(&req_with_accept("gzip")).unwrap(),
"gzip"
);
} }
#[test] #[test]
fn accept_encoding_identity_explicit() { fn accept_encoding_identity_explicit() {
assert_eq!(parse_accept_encoding(&req_with_accept("identity")).unwrap(), "identity"); assert_eq!(
parse_accept_encoding(&req_with_accept("identity")).unwrap(),
"identity"
);
} }
#[test] #[test]
@ -259,7 +281,10 @@ mod tests {
#[test] #[test]
fn accept_encoding_wildcard_returns_identity() { fn accept_encoding_wildcard_returns_identity() {
assert_eq!(parse_accept_encoding(&req_with_accept("*")).unwrap(), "identity"); assert_eq!(
parse_accept_encoding(&req_with_accept("*")).unwrap(),
"identity"
);
} }
#[test] #[test]
@ -277,7 +302,10 @@ mod tests {
#[test] #[test]
fn accept_encoding_unsupported_only_falls_to_identity() { fn accept_encoding_unsupported_only_falls_to_identity() {
assert_eq!(parse_accept_encoding(&req_with_accept("br")).unwrap(), "identity"); assert_eq!(
parse_accept_encoding(&req_with_accept("br")).unwrap(),
"identity"
);
} }
// is_identity_acceptable // is_identity_acceptable
@ -311,17 +339,26 @@ mod tests {
#[test] #[test]
fn content_encoding_empty_returns_identity() { fn content_encoding_empty_returns_identity() {
assert_eq!(parse_content_encoding(&req_no_headers()).unwrap(), "identity"); assert_eq!(
parse_content_encoding(&req_no_headers()).unwrap(),
"identity"
);
} }
#[test] #[test]
fn content_encoding_gzip() { fn content_encoding_gzip() {
assert_eq!(parse_content_encoding(&req_with_content("gzip")).unwrap(), "gzip"); assert_eq!(
parse_content_encoding(&req_with_content("gzip")).unwrap(),
"gzip"
);
} }
#[test] #[test]
fn content_encoding_identity_explicit() { fn content_encoding_identity_explicit() {
assert_eq!(parse_content_encoding(&req_with_content("identity")).unwrap(), "identity"); assert_eq!(
parse_content_encoding(&req_with_content("identity")).unwrap(),
"identity"
);
} }
#[test] #[test]
@ -331,6 +368,9 @@ mod tests {
#[test] #[test]
fn content_encoding_case_insensitive() { fn content_encoding_case_insensitive() {
assert_eq!(parse_content_encoding(&req_with_content("GZIP")).unwrap(), "gzip"); assert_eq!(
parse_content_encoding(&req_with_content("GZIP")).unwrap(),
"gzip"
);
} }
} }

View File

@ -18,8 +18,5 @@ pub async fn get_envs(State(state): State<Arc<AppState>>) -> impl IntoResponse {
.map(|entry| (entry.key().clone(), entry.value().clone())) .map(|entry| (entry.key().clone(), entry.value().clone()))
.collect(); .collect();
( ([(header::CACHE_CONTROL, "no-store")], Json(envs))
[(header::CACHE_CONTROL, "no-store")],
Json(envs),
)
} }

View File

@ -72,10 +72,8 @@ pub async fn get_files(
let header_token = extract_header_token(&req); let header_token = extract_header_token(&req);
let default_user = state.defaults.user(); let default_user = state.defaults.user();
let username = match execcontext::resolve_default_username( let username =
params.username.as_deref(), match execcontext::resolve_default_username(params.username.as_deref(), &default_user) {
&default_user,
) {
Ok(u) => u.to_string(), Ok(u) => u.to_string(),
Err(e) => return json_error(StatusCode::BAD_REQUEST, e), Err(e) => return json_error(StatusCode::BAD_REQUEST, e),
}; };
@ -98,8 +96,7 @@ pub async fn get_files(
let home_dir = user.dir.to_string_lossy().to_string(); let home_dir = user.dir.to_string_lossy().to_string();
let default_workdir = state.defaults.workdir(); let default_workdir = state.defaults.workdir();
let resolved = match expand_and_resolve(path_str, &home_dir, default_workdir.as_deref()) let resolved = match expand_and_resolve(path_str, &home_dir, default_workdir.as_deref()) {
{
Ok(p) => p, Ok(p) => p,
Err(e) => return json_error(StatusCode::BAD_REQUEST, &e), Err(e) => return json_error(StatusCode::BAD_REQUEST, &e),
}; };
@ -177,8 +174,7 @@ pub async fn get_files(
.unwrap_or("application/octet-stream"); .unwrap_or("application/octet-stream");
if use_encoding == "gzip" { if use_encoding == "gzip" {
let mut encoder = let mut encoder = flate2::write::GzEncoder::new(Vec::new(), flate2::Compression::default());
flate2::write::GzEncoder::new(Vec::new(), flate2::Compression::default());
if let Err(e) = encoder.write_all(&file_data) { if let Err(e) = encoder.write_all(&file_data) {
return json_error( return json_error(
StatusCode::INTERNAL_SERVER_ERROR, StatusCode::INTERNAL_SERVER_ERROR,
@ -225,10 +221,8 @@ pub async fn post_files(
let header_token = extract_header_token(&req); let header_token = extract_header_token(&req);
let default_user = state.defaults.user(); let default_user = state.defaults.user();
let username = match execcontext::resolve_default_username( let username =
params.username.as_deref(), match execcontext::resolve_default_username(params.username.as_deref(), &default_user) {
&default_user,
) {
Ok(u) => u.to_string(), Ok(u) => u.to_string(),
Err(e) => return json_error(StatusCode::BAD_REQUEST, e), Err(e) => return json_error(StatusCode::BAD_REQUEST, e),
}; };
@ -283,10 +277,7 @@ pub async fn post_files(
Err(e) => return json_error(StatusCode::BAD_REQUEST, &e), Err(e) => return json_error(StatusCode::BAD_REQUEST, &e),
} }
} else { } else {
let fname = field let fname = field.file_name().unwrap_or("upload").to_string();
.file_name()
.unwrap_or("upload")
.to_string();
match expand_and_resolve(&fname, &home_dir, default_workdir.as_deref()) { match expand_and_resolve(&fname, &home_dir, default_workdir.as_deref()) {
Ok(p) => p, Ok(p) => p,
Err(e) => return json_error(StatusCode::BAD_REQUEST, &e), Err(e) => return json_error(StatusCode::BAD_REQUEST, &e),
@ -382,7 +373,7 @@ fn process_file(
return Err(( return Err((
StatusCode::INTERNAL_SERVER_ERROR, StatusCode::INTERNAL_SERVER_ERROR,
format!("error getting file info: {e}"), format!("error getting file info: {e}"),
)) ));
} }
}; };
@ -395,7 +386,7 @@ fn process_file(
return Err(( return Err((
StatusCode::INTERNAL_SERVER_ERROR, StatusCode::INTERNAL_SERVER_ERROR,
format!("error changing ownership: {e}"), format!("error changing ownership: {e}"),
)) ));
} }
} }
} }

View File

@ -26,6 +26,9 @@ pub struct InitRequest {
pub volume_mounts: Option<Vec<VolumeMount>>, pub volume_mounts: Option<Vec<VolumeMount>>,
pub sandbox_id: Option<String>, pub sandbox_id: Option<String>,
pub template_id: Option<String>, pub template_id: Option<String>,
/// Public proxy domain (e.g. "wrenn.dev"). Used by `envd ports` to build
/// the {port}-{sandbox_id}.{domain} URLs.
pub proxy_domain: Option<String>,
/// New lifecycle identifier for this resume. When it changes between /// New lifecycle identifier for this resume. When it changes between
/// /init calls, envd treats the call as a post-resume hook: port /// /init calls, envd treats the call as a post-resume hook: port
/// forwarder is restarted and NFS mounts are refreshed. /// forwarder is restarted and NFS mounts are refreshed.
@ -183,14 +186,32 @@ pub async fn post_init(
// SAFETY: envd is single-threaded at init time; no concurrent env reads. // SAFETY: envd is single-threaded at init time; no concurrent env reads.
unsafe { std::env::set_var("WRENN_SANDBOX_ID", id) }; unsafe { std::env::set_var("WRENN_SANDBOX_ID", id) };
write_run_file(".WRENN_SANDBOX_ID", id); write_run_file(".WRENN_SANDBOX_ID", id);
state.defaults.env_vars.insert("WRENN_SANDBOX_ID".into(), id.clone()); state
.defaults
.env_vars
.insert("WRENN_SANDBOX_ID".into(), id.clone());
} }
if let Some(ref id) = init_req.template_id { if let Some(ref id) = init_req.template_id {
tracing::debug!(template_id = %id, "setting template ID from init request"); tracing::debug!(template_id = %id, "setting template ID from init request");
// SAFETY: envd is single-threaded at init time; no concurrent env reads. // SAFETY: envd is single-threaded at init time; no concurrent env reads.
unsafe { std::env::set_var("WRENN_TEMPLATE_ID", id) }; unsafe { std::env::set_var("WRENN_TEMPLATE_ID", id) };
write_run_file(".WRENN_TEMPLATE_ID", id); write_run_file(".WRENN_TEMPLATE_ID", id);
state.defaults.env_vars.insert("WRENN_TEMPLATE_ID".into(), id.clone()); state
.defaults
.env_vars
.insert("WRENN_TEMPLATE_ID".into(), id.clone());
}
if let Some(ref domain) = init_req.proxy_domain {
if !domain.is_empty() {
tracing::debug!(proxy_domain = %domain, "setting proxy domain from init request");
// SAFETY: envd is single-threaded at init time; no concurrent env reads.
unsafe { std::env::set_var("WRENN_PROXY_DOMAIN", domain) };
write_run_file(".WRENN_PROXY_DOMAIN", domain);
state
.defaults
.env_vars
.insert("WRENN_PROXY_DOMAIN".into(), domain.clone());
}
} }
( (
@ -202,7 +223,10 @@ pub async fn post_init(
async fn validate_init_access_token(state: &AppState, request_token: &str) -> Result<(), String> { async fn validate_init_access_token(state: &AppState, request_token: &str) -> Result<(), String> {
// Fast path: matches existing token // Fast path: matches existing token
if state.access_token.is_set() && !request_token.is_empty() && state.access_token.equals(request_token) { if state.access_token.is_set()
&& !request_token.is_empty()
&& state.access_token.equals(request_token)
{
return Ok(()); return Ok(());
} }
@ -241,10 +265,7 @@ async fn setup_hyperloop(address: &str, env_vars: &dashmap::DashMap<String, Stri
} }
} }
env_vars.insert( env_vars.insert("WRENN_EVENTS_ADDRESS".into(), format!("http://{address}"));
"WRENN_EVENTS_ADDRESS".into(),
format!("http://{address}"),
);
} }
async fn setup_nfs(nfs_target: &str, path: &str) { async fn setup_nfs(nfs_target: &str, path: &str) {
@ -287,7 +308,7 @@ async fn setup_nfs(nfs_target: &str, path: &str) {
} }
fn write_run_file(name: &str, value: &str) { fn write_run_file(name: &str, value: &str) {
let dir = std::path::Path::new("/run/wrenn"); let dir = std::path::Path::new(crate::config::WRENN_RUN_DIR);
if let Err(e) = std::fs::create_dir_all(dir) { if let Err(e) = std::fs::create_dir_all(dir) {
tracing::warn!(error = %e, "failed to create /run/wrenn"); tracing::warn!(error = %e, "failed to create /run/wrenn");
return; return;
@ -309,4 +330,3 @@ fn parse_timestamp_to_nanos(ts: &str) -> Result<i64, ()> {
} }
Err(()) Err(())
} }

View File

@ -1,3 +1,4 @@
pub mod activity;
pub mod encoding; pub mod encoding;
pub mod envs; pub mod envs;
pub mod error; pub mod error;
@ -13,8 +14,8 @@ use std::time::Duration;
use axum::Router; use axum::Router;
use axum::routing::{get, post}; use axum::routing::{get, post};
use http::header::{CACHE_CONTROL, HeaderName};
use http::Method; use http::Method;
use http::header::{CACHE_CONTROL, HeaderName};
use tower_http::cors::{AllowHeaders, AllowMethods, AllowOrigin, CorsLayer}; use tower_http::cors::{AllowHeaders, AllowMethods, AllowOrigin, CorsLayer};
use crate::config::CORS_MAX_AGE; use crate::config::CORS_MAX_AGE;
@ -47,6 +48,7 @@ pub fn router(state: Arc<AppState>) -> Router {
Router::new() Router::new()
.route("/health", get(health::get_health)) .route("/health", get(health::get_health))
.route("/activity", get(activity::get_activity))
.route("/metrics", get(metrics::get_metrics)) .route("/metrics", get(metrics::get_metrics))
.route("/envs", get(envs::get_envs)) .route("/envs", get(envs::get_envs))
.route("/init", post(init::post_init)) .route("/init", post(init::post_init))

View File

@ -2,6 +2,7 @@
mod auth; mod auth;
mod cgroups; mod cgroups;
mod cmd;
mod config; mod config;
mod conntracker; mod conntracker;
mod crypto; mod crypto;
@ -39,6 +40,10 @@ const COMMIT: &str = {
#[derive(Parser)] #[derive(Parser)]
#[command(name = "envd", about = "Wrenn guest agent daemon")] #[command(name = "envd", about = "Wrenn guest agent daemon")]
struct Cli { struct Cli {
/// Client subcommand. When omitted, envd runs as the guest daemon.
#[command(subcommand)]
command: Option<Commands>,
#[arg(long, default_value_t = DEFAULT_PORT)] #[arg(long, default_value_t = DEFAULT_PORT)]
port: u16, port: u16,
@ -55,6 +60,12 @@ struct Cli {
cgroup_root: String, cgroup_root: String,
} }
// Short-lived client subcommands of the envd binary. `Cli::command` holds one
// of these as an `Option`; when it is `None`, envd runs as the guest daemon.
#[derive(clap::Subcommand)]
enum Commands {
/// List externally-reachable open ports and the URL each is served at.
Ports(cmd::ports::PortsArgs),
}
#[tokio::main] #[tokio::main]
async fn main() { async fn main() {
let cli = Cli::parse(); let cli = Cli::parse();
@ -68,6 +79,11 @@ async fn main() {
return; return;
} }
// Client subcommands are short-lived: run and exit before any daemon setup.
if let Some(Commands::Ports(args)) = &cli.command {
std::process::exit(cmd::ports::run(args));
}
logging::init(true); logging::init(true);
if let Err(e) = fs::create_dir_all(WRENN_RUN_DIR) { if let Err(e) = fs::create_dir_all(WRENN_RUN_DIR) {
@ -85,8 +101,7 @@ async fn main() {
} }
// Cgroup manager // Cgroup manager
let cgroup_manager: Arc<dyn cgroups::CgroupManager> = let cgroup_manager: Arc<dyn cgroups::CgroupManager> = match cgroups::Cgroup2Manager::new(
match cgroups::Cgroup2Manager::new(
&cli.cgroup_root, &cli.cgroup_root,
&[ &[
( (
@ -138,8 +153,7 @@ async fn main() {
// RPC services (Connect protocol — serves Connect + gRPC + gRPC-Web on same port) // RPC services (Connect protocol — serves Connect + gRPC + gRPC-Web on same port)
let connect_router = rpc::rpc_router(Arc::clone(&state)); let connect_router = rpc::rpc_router(Arc::clone(&state));
let app = http::router(Arc::clone(&state)) let app = http::router(Arc::clone(&state)).fallback_service(connect_router.into_axum_service());
.fallback_service(connect_router.into_axum_service());
// --cmd: spawn initial process if specified // --cmd: spawn initial process if specified
if !cli.start_cmd.is_empty() { if !cli.start_cmd.is_empty() {
@ -151,7 +165,12 @@ async fn main() {
} }
let addr = SocketAddr::from(([0, 0, 0, 0], cli.port)); let addr = SocketAddr::from(([0, 0, 0, 0], cli.port));
tracing::info!(port = cli.port, version = VERSION, commit = COMMIT, "envd starting"); tracing::info!(
port = cli.port,
version = VERSION,
commit = COMMIT,
"envd starting"
);
let listener = TcpListener::bind(addr).await.expect("failed to bind"); let listener = TcpListener::bind(addr).await.expect("failed to bind");
@ -186,9 +205,7 @@ fn spawn_initial_command(cmd: &str, state: &AppState) {
let home = user.dir.to_string_lossy().to_string(); let home = user.dir.to_string_lossy().to_string();
let default_workdir = state.defaults.workdir(); let default_workdir = state.defaults.workdir();
let cwd = default_workdir let cwd = default_workdir.as_deref().unwrap_or(&home);
.as_deref()
.unwrap_or(&home);
match process_handler::spawn_process( match process_handler::spawn_process(
cmd, cmd,
@ -235,8 +252,7 @@ fn memory_reclaimer(_state: Arc<AppState>) {
} else { } else {
let mut sys2 = sysinfo::System::new(); let mut sys2 = sysinfo::System::new();
sys2.refresh_memory(); sys2.refresh_memory();
let freed_mb = let freed_mb = sys2.available_memory().saturating_sub(available) / (1024 * 1024);
sys2.available_memory().saturating_sub(available) / (1024 * 1024);
tracing::info!(used_pct, freed_mb, "page cache dropped"); tracing::info!(used_pct, freed_mb, "page cache dropped");
} }
} }

View File

@ -1,2 +1,2 @@
pub mod user;
pub mod path; pub mod path;
pub mod user;

View File

@ -94,7 +94,10 @@ mod tests {
#[test] #[test]
fn tilde_slash_path() { fn tilde_slash_path() {
assert_eq!(expand_tilde("~/docs", "/home/user").unwrap(), "/home/user/docs"); assert_eq!(
expand_tilde("~/docs", "/home/user").unwrap(),
"/home/user/docs"
);
} }
#[test] #[test]
@ -109,12 +112,18 @@ mod tests {
#[test] #[test]
fn tilde_relative_no_tilde() { fn tilde_relative_no_tilde() {
assert_eq!(expand_tilde("relative/path", "/home/u").unwrap(), "relative/path"); assert_eq!(
expand_tilde("relative/path", "/home/u").unwrap(),
"relative/path"
);
} }
#[test] #[test]
fn tilde_cmd_like() { fn tilde_cmd_like() {
assert_eq!(expand_tilde("~/bin/myapp", "/home/user").unwrap(), "/home/user/bin/myapp"); assert_eq!(
expand_tilde("~/bin/myapp", "/home/user").unwrap(),
"/home/user/bin/myapp"
);
} }
#[test] #[test]
@ -144,12 +153,18 @@ mod tests {
#[test] #[test]
fn resolve_absolute_passthrough() { fn resolve_absolute_passthrough() {
assert_eq!(expand_and_resolve("/abs/path", "/home", None).unwrap(), "/abs/path"); assert_eq!(
expand_and_resolve("/abs/path", "/home", None).unwrap(),
"/abs/path"
);
} }
#[test] #[test]
fn resolve_empty_uses_default() { fn resolve_empty_uses_default() {
assert_eq!(expand_and_resolve("", "/home", Some("/default")).unwrap(), "/default"); assert_eq!(
expand_and_resolve("", "/home", Some("/default")).unwrap(),
"/default"
);
} }
#[test] #[test]
@ -161,7 +176,10 @@ mod tests {
#[test] #[test]
fn resolve_tilde_expands() { fn resolve_tilde_expands() {
assert_eq!(expand_and_resolve("~/dir", "/home/u", None).unwrap(), "/home/u/dir"); assert_eq!(
expand_and_resolve("~/dir", "/home/u", None).unwrap(),
"/home/u/dir"
);
} }
#[test] #[test]

View File

@ -37,6 +37,36 @@ pub fn read_tcp_connections() -> Vec<ConnStat> {
conns conns
} }
/// Lists the TCP ports currently in LISTEN state that the host proxy can reach
/// from outside the guest.
///
/// A listener counts as reachable when it is bound to a wildcard address
/// (`0.0.0.0` / `::`, served directly on the TAP interface) or to loopback
/// (`127.0.0.1` / `::1`, bridged to the TAP IP by the socat forwarder).
/// Listeners bound to any other specific address cannot be routed from the
/// host and are left out, and so is `exclude_port` (envd's own control port).
///
/// The returned ports are sorted ascending with duplicates removed.
pub fn reachable_listening_ports(exclude_port: u32) -> Vec<u32> {
    let connections = read_tcp_connections();
    filter_reachable_ports(&connections, exclude_port)
}
/// Reduces a connection table to the ports of its reachable listeners.
///
/// Keeps entries whose status is `"LISTEN"` and whose local address passes
/// `is_reachable_bind`, drops `exclude_port`, and returns the remaining port
/// numbers sorted ascending with duplicates collapsed.
fn filter_reachable_ports(conns: &[ConnStat], exclude_port: u32) -> Vec<u32> {
    let mut reachable: Vec<u32> = Vec::new();
    for entry in conns {
        // Only sockets that are listening on a reachable bind address qualify.
        if entry.status != "LISTEN" || !is_reachable_bind(&entry.local_ip) {
            continue;
        }
        if entry.local_port != exclude_port {
            reachable.push(entry.local_port);
        }
    }
    // The same port can show up more than once (e.g. IPv4 and IPv6 listeners),
    // so sort first and then collapse adjacent duplicates.
    reachable.sort_unstable();
    reachable.dedup();
    reachable
}
/// Reports whether a listener bound to `ip` can be reached from the host:
/// either a wildcard bind (routed directly via the TAP interface) or a
/// loopback bind (socat-forwarded to the TAP IP). The comparison is an exact
/// string match against the four accepted spellings.
fn is_reachable_bind(ip: &str) -> bool {
    const REACHABLE_BINDS: [&str; 4] = ["0.0.0.0", "::", "127.0.0.1", "::1"];
    REACHABLE_BINDS.contains(&ip)
}
fn parse_proc_net_tcp(path: &str, family: u32) -> io::Result<Vec<ConnStat>> { fn parse_proc_net_tcp(path: &str, family: u32) -> io::Result<Vec<ConnStat>> {
let file = std::fs::File::open(path)?; let file = std::fs::File::open(path)?;
let reader = io::BufReader::new(file); let reader = io::BufReader::new(file);
@ -92,7 +122,10 @@ fn parse_hex_addr(s: &str, family: u32) -> Option<(String, u32)> {
if ip_bytes.len() != 4 { if ip_bytes.len() != 4 {
return None; return None;
} }
format!("{}.{}.{}.{}", ip_bytes[3], ip_bytes[2], ip_bytes[1], ip_bytes[0]) format!(
"{}.{}.{}.{}",
ip_bytes[3], ip_bytes[2], ip_bytes[1], ip_bytes[0]
)
} else { } else {
if ip_bytes.len() != 16 { if ip_bytes.len() != 16 {
return None; return None;
@ -257,4 +290,76 @@ mod tests {
fn parse_nonexistent_file_errors() { fn parse_nonexistent_file_errors() {
assert!(parse_proc_net_tcp("/nonexistent/path", libc::AF_INET as u32).is_err()); assert!(parse_proc_net_tcp("/nonexistent/path", libc::AF_INET as u32).is_err());
} }
// reachable port filtering
/// Test fixture: builds an IPv4-family `ConnStat` with the given local
/// address, port and status, and a zero inode.
fn conn(ip: &str, port: u32, status: &str) -> ConnStat {
    let local_ip = ip.to_string();
    let status = status.to_string();
    ConnStat {
        local_ip,
        local_port: port,
        status,
        family: libc::AF_INET as u32,
        inode: 0,
    }
}
#[test]
fn reachable_bind_accepts_wildcard_and_loopback() {
    // IPv4/IPv6 wildcards first, then IPv4/IPv6 loopback.
    for addr in ["0.0.0.0", "::", "127.0.0.1", "::1"] {
        assert!(is_reachable_bind(addr));
    }
}
#[test]
fn reachable_bind_rejects_specific_address() {
    // Addresses that are neither wildcard nor loopback must all be rejected.
    let specific = ["192.168.1.5", "169.254.0.21", "10.0.0.1"];
    assert!(specific.iter().all(|addr| !is_reachable_bind(addr)));
}
#[test]
fn filter_keeps_only_listen_state() {
    let listening = conn("0.0.0.0", 8000, "LISTEN");
    let established = conn("0.0.0.0", 9000, "ESTABLISHED");
    let ports = filter_reachable_ports(&[listening, established], 49983);
    assert_eq!(ports, vec![8000]);
}
#[test]
fn filter_excludes_unreachable_binds() {
    let loopback = conn("127.0.0.1", 8000, "LISTEN");
    // socat's own listener
    let socat = conn("169.254.0.21", 8001, "LISTEN");
    let lan = conn("192.168.1.5", 8002, "LISTEN");
    let ports = filter_reachable_ports(&[loopback, socat, lan], 49983);
    assert_eq!(ports, vec![8000]);
}
#[test]
fn filter_excludes_envd_control_port() {
    let control = conn("0.0.0.0", 49983, "LISTEN");
    let app = conn("0.0.0.0", 8000, "LISTEN");
    // The port passed as `exclude_port` must be dropped even though it listens
    // on a wildcard address.
    let ports = filter_reachable_ports(&[control, app], 49983);
    assert_eq!(ports, vec![8000]);
}
#[test]
fn filter_dedups_and_sorts() {
    // Port 8000 appears on both IPv6 loopback and the IPv4 wildcard; it must
    // collapse to a single entry, and the output must be ascending.
    let table = [
        conn("::1", 8000, "LISTEN"),
        conn("0.0.0.0", 8000, "LISTEN"),
        conn("0.0.0.0", 3000, "LISTEN"),
    ];
    let ports = filter_reachable_ports(&table, 49983);
    assert_eq!(ports, vec![3000, 8000]);
}
#[test]
fn filter_empty_when_no_listeners() {
    // A table with no LISTEN entries yields no ports at all.
    let only_established = [conn("0.0.0.0", 8000, "ESTABLISHED")];
    let ports = filter_reachable_ports(&only_established, 49983);
    assert!(ports.is_empty());
}
} }

View File

@ -53,9 +53,7 @@ pub fn build_entry_info(path: &str) -> Result<EntryInfo, ConnectError> {
Err(_) => FileType::FILE_TYPE_UNSPECIFIED, Err(_) => FileType::FILE_TYPE_UNSPECIFIED,
}; };
let target_mode = std::fs::metadata(p) let target_mode = std::fs::metadata(p).map(|m| m.mode() & 0o7777).unwrap_or(0);
.map(|m| m.mode() & 0o7777)
.unwrap_or(0);
(target_type, target_mode, Some(target)) (target_type, target_mode, Some(target))
} else { } else {

View File

@ -98,8 +98,7 @@ impl Filesystem for FilesystemServiceImpl {
} }
let username = extract_username(&ctx).unwrap_or_else(|| self.state.defaults.user()); let username = extract_username(&ctx).unwrap_or_else(|| self.state.defaults.user());
let user = let user = lookup_user(&username).map_err(|e| ConnectError::new(ErrorCode::Internal, e))?;
lookup_user(&username).map_err(|e| ConnectError::new(ErrorCode::Internal, e))?;
ensure_dirs(&path, user.uid, user.gid) ensure_dirs(&path, user.uid, user.gid)
.map_err(|e| ConnectError::new(ErrorCode::Internal, e))?; .map_err(|e| ConnectError::new(ErrorCode::Internal, e))?;
@ -123,8 +122,7 @@ impl Filesystem for FilesystemServiceImpl {
let destination = self.resolve_path(request.destination, &ctx)?; let destination = self.resolve_path(request.destination, &ctx)?;
let username = extract_username(&ctx).unwrap_or_else(|| self.state.defaults.user()); let username = extract_username(&ctx).unwrap_or_else(|| self.state.defaults.user());
let user = let user = lookup_user(&username).map_err(|e| ConnectError::new(ErrorCode::Internal, e))?;
lookup_user(&username).map_err(|e| ConnectError::new(ErrorCode::Internal, e))?;
if let Some(parent) = Path::new(&destination).parent() { if let Some(parent) = Path::new(&destination).parent() {
ensure_dirs(&parent.to_string_lossy(), user.uid, user.gid) ensure_dirs(&parent.to_string_lossy(), user.uid, user.gid)
@ -206,7 +204,12 @@ impl Filesystem for FilesystemServiceImpl {
} }
} }
Ok((RemoveResponse { ..Default::default() }, ctx)) Ok((
RemoveResponse {
..Default::default()
},
ctx,
))
} }
async fn watch_dir( async fn watch_dir(
@ -247,8 +250,8 @@ impl Filesystem for FilesystemServiceImpl {
let events: Arc<Mutex<Vec<FilesystemEvent>>> = Arc::new(Mutex::new(Vec::new())); let events: Arc<Mutex<Vec<FilesystemEvent>>> = Arc::new(Mutex::new(Vec::new()));
let events_cb = Arc::clone(&events); let events_cb = Arc::clone(&events);
let mut watcher = notify::recommended_watcher( let mut watcher =
move |res: Result<notify::Event, notify::Error>| { notify::recommended_watcher(move |res: Result<notify::Event, notify::Error>| {
if let Ok(event) = res { if let Ok(event) = res {
let event_type = match event.kind { let event_type = match event.kind {
notify::EventKind::Create(_) => EventType::EVENT_TYPE_CREATE, notify::EventKind::Create(_) => EventType::EVENT_TYPE_CREATE,
@ -275,10 +278,12 @@ impl Filesystem for FilesystemServiceImpl {
} }
} }
} }
}, })
)
.map_err(|e| { .map_err(|e| {
ConnectError::new(ErrorCode::Internal, format!("failed to create watcher: {e}")) ConnectError::new(
ErrorCode::Internal,
format!("failed to create watcher: {e}"),
)
})?; })?;
let mode = if recursive { let mode = if recursive {
@ -342,7 +347,12 @@ impl Filesystem for FilesystemServiceImpl {
) -> Result<(RemoveWatcherResponse, Context), ConnectError> { ) -> Result<(RemoveWatcherResponse, Context), ConnectError> {
let watcher_id: &str = request.watcher_id; let watcher_id: &str = request.watcher_id;
self.watchers.remove(watcher_id); self.watchers.remove(watcher_id);
Ok((RemoveWatcherResponse { ..Default::default() }, ctx)) Ok((
RemoveWatcherResponse {
..Default::default()
},
ctx,
))
} }
} }

View File

@ -1,17 +1,17 @@
pub mod pb;
pub mod entry; pub mod entry;
pub mod filesystem_service;
pub mod pb;
pub mod process_handler; pub mod process_handler;
pub mod process_service; pub mod process_service;
pub mod filesystem_service;
use std::sync::Arc; use std::sync::Arc;
use crate::rpc::process_service::ProcessServiceImpl;
use crate::rpc::filesystem_service::FilesystemServiceImpl; use crate::rpc::filesystem_service::FilesystemServiceImpl;
use crate::rpc::process_service::ProcessServiceImpl;
use crate::state::AppState; use crate::state::AppState;
use pb::process::ProcessExt;
use pb::filesystem::FilesystemExt; use pb::filesystem::FilesystemExt;
use pb::process::ProcessExt;
/// Build the connect-rust Router with both RPC services registered. /// Build the connect-rust Router with both RPC services registered.
pub fn rpc_router(state: Arc<AppState>) -> connectrpc::Router { pub fn rpc_router(state: Arc<AppState>) -> connectrpc::Router {

View File

@ -1,4 +1,9 @@
#![allow(dead_code, non_camel_case_types, unused_imports, clippy::derivable_impls)] #![allow(
dead_code,
non_camel_case_types,
unused_imports,
clippy::derivable_impls
)]
use ::buffa; use ::buffa;
use ::buffa_types; use ::buffa_types;

View File

@ -1,10 +1,11 @@
use std::collections::VecDeque;
use std::io::Read; use std::io::Read;
use std::os::unix::process::CommandExt; use std::os::unix::process::CommandExt;
use std::process::Stdio; use std::process::Stdio;
use std::sync::{Arc, Mutex}; use std::sync::{Arc, Mutex};
use connectrpc::{ConnectError, ErrorCode}; use connectrpc::{ConnectError, ErrorCode};
use nix::pty::{openpty, Winsize}; use nix::pty::{Winsize, openpty};
use nix::sys::signal::{self, Signal}; use nix::sys::signal::{self, Signal};
use nix::unistd::Pid; use nix::unistd::Pid;
use tokio::sync::broadcast; use tokio::sync::broadcast;
@ -15,6 +16,11 @@ const STD_CHUNK_SIZE: usize = 32768;
const PTY_CHUNK_SIZE: usize = 16384; const PTY_CHUNK_SIZE: usize = 16384;
const BROADCAST_CAPACITY: usize = 4096; const BROADCAST_CAPACITY: usize = 4096;
/// Upper bound, in bytes, on the per-process output kept for replay (256 KiB).
/// A late Connect gets at most the most recent OUTPUT_LOG_CAPACITY bytes;
/// older output is evicted one whole event at a time, so the buffer can never
/// grow without bound for a chatty long-running process.
const OUTPUT_LOG_CAPACITY: usize = 256 * 1024;
#[derive(Clone)] #[derive(Clone)]
pub enum DataEvent { pub enum DataEvent {
Stdout(Vec<u8>), Stdout(Vec<u8>),
@ -30,6 +36,37 @@ pub struct EndEvent {
pub error: Option<String>, pub error: Option<String>,
} }
/// Bounded ring of recent output, kept so a late Connect can replay what it
/// missed. Evicts oldest events once the retained bytes exceed the cap.
#[derive(Default)]
struct OutputLog {
/// Retained events in arrival order; the front of the deque is the oldest.
events: VecDeque<DataEvent>,
/// Running total of the payload lengths of everything currently in `events`.
bytes: usize,
}
impl OutputLog {
    /// Stores a copy of `ev` at the back of the log, then discards events from
    /// the front until the retained payload fits within
    /// `OUTPUT_LOG_CAPACITY` bytes again.
    fn push(&mut self, ev: &DataEvent) {
        self.events.push_back(ev.clone());
        self.bytes += ev_len(ev);
        while self.bytes > OUTPUT_LOG_CAPACITY {
            if let Some(evicted) = self.events.pop_front() {
                self.bytes -= ev_len(&evicted);
            } else {
                // Nothing left to evict.
                break;
            }
        }
    }

    /// Returns an owned copy of every retained event, oldest first.
    fn snapshot(&self) -> Vec<DataEvent> {
        let mut copy = Vec::with_capacity(self.events.len());
        copy.extend(self.events.iter().cloned());
        copy
    }
}
/// Returns the number of payload bytes carried by `ev`, whichever stream
/// (stdout, stderr or pty) it came from.
fn ev_len(ev: &DataEvent) -> usize {
    let payload = match ev {
        DataEvent::Stdout(bytes) => bytes,
        DataEvent::Stderr(bytes) => bytes,
        DataEvent::Pty(bytes) => bytes,
    };
    payload.len()
}
pub struct ProcessHandle { pub struct ProcessHandle {
pub config: ProcessConfig, pub config: ProcessConfig,
pub tag: Option<String>, pub tag: Option<String>,
@ -38,6 +75,7 @@ pub struct ProcessHandle {
data_tx: broadcast::Sender<DataEvent>, data_tx: broadcast::Sender<DataEvent>,
end_tx: broadcast::Sender<EndEvent>, end_tx: broadcast::Sender<EndEvent>,
ended: Mutex<Option<EndEvent>>, ended: Mutex<Option<EndEvent>>,
output_log: Mutex<OutputLog>,
stdin: Mutex<Option<std::process::ChildStdin>>, stdin: Mutex<Option<std::process::ChildStdin>>,
pty_master: Mutex<Option<std::fs::File>>, pty_master: Mutex<Option<std::fs::File>>,
@ -48,6 +86,26 @@ impl ProcessHandle {
self.data_tx.subscribe() self.data_tx.subscribe()
} }
/// Append a chunk to the replay buffer and broadcast it live, under one
/// lock. The shared lock is what makes [`Self::subscribe_data_replay`]
/// race-free: a concurrent attach sees this chunk either in its snapshot or
/// on its live receiver — never both, never neither.
///
/// Panics if the `output_log` mutex has been poisoned (`lock().unwrap()`).
pub fn publish_data(&self, ev: DataEvent) {
// The guard is held until the end of the function, so the broadcast below
// still happens while the log is locked. Do not shorten its scope.
let mut log = self.output_log.lock().unwrap();
log.push(&ev);
// The send result is deliberately discarded. NOTE(review): presumably the
// only failure is "no receiver currently subscribed" — confirm against the
// tokio broadcast docs. The chunk is already in the replay log either way.
let _ = self.data_tx.send(ev);
}
/// Snapshot the buffered output and subscribe to live output atomically, so
/// a late Connect replays what it missed and then continues live with no gap
/// or duplicate across the handoff.
///
/// Returns the retained events (oldest first; the log evicts older output
/// once it exceeds `OUTPUT_LOG_CAPACITY` bytes) together with a receiver
/// created while the log is still locked.
///
/// Panics if the `output_log` mutex has been poisoned (`lock().unwrap()`).
pub fn subscribe_data_replay(&self) -> (Vec<DataEvent>, broadcast::Receiver<DataEvent>) {
// Hold the same lock `publish_data` takes: no chunk can be published between
// taking the snapshot and creating the receiver.
let log = self.output_log.lock().unwrap();
let snapshot = log.snapshot();
let rx = self.data_tx.subscribe();
(snapshot, rx)
}
pub fn subscribe_end(&self) -> broadcast::Receiver<EndEvent> { pub fn subscribe_end(&self) -> broadcast::Receiver<EndEvent> {
self.end_tx.subscribe() self.end_tx.subscribe()
} }
@ -160,6 +218,9 @@ pub fn spawn_process(
env.push(("HOME".into(), home)); env.push(("HOME".into(), home));
env.push(("USER".into(), user.name.clone())); env.push(("USER".into(), user.name.clone()));
env.push(("LOGNAME".into(), user.name.clone())); env.push(("LOGNAME".into(), user.name.clone()));
if !user.shell.as_os_str().is_empty() {
env.push(("SHELL".into(), user.shell.to_string_lossy().to_string()));
}
default_env_vars.iter().for_each(|entry| { default_env_vars.iter().for_each(|entry| {
env.push((entry.key().clone(), entry.value().clone())); env.push((entry.key().clone(), entry.value().clone()));
@ -179,21 +240,40 @@ pub fn spawn_process(
let nice_delta = 0 - current_nice(); let nice_delta = 0 - current_nice();
let profile_source = r#"test -f /etc/profile && . /etc/profile let profile_source = r#"test -f /etc/profile && . /etc/profile
test -f "${HOME}/.bashrc" && . "${HOME}/.bashrc""#; test -f "${HOME}/.bashrc" && . "${HOME}/.bashrc""#;
let oom_script = if nice_delta > 0 {
format!( // Resolve the user's login shell, falling back to /bin/sh. Commands without
r#"echo 100 > /proc/$$/oom_score_adj // explicit args are interpreted by this shell so pipes, quoting, escape
{} // sequences, backslash line-continuations, and other shell syntax work
exec /usr/bin/nice -n {} "${{@}}""#, // without the caller having to wrap them in `sh -c` themselves.
profile_source, nice_delta, let shell = {
) let s = user.shell.to_string_lossy();
if s.is_empty() {
"/bin/sh".to_string()
} else { } else {
format!( s.to_string()
r#"echo 100 > /proc/$$/oom_score_adj }
{}
exec "$@""#,
profile_source
)
}; };
// What the wrapper finally exec's, after the optional `nice` prefix.
// - no args: run cmd_str as a shell command line via the login shell
// ($1 is cmd_str; $0 of the inner shell is the shell path).
// - with args: exec the program + args directly, no shell interpretation
// (backward-compatible program/argv form).
let target = if args.is_empty() {
format!(r#""{shell}" -c "$1" "{shell}""#)
} else {
r#""$@""#.to_string()
};
let nice_prefix = if nice_delta > 0 {
format!("/usr/bin/nice -n {nice_delta} ")
} else {
String::new()
};
let oom_script = format!(
r#"echo 100 > /proc/$$/oom_score_adj
{profile_source}
exec {nice_prefix}{target}"#
);
let mut wrapper_args = vec![ let mut wrapper_args = vec![
"-c".to_string(), "-c".to_string(),
oom_script, oom_script,
@ -264,7 +344,10 @@ exec "$@""#,
command.stderr(Stdio::null()); command.stderr(Stdio::null());
let child = command.spawn().map_err(|e| { let child = command.spawn().map_err(|e| {
ConnectError::new(ErrorCode::Internal, format!("error starting pty process: {e}")) ConnectError::new(
ErrorCode::Internal,
format!("error starting pty process: {e}"),
)
})?; })?;
drop(slave_fd); drop(slave_fd);
@ -280,6 +363,7 @@ exec "$@""#,
data_tx: data_tx.clone(), data_tx: data_tx.clone(),
end_tx: end_tx.clone(), end_tx: end_tx.clone(),
ended: Mutex::new(None), ended: Mutex::new(None),
output_log: Mutex::new(OutputLog::default()),
stdin: Mutex::new(None), stdin: Mutex::new(None),
pty_master: Mutex::new(Some(master_file)), pty_master: Mutex::new(Some(master_file)),
}); });
@ -287,7 +371,7 @@ exec "$@""#,
let data_rx = handle.subscribe_data(); let data_rx = handle.subscribe_data();
let end_rx = handle.subscribe_end(); let end_rx = handle.subscribe_end();
let data_tx_clone = data_tx.clone(); let handle_for_reader = Arc::clone(&handle);
let pty_reader = std::thread::spawn(move || { let pty_reader = std::thread::spawn(move || {
let mut master = master_clone; let mut master = master_clone;
let mut buf = vec![0u8; PTY_CHUNK_SIZE]; let mut buf = vec![0u8; PTY_CHUNK_SIZE];
@ -295,7 +379,7 @@ exec "$@""#,
match master.read(&mut buf) { match master.read(&mut buf) {
Ok(0) => break, Ok(0) => break,
Ok(n) => { Ok(n) => {
let _ = data_tx_clone.send(DataEvent::Pty(buf[..n].to_vec())); handle_for_reader.publish_data(DataEvent::Pty(buf[..n].to_vec()));
} }
Err(_) => break, Err(_) => break,
} }
@ -329,7 +413,11 @@ exec "$@""#,
}); });
tracing::info!(pid, cmd = cmd_str, "process started (pty)"); tracing::info!(pid, cmd = cmd_str, "process started (pty)");
Ok(SpawnedProcess { handle, data_rx, end_rx }) Ok(SpawnedProcess {
handle,
data_rx,
end_rx,
})
} else { } else {
let mut command = std::process::Command::new("/bin/bash"); let mut command = std::process::Command::new("/bin/bash");
command command
@ -375,6 +463,7 @@ exec "$@""#,
data_tx: data_tx.clone(), data_tx: data_tx.clone(),
end_tx: end_tx.clone(), end_tx: end_tx.clone(),
ended: Mutex::new(None), ended: Mutex::new(None),
output_log: Mutex::new(OutputLog::default()),
stdin: Mutex::new(stdin), stdin: Mutex::new(stdin),
pty_master: Mutex::new(None), pty_master: Mutex::new(None),
}); });
@ -385,14 +474,14 @@ exec "$@""#,
let mut output_readers: Vec<std::thread::JoinHandle<()>> = Vec::new(); let mut output_readers: Vec<std::thread::JoinHandle<()>> = Vec::new();
if let Some(mut out) = stdout { if let Some(mut out) = stdout {
let tx = data_tx.clone(); let handle_for_reader = Arc::clone(&handle);
output_readers.push(std::thread::spawn(move || { output_readers.push(std::thread::spawn(move || {
let mut buf = vec![0u8; STD_CHUNK_SIZE]; let mut buf = vec![0u8; STD_CHUNK_SIZE];
loop { loop {
match out.read(&mut buf) { match out.read(&mut buf) {
Ok(0) => break, Ok(0) => break,
Ok(n) => { Ok(n) => {
let _ = tx.send(DataEvent::Stdout(buf[..n].to_vec())); handle_for_reader.publish_data(DataEvent::Stdout(buf[..n].to_vec()));
} }
Err(_) => break, Err(_) => break,
} }
@ -401,14 +490,14 @@ exec "$@""#,
} }
if let Some(mut err_pipe) = stderr { if let Some(mut err_pipe) = stderr {
let tx = data_tx.clone(); let handle_for_reader = Arc::clone(&handle);
output_readers.push(std::thread::spawn(move || { output_readers.push(std::thread::spawn(move || {
let mut buf = vec![0u8; STD_CHUNK_SIZE]; let mut buf = vec![0u8; STD_CHUNK_SIZE];
loop { loop {
match err_pipe.read(&mut buf) { match err_pipe.read(&mut buf) {
Ok(0) => break, Ok(0) => break,
Ok(n) => { Ok(n) => {
let _ = tx.send(DataEvent::Stderr(buf[..n].to_vec())); handle_for_reader.publish_data(DataEvent::Stderr(buf[..n].to_vec()));
} }
Err(_) => break, Err(_) => break,
} }
@ -444,7 +533,11 @@ exec "$@""#,
}); });
tracing::info!(pid, cmd = cmd_str, "process started (pipe)"); tracing::info!(pid, cmd = cmd_str, "process started (pipe)");
Ok(SpawnedProcess { handle, data_rx, end_rx }) Ok(SpawnedProcess {
handle,
data_rx,
end_rx,
})
} }
} }

View File

@ -4,7 +4,8 @@ use std::sync::Arc;
use connectrpc::{ConnectError, Context, ErrorCode}; use connectrpc::{ConnectError, Context, ErrorCode};
use dashmap::DashMap; use dashmap::DashMap;
use futures::Stream; use futures::{Stream, StreamExt};
use tokio::sync::broadcast;
use crate::permissions::path::{expand_and_resolve, expand_tilde}; use crate::permissions::path::{expand_and_resolve, expand_tilde};
use crate::permissions::user::lookup_user; use crate::permissions::user::lookup_user;
@ -72,8 +73,7 @@ impl ProcessServiceImpl {
})?; })?;
let username = self.state.defaults.user(); let username = self.state.defaults.user();
let user = let user = lookup_user(&username).map_err(|e| ConnectError::new(ErrorCode::Internal, e))?;
lookup_user(&username).map_err(|e| ConnectError::new(ErrorCode::Internal, e))?;
let cmd_raw: &str = proc_config.cmd; let cmd_raw: &str = proc_config.cmd;
let args_raw: Vec<String> = proc_config.args.iter().map(|s| s.to_string()).collect(); let args_raw: Vec<String> = proc_config.args.iter().map(|s| s.to_string()).collect();
@ -87,7 +87,8 @@ impl ProcessServiceImpl {
let cmd = expand_tilde(cmd_raw, &home_dir) let cmd = expand_tilde(cmd_raw, &home_dir)
.map_err(|e| ConnectError::new(ErrorCode::InvalidArgument, e))?; .map_err(|e| ConnectError::new(ErrorCode::InvalidArgument, e))?;
let args: Vec<String> = args_raw.into_iter() let args: Vec<String> = args_raw
.into_iter()
.map(|a| expand_tilde(&a, &home_dir).unwrap_or(a)) .map(|a| expand_tilde(&a, &home_dir).unwrap_or(a))
.collect(); .collect();
@ -136,7 +137,8 @@ impl ProcessServiceImpl {
&self.state.defaults.env_vars, &self.state.defaults.env_vars,
)?; )?;
self.processes.insert(spawned.handle.pid, Arc::clone(&spawned.handle)); self.processes
.insert(spawned.handle.pid, Arc::clone(&spawned.handle));
let processes = Arc::clone(&self.processes); let processes = Arc::clone(&self.processes);
let pid = spawned.handle.pid; let pid = spawned.handle.pid;
@ -203,50 +205,10 @@ impl Process for ProcessServiceImpl {
let spawned = self.spawn_from_request(&request)?; let spawned = self.spawn_from_request(&request)?;
let pid = spawned.handle.pid; let pid = spawned.handle.pid;
let mut data_rx = spawned.data_rx; // Start subscribes before any output is produced, so there is nothing to
let mut end_rx = spawned.end_rx; // replay and the process cannot have ended yet.
let stream = process_event_stream(pid, Vec::new(), spawned.data_rx, spawned.end_rx, None)
let stream = async_stream::stream! { .map(|r| r.map(wrap_start_response));
yield Ok(make_start_response(pid));
loop {
tokio::select! {
biased;
data = data_rx.recv() => {
match data {
Ok(ev) => yield Ok(make_data_start_response(ev)),
Err(tokio::sync::broadcast::error::RecvError::Lagged(_)) => continue,
Err(tokio::sync::broadcast::error::RecvError::Closed) => {
// Data channel closed: the process ended and its
// handle was dropped. The end event is published
// before the handle drop, so it is still buffered
// — emit it rather than losing the exit code.
if let Ok(end) = end_rx.try_recv() {
yield Ok(make_end_start_response(end));
}
break;
}
}
}
end = end_rx.recv() => {
// Process ended. The waiter joins the output readers
// before sending this event, so every byte is already
// in the data channel — drain it fully before the end.
loop {
match data_rx.try_recv() {
Ok(ev) => yield Ok(make_data_start_response(ev)),
Err(tokio::sync::broadcast::error::TryRecvError::Lagged(_)) => continue,
Err(_) => break,
}
}
if let Ok(end) = end {
yield Ok(make_end_start_response(end));
}
break;
}
}
}
};
Ok((Box::pin(stream), ctx)) Ok((Box::pin(stream), ctx))
} }
@ -268,81 +230,17 @@ impl Process for ProcessServiceImpl {
let handle = self.get_process_by_selector(selector)?; let handle = self.get_process_by_selector(selector)?;
let pid = handle.pid; let pid = handle.pid;
let mut data_rx = handle.subscribe_data(); // Snapshot buffered output + subscribe live atomically, then read the
let mut end_rx = handle.subscribe_end(); // exit state. Ordering matters: end_rx must be subscribed before
// cached_end is read so a process that exits in the window is still
// observed (via the channel if subscribed in time, via cached_end
// otherwise).
let (replay, data_rx) = handle.subscribe_data_replay();
let end_rx = handle.subscribe_end();
let cached_end = handle.cached_end(); let cached_end = handle.cached_end();
let stream = async_stream::stream! { let stream = process_event_stream(pid, replay, data_rx, end_rx, cached_end)
yield Ok(ConnectResponse { .map(|r| r.map(wrap_connect_response));
event: buffa::MessageField::some(ProcessEvent {
event: Some(process_event::Event::Start(Box::new(
process_event::StartEvent { pid, ..Default::default() },
))),
..Default::default()
}),
..Default::default()
});
if let Some(end) = cached_end {
yield Ok(ConnectResponse {
event: buffa::MessageField::some(make_end_event(end)),
..Default::default()
});
} else {
loop {
tokio::select! {
biased;
data = data_rx.recv() => {
match data {
Ok(ev) => {
yield Ok(ConnectResponse {
event: buffa::MessageField::some(make_data_event(ev)),
..Default::default()
});
}
Err(tokio::sync::broadcast::error::RecvError::Lagged(_)) => continue,
Err(tokio::sync::broadcast::error::RecvError::Closed) => {
// Data channel closed: the process ended and
// its handle was dropped. The end event is
// published before the handle drop, so it is
// still buffered — emit it rather than losing
// the exit code.
if let Ok(end) = end_rx.try_recv() {
yield Ok(ConnectResponse {
event: buffa::MessageField::some(make_end_event(end)),
..Default::default()
});
}
break;
}
}
}
end = end_rx.recv() => {
// Process ended. The waiter joins the output readers
// before sending this event, so every byte is already
// in the data channel — drain it fully before the end.
loop {
match data_rx.try_recv() {
Ok(ev) => yield Ok(ConnectResponse {
event: buffa::MessageField::some(make_data_event(ev)),
..Default::default()
}),
Err(tokio::sync::broadcast::error::TryRecvError::Lagged(_)) => continue,
Err(_) => break,
}
}
if let Ok(end) = end {
yield Ok(ConnectResponse {
event: buffa::MessageField::some(make_end_event(end)),
..Default::default()
});
}
break;
}
}
}
}
};
Ok((Box::pin(stream), ctx)) Ok((Box::pin(stream), ctx))
} }
@ -363,7 +261,12 @@ impl Process for ProcessServiceImpl {
} }
} }
Ok((UpdateResponse { ..Default::default() }, ctx)) Ok((
UpdateResponse {
..Default::default()
},
ctx,
))
} }
async fn stream_input( async fn stream_input(
@ -405,7 +308,12 @@ impl Process for ProcessServiceImpl {
} }
} }
Ok((StreamInputResponse { ..Default::default() }, ctx)) Ok((
StreamInputResponse {
..Default::default()
},
ctx,
))
} }
async fn send_input( async fn send_input(
@ -422,7 +330,12 @@ impl Process for ProcessServiceImpl {
write_input(&handle, input)?; write_input(&handle, input)?;
} }
Ok((SendInputResponse { ..Default::default() }, ctx)) Ok((
SendInputResponse {
..Default::default()
},
ctx,
))
} }
async fn send_signal( async fn send_signal(
@ -442,12 +355,17 @@ impl Process for ProcessServiceImpl {
return Err(ConnectError::new( return Err(ConnectError::new(
ErrorCode::InvalidArgument, ErrorCode::InvalidArgument,
"invalid or unspecified signal", "invalid or unspecified signal",
)) ));
} }
}; };
handle.send_signal(sig)?; handle.send_signal(sig)?;
Ok((SendSignalResponse { ..Default::default() }, ctx)) Ok((
SendSignalResponse {
..Default::default()
},
ctx,
))
} }
async fn close_stdin( async fn close_stdin(
@ -460,7 +378,12 @@ impl Process for ProcessServiceImpl {
})?; })?;
let handle = self.get_process_by_selector(selector)?; let handle = self.get_process_by_selector(selector)?;
handle.close_stdin()?; handle.close_stdin()?;
Ok((CloseStdinResponse { ..Default::default() }, ctx)) Ok((
CloseStdinResponse {
..Default::default()
},
ctx,
))
} }
} }
@ -472,9 +395,100 @@ fn write_input(handle: &ProcessHandle, input: &ProcessInputView) -> Result<(), C
} }
} }
fn make_start_response(pid: u32) -> StartResponse { /// Shared event pump for `Start` and `Connect`. Yields a leading start event,
/// replays any buffered output (empty for `Start`), then forwards live output
/// and the final exit event. The caller wraps each `ProcessEvent` into its own
/// response envelope, so the streaming logic lives in exactly one place.
///
/// Arguments:
/// * `pid` - placed in the leading start event.
/// * `replay` - previously buffered output, yielded in order right after the
///   start event and before anything read from `data_rx`.
/// * `data_rx` / `end_rx` - broadcast receivers for live output and for the
///   exit event.
/// * `cached_end` - when `Some`, the exit is taken from this value: `data_rx`
///   is drained without blocking, the exit event is yielded, and the stream
///   finishes without ever awaiting `end_rx`.
fn process_event_stream(
pid: u32,
replay: Vec<DataEvent>,
mut data_rx: broadcast::Receiver<DataEvent>,
mut end_rx: broadcast::Receiver<process_handler::EndEvent>,
cached_end: Option<process_handler::EndEvent>,
) -> impl Stream<Item = Result<ProcessEvent, ConnectError>> {
use broadcast::error::{RecvError, TryRecvError};
async_stream::stream! {
// The start event is always the first item, regardless of process state.
yield Ok(make_start_event(pid));
for ev in replay {
yield Ok(make_data_event(ev));
}
// Process already exited before we attached. The snapshot above covers
// output up to the attach point; drain anything the live receiver
// buffered after the snapshot, then emit the cached exit. end_rx may
// never deliver here — a broadcast receiver only sees events sent after
// it subscribed, and the exit can predate that — so cached_end is the
// source of truth.
if let Some(end) = cached_end {
loop {
match data_rx.try_recv() {
Ok(ev) => yield Ok(make_data_event(ev)),
// Lagged: some messages were dropped for this receiver; retry
// to keep reading whatever is still buffered.
Err(TryRecvError::Lagged(_)) => continue,
// Any other error (nothing buffered, or channel closed) ends
// the drain.
Err(_) => break,
}
}
yield Ok(make_end_event(end));
return;
}
loop {
tokio::select! {
// `biased` polls the branches in the order written, so pending
// output is always preferred over the exit event.
biased;
data = data_rx.recv() => {
match data {
Ok(ev) => yield Ok(make_data_event(ev)),
// Dropped messages are not reported to the client; keep
// streaming from the oldest message still retained.
Err(RecvError::Lagged(_)) => continue,
Err(RecvError::Closed) => {
// Data channel closed: the process ended and its
// handle was dropped. The end event is published
// before the handle drop, so it is still buffered —
// emit it rather than losing the exit code.
if let Ok(end) = end_rx.try_recv() {
yield Ok(make_end_event(end));
}
break;
}
}
}
end = end_rx.recv() => {
// Process ended. The waiter joins the output readers before
// sending this event, so every byte is already in the data
// channel — drain it fully before the end.
loop {
match data_rx.try_recv() {
Ok(ev) => yield Ok(make_data_event(ev)),
Err(TryRecvError::Lagged(_)) => continue,
Err(_) => break,
}
}
// NOTE(review): if `end_rx.recv()` itself returned an error the
// stream finishes here without an end event.
if let Ok(end) = end {
yield Ok(make_end_event(end));
}
break;
}
}
}
}
}
fn wrap_start_response(event: ProcessEvent) -> StartResponse {
StartResponse { StartResponse {
event: buffa::MessageField::some(ProcessEvent { event: buffa::MessageField::some(event),
..Default::default()
}
}
/// Wraps a `ProcessEvent` in the `ConnectResponse` envelope. Only the `event`
/// field is set; every other field takes its `Default` value.
fn wrap_connect_response(event: ProcessEvent) -> ConnectResponse {
ConnectResponse {
event: buffa::MessageField::some(event),
..Default::default()
}
}
fn make_start_event(pid: u32) -> ProcessEvent {
ProcessEvent {
event: Some(process_event::Event::Start(Box::new( event: Some(process_event::Event::Start(Box::new(
process_event::StartEvent { process_event::StartEvent {
pid, pid,
@ -482,8 +496,6 @@ fn make_start_response(pid: u32) -> StartResponse {
}, },
))), ))),
..Default::default() ..Default::default()
}),
..Default::default()
} }
} }
@ -504,13 +516,6 @@ fn make_data_event(ev: DataEvent) -> ProcessEvent {
} }
} }
fn make_data_start_response(ev: DataEvent) -> StartResponse {
StartResponse {
event: buffa::MessageField::some(make_data_event(ev)),
..Default::default()
}
}
fn make_end_event(end: process_handler::EndEvent) -> ProcessEvent { fn make_end_event(end: process_handler::EndEvent) -> ProcessEvent {
ProcessEvent { ProcessEvent {
event: Some(process_event::Event::End(Box::new( event: Some(process_event::Event::End(Box::new(
@ -526,13 +531,6 @@ fn make_end_event(end: process_handler::EndEvent) -> ProcessEvent {
} }
} }
fn make_end_start_response(end: process_handler::EndEvent) -> StartResponse {
StartResponse {
event: buffa::MessageField::some(make_end_event(end)),
..Default::default()
}
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
@ -589,7 +587,8 @@ mod tests {
fn args_other_user_left_literal() { fn args_other_user_left_literal() {
let home_dir = "/home/testuser"; let home_dir = "/home/testuser";
let args_raw = vec!["~other".to_string(), "~other/path".to_string()]; let args_raw = vec!["~other".to_string(), "~other/path".to_string()];
let args: Vec<String> = args_raw.into_iter() let args: Vec<String> = args_raw
.into_iter()
.map(|a| expand_tilde(&a, home_dir).unwrap_or(a)) .map(|a| expand_tilde(&a, home_dir).unwrap_or(a))
.collect(); .collect();
assert_eq!(args, vec!["~other", "~other/path"]); assert_eq!(args, vec!["~other", "~other/path"]);
@ -618,17 +617,22 @@ mod tests {
"/tmp/out".to_string(), "/tmp/out".to_string(),
"~other".to_string(), "~other".to_string(),
]; ];
let args: Vec<String> = args_raw.into_iter() let args: Vec<String> = args_raw
.into_iter()
.map(|a| expand_tilde(&a, home_dir).unwrap_or(a)) .map(|a| expand_tilde(&a, home_dir).unwrap_or(a))
.collect(); .collect();
assert_eq!(args, vec!["-p", "/home/testuser/data", "/tmp/out", "~other"]); assert_eq!(
args,
vec!["-p", "/home/testuser/data", "/tmp/out", "~other"]
);
} }
#[test] #[test]
fn args_empty_passthrough() { fn args_empty_passthrough() {
let home_dir = "/home/testuser"; let home_dir = "/home/testuser";
let args_raw: Vec<String> = vec![]; let args_raw: Vec<String> = vec![];
let args: Vec<String> = args_raw.into_iter() let args: Vec<String> = args_raw
.into_iter()
.map(|a| expand_tilde(&a, home_dir).unwrap_or(a)) .map(|a| expand_tilde(&a, home_dir).unwrap_or(a))
.collect(); .collect();
assert!(args.is_empty()); assert!(args.is_empty());

View File

@ -1,4 +1,4 @@
use std::sync::atomic::{AtomicBool, AtomicU32, AtomicU64, AtomicU8, Ordering}; use std::sync::atomic::{AtomicBool, AtomicU8, AtomicU32, AtomicU64, Ordering};
use std::sync::{Arc, Mutex}; use std::sync::{Arc, Mutex};
use crate::auth::token::SecureToken; use crate::auth::token::SecureToken;
@ -17,6 +17,11 @@ pub struct AppState {
pub port_subsystem: Option<Arc<PortSubsystem>>, pub port_subsystem: Option<Arc<PortSubsystem>>,
pub cpu_used_pct: AtomicU32, pub cpu_used_pct: AtomicU32,
pub cpu_count: AtomicU32, pub cpu_count: AtomicU32,
/// Whole-VM IO throughput, bytes/sec, sampled over the last 1s tick. Used
/// by the host activity sampler to keep IO-bound-but-CPU-idle workloads
/// (e.g. a long download) from being mistaken for inactive.
pub net_bps: AtomicU64,
pub disk_bps: AtomicU64,
/// Memory preload coordination. The host agent POSTs /memory/preload after /// Memory preload coordination. The host agent POSTs /memory/preload after
/// a snapshot restore to materialise every physical page (so the next /// a snapshot restore to materialise every physical page (so the next
@ -56,6 +61,8 @@ impl AppState {
port_subsystem, port_subsystem,
cpu_used_pct: AtomicU32::new(0), cpu_used_pct: AtomicU32::new(0),
cpu_count: AtomicU32::new(0), cpu_count: AtomicU32::new(0),
net_bps: AtomicU64::new(0),
disk_bps: AtomicU64::new(0),
mem_preload_started: AtomicBool::new(false), mem_preload_started: AtomicBool::new(false),
mem_preload_done: AtomicBool::new(false), mem_preload_done: AtomicBool::new(false),
mem_preload_cancel: AtomicBool::new(false), mem_preload_cancel: AtomicBool::new(false),
@ -70,7 +77,7 @@ impl AppState {
let state_clone = Arc::clone(&state); let state_clone = Arc::clone(&state);
std::thread::spawn(move || { std::thread::spawn(move || {
cpu_sampler(state_clone); activity_sampler(state_clone);
}); });
state state
@ -84,6 +91,14 @@ impl AppState {
self.cpu_count.load(Ordering::Relaxed) self.cpu_count.load(Ordering::Relaxed)
} }
/// Returns the current value of the `net_bps` field (network throughput in
/// bytes/sec, per the field's documentation). Relaxed load: the value is an
/// independent sample and is not ordered against other state.
pub fn net_bps(&self) -> u64 {
self.net_bps.load(Ordering::Relaxed)
}
/// Returns the current value of the `disk_bps` field (disk throughput in
/// bytes/sec, per the field's documentation). Relaxed load: the value is an
/// independent sample and is not ordered against other state.
pub fn disk_bps(&self) -> u64 {
self.disk_bps.load(Ordering::Relaxed)
}
/// Records a new lifecycle ID, returning true if it changed (i.e. this /// Records a new lifecycle ID, returning true if it changed (i.e. this
/// is the first /init since a resume). First-ever call returns false: /// is the first /init since a resume). First-ever call returns false:
/// boot-time /init doesn't need port-subsystem restart since the /// boot-time /init doesn't need port-subsystem restart since the
@ -99,12 +114,16 @@ impl AppState {
} }
} }
fn cpu_sampler(state: Arc<AppState>) { fn activity_sampler(state: Arc<AppState>) {
use sysinfo::System; use sysinfo::System;
let mut sys = System::new(); let mut sys = System::new();
sys.refresh_cpu_all(); sys.refresh_cpu_all();
// Cumulative IO counters from the previous tick. None until the first read.
let mut prev_net: Option<u64> = read_net_bytes();
let mut prev_disk: Option<u64> = read_disk_bytes();
loop { loop {
std::thread::sleep(std::time::Duration::from_secs(1)); std::thread::sleep(std::time::Duration::from_secs(1));
@ -123,5 +142,73 @@ fn cpu_sampler(state: Arc<AppState>) {
state state
.cpu_count .cpu_count
.store(sys.cpus().len() as u32, Ordering::Relaxed); .store(sys.cpus().len() as u32, Ordering::Relaxed);
// Throughput = cumulative-counter delta over the ~1s tick. Counters can
// reset across a snapshot restore; a wrapped/negative delta reads as 0.
let cur_net = read_net_bytes();
let net_bps = match (prev_net, cur_net) {
(Some(p), Some(c)) => c.saturating_sub(p),
_ => 0,
};
prev_net = cur_net;
let cur_disk = read_disk_bytes();
let disk_bps = match (prev_disk, cur_disk) {
(Some(p), Some(c)) => c.saturating_sub(p),
_ => 0,
};
prev_disk = cur_disk;
state.net_bps.store(net_bps, Ordering::Relaxed);
state.disk_bps.store(disk_bps, Ordering::Relaxed);
} }
} }
/// Cumulative rx + tx byte count over every interface listed in /proc/net/dev
/// except the loopback interface `lo`.
///
/// Rows without a `:` separator are skipped and counters that do not parse as
/// `u64` are ignored, so the result is `None` only when the file itself cannot
/// be read. The sum saturates instead of wrapping.
fn read_net_bytes() -> Option<u64> {
    let stats = std::fs::read_to_string("/proc/net/dev").ok()?;
    let sum = stats
        .lines()
        // The two leading lines are column headers.
        .skip(2)
        .filter_map(|row| row.split_once(':'))
        .filter(|(name, _)| name.trim() != "lo")
        .fold(0u64, |acc, (_, counters)| {
            let mut cols = counters.split_whitespace();
            // Column 0 is rx bytes.
            let rx = cols.next().and_then(|v| v.parse::<u64>().ok());
            // Column 8 is tx bytes: column 0 is already consumed, so it is
            // the 8th remaining item (zero-based index 7).
            let tx = cols.nth(7).and_then(|v| v.parse::<u64>().ok());
            acc.saturating_add(rx.unwrap_or(0))
                .saturating_add(tx.unwrap_or(0))
        });
    Some(sum)
}
/// Total bytes read + written across whole block devices, computed from the
/// sector counters in /proc/diskstats (sectors × 512).
///
/// Rows that are skipped:
/// - `loop*` and `ram*` devices;
/// - partitions, because a partition's sectors are already included in its
///   parent disk's row and counting both would double the reported traffic.
///
/// Rows with fewer than 10 columns are skipped and counters that fail to parse
/// count as 0, so the result is `None` only when /proc/diskstats cannot be
/// read. All arithmetic saturates instead of wrapping.
fn read_disk_bytes() -> Option<u64> {
    let content = std::fs::read_to_string("/proc/diskstats").ok()?;
    let mut sectors: u64 = 0;
    for line in content.lines() {
        let fields: Vec<&str> = line.split_whitespace().collect();
        // 0=major 1=minor 2=name ... 5=sectors read ... 9=sectors written.
        if fields.len() < 10 {
            continue;
        }
        let name = fields[2];
        if name.starts_with("loop") || name.starts_with("ram") || is_block_partition(name) {
            continue;
        }
        let read = fields[5].parse::<u64>().unwrap_or(0);
        let written = fields[9].parse::<u64>().unwrap_or(0);
        sectors = sectors.saturating_add(read).saturating_add(written);
    }
    // Linux reports diskstats sectors in fixed 512-byte units.
    Some(sectors.saturating_mul(512))
}

/// Reports whether block device `name` is a partition, using the `partition`
/// attribute that sysfs exposes only for partitions. When sysfs is unavailable
/// the probe returns false, so the row is counted like a whole device.
fn is_block_partition(name: &str) -> bool {
    std::path::Path::new("/sys/class/block")
        .join(name)
        .join("partition")
        .exists()
}

View File

@ -23,12 +23,10 @@ impl AtomicMax {
if new <= current { if new <= current {
return false; return false;
} }
match self.val.compare_exchange_weak( match self
current, .val
new, .compare_exchange_weak(current, new, Ordering::Release, Ordering::Relaxed)
Ordering::Release, {
Ordering::Relaxed,
) {
Ok(_) => return true, Ok(_) => return true,
Err(_) => continue, Err(_) => continue,
} }

View File

@ -53,14 +53,15 @@
let byocPageCount = $derived(Math.max(1, Math.ceil(flatByocHosts.length / PAGE_SIZE))); let byocPageCount = $derived(Math.max(1, Math.ceil(flatByocHosts.length / PAGE_SIZE)));
let byocPageHosts = $derived(flatByocHosts.slice(byocPage * PAGE_SIZE, (byocPage + 1) * PAGE_SIZE)); let byocPageHosts = $derived(flatByocHosts.slice(byocPage * PAGE_SIZE, (byocPage + 1) * PAGE_SIZE));
// Stats across all hosts // Aggregated stats — platform hosts only (admin needs a heads-up on
let onlineCount = $derived(allHosts.filter((h) => h.status === 'online').length); // platform capacity; BYOC capacity belongs to individual teams).
let pendingCount = $derived(allHosts.filter((h) => h.status === 'pending').length); let onlineCount = $derived(platformHosts.filter((h) => h.status === 'online').length);
let totalCount = $derived(allHosts.length); let pendingCount = $derived(platformHosts.filter((h) => h.status === 'pending').length);
let totalCpuCores = $derived(allHosts.reduce((sum, h) => sum + (h.cpu_cores ?? 0), 0)); let totalCount = $derived(platformHosts.length);
let totalMemoryMb = $derived(allHosts.reduce((sum, h) => sum + (h.memory_mb ?? 0), 0)); let totalCpuCores = $derived(platformHosts.reduce((sum, h) => sum + (h.cpu_cores ?? 0), 0));
let totalRunningVcpus = $derived(allHosts.reduce((sum, h) => sum + h.running_vcpus, 0)); let totalMemoryMb = $derived(platformHosts.reduce((sum, h) => sum + (h.memory_mb ?? 0), 0));
let totalRunningMemoryMb = $derived(allHosts.reduce((sum, h) => sum + h.running_memory_mb, 0)); let totalRunningVcpus = $derived(platformHosts.reduce((sum, h) => sum + h.running_vcpus, 0));
let totalRunningMemoryMb = $derived(platformHosts.reduce((sum, h) => sum + h.running_memory_mb, 0));
function formatMem(mb: number): string { function formatMem(mb: number): string {
return mb >= 1024 ? `${(mb / 1024).toFixed(0)} GB` : `${mb} MB`; return mb >= 1024 ? `${(mb / 1024).toFixed(0)} GB` : `${mb} MB`;

View File

@ -6,10 +6,6 @@
let { children } = $props(); let { children } = $props();
</script> </script>
<svelte:head>
<title>Wrenn — Capsules</title>
</svelte:head>
<main class="flex flex-1 flex-col overflow-y-auto bg-[var(--color-bg-0)]"> <main class="flex flex-1 flex-col overflow-y-auto bg-[var(--color-bg-0)]">
<!-- Header area --> <!-- Header area -->
{#if $page.params.id} {#if $page.params.id}

View File

@ -256,6 +256,10 @@
}); });
</script> </script>
<svelte:head>
<title>Wrenn — Capsules</title>
</svelte:head>
<style> <style>
@keyframes capsule-born { @keyframes capsule-born {
0%, 25% { background-color: rgba(94, 140, 88, 0.1); } 0%, 25% { background-color: rgba(94, 140, 88, 0.1); }

View File

@ -130,22 +130,8 @@ func (h *execStreamHandler) runExecStream(ctx context.Context, conn *websocket.C
// Forward stream events to WebSocket. // Forward stream events to WebSocket.
for stream.Receive() { for stream.Receive() {
resp := stream.Msg() if m, ok := procRespToWSMsg(stream.Msg()); ok {
switch ev := resp.Event.(type) { writeWSJSON(conn, m)
case *pb.ExecStreamResponse_Start:
writeWSJSON(conn, wsOutMsg{Type: "start", PID: ev.Start.Pid})
case *pb.ExecStreamResponse_Data:
switch o := ev.Data.Output.(type) {
case *pb.ExecStreamData_Stdout:
writeWSJSON(conn, wsOutMsg{Type: "stdout", Data: string(o.Stdout)})
case *pb.ExecStreamData_Stderr:
writeWSJSON(conn, wsOutMsg{Type: "stderr", Data: string(o.Stderr)})
}
case *pb.ExecStreamResponse_End:
exitCode := ev.End.ExitCode
writeWSJSON(conn, wsOutMsg{Type: "exit", ExitCode: &exitCode})
} }
} }
@ -159,6 +145,38 @@ func (h *execStreamHandler) runExecStream(ctx context.Context, conn *websocket.C
updateLastActive(h.db, sandboxID, sandboxIDStr) updateLastActive(h.db, sandboxID, sandboxIDStr)
} }
// procStreamResp is satisfied by both *pb.ExecStreamResponse and
// *pb.ConnectProcessResponse: their oneof events carry the same inner messages,
// so the wire-to-WS mapping below is shared between the exec-stream and
// connect-process handlers.
//
// procRespToWSMsg treats a nil result from a getter as "this response does not
// carry that event".
type procStreamResp interface {
// GetStart returns the start event, if any; its Pid is forwarded.
GetStart() *pb.ExecStreamStart
// GetData returns the output chunk, if any (stdout or stderr).
GetData() *pb.ExecStreamData
// GetEnd returns the end event, if any; its ExitCode is forwarded.
GetEnd() *pb.ExecStreamEnd
}
// procRespToWSMsg translates a single process stream response into the
// WebSocket message that should be written for it.
//
// Start maps to "start" (with PID), stdout/stderr data map to "stdout" and
// "stderr", and End maps to "exit" (with ExitCode). The events are checked in
// that order. The returned bool is false when there is nothing to forward: no
// event is set, or the data event has an output variant other than
// stdout/stderr.
func procRespToWSMsg(resp procStreamResp) (wsOutMsg, bool) {
	if start := resp.GetStart(); start != nil {
		return wsOutMsg{Type: "start", PID: start.Pid}, true
	}

	data := resp.GetData()
	if data == nil {
		end := resp.GetEnd()
		if end == nil {
			return wsOutMsg{}, false
		}
		// Copy the code so the message holds a pointer to its own value.
		code := end.ExitCode
		return wsOutMsg{Type: "exit", ExitCode: &code}, true
	}

	var out wsOutMsg
	switch chunk := data.Output.(type) {
	case *pb.ExecStreamData_Stdout:
		out = wsOutMsg{Type: "stdout", Data: string(chunk.Stdout)}
	case *pb.ExecStreamData_Stderr:
		out = wsOutMsg{Type: "stderr", Data: string(chunk.Stderr)}
	default:
		return out, false
	}
	return out, true
}
func sendWSError(conn *websocket.Conn, msg string) { func sendWSError(conn *websocket.Conn, msg string) {
writeWSJSON(conn, wsOutMsg{Type: "error", Data: msg}) writeWSJSON(conn, wsOutMsg{Type: "error", Data: msg})
} }

View File

@ -192,22 +192,8 @@ func (h *processHandler) runConnectProcess(ctx context.Context, conn *websocket.
// Forward stream events to WebSocket. // Forward stream events to WebSocket.
for stream.Receive() { for stream.Receive() {
resp := stream.Msg() if m, ok := procRespToWSMsg(stream.Msg()); ok {
switch ev := resp.Event.(type) { writeWSJSON(conn, m)
case *pb.ConnectProcessResponse_Start:
writeWSJSON(conn, wsOutMsg{Type: "start", PID: ev.Start.Pid})
case *pb.ConnectProcessResponse_Data:
switch o := ev.Data.Output.(type) {
case *pb.ExecStreamData_Stdout:
writeWSJSON(conn, wsOutMsg{Type: "stdout", Data: string(o.Stdout)})
case *pb.ExecStreamData_Stderr:
writeWSJSON(conn, wsOutMsg{Type: "stderr", Data: string(o.Stderr)})
}
case *pb.ConnectProcessResponse_End:
exitCode := ev.End.ExitCode
writeWSJSON(conn, wsOutMsg{Type: "exit", ExitCode: &exitCode})
} }
} }

View File

@ -60,6 +60,10 @@ func agentErrToHTTP(err error) (int, string, string) {
return http.StatusServiceUnavailable, "no_hosts_available", "no servers available — try again later" return http.StatusServiceUnavailable, "no_hosts_available", "no servers available — try again later"
case connect.CodeUnimplemented: case connect.CodeUnimplemented:
return http.StatusNotImplemented, "agent_error", err.Error() return http.StatusNotImplemented, "agent_error", err.Error()
case connect.CodeDeadlineExceeded:
return http.StatusGatewayTimeout, "timeout", "command timed out"
case connect.CodeInternal:
return http.StatusInternalServerError, "agent_error", err.Error()
default: default:
return http.StatusBadGateway, "agent_error", err.Error() return http.StatusBadGateway, "agent_error", err.Error()
} }

View File

@ -144,7 +144,7 @@ func (c *SandboxEventConsumer) handleMessage(ctx context.Context, msg redis.XMes
} }
case events.CapsulePause: case events.CapsulePause:
if event.Outcome == events.OutcomeSuccess { if event.Outcome == events.OutcomeSuccess {
c.handleAutoPaused(ctx, sandboxID) c.handleAutoPaused(ctx, sandboxID, event)
} }
case events.CapsuleDestroy: case events.CapsuleDestroy:
if event.Outcome == events.OutcomeSuccess { if event.Outcome == events.OutcomeSuccess {
@ -226,12 +226,35 @@ func (c *SandboxEventConsumer) handleStarted(ctx context.Context, sandboxID pgty
} }
} }
func (c *SandboxEventConsumer) handleAutoPaused(ctx context.Context, sandboxID pgtype.UUID) { // handleAutoPaused reflects an autonomous (TTL reaper / shutdown) pause in the
// DB and writes the audit row for it. The audit write happens only when the
// status flip actually applied, so a stream redelivery does not double-count,
// and so the HostMonitor host_state_sync fallback (which audits the
// callback-lost case) stays mutually exclusive with this path.
//
// Uses audit.Log (row only) — NOT LogSandboxAutoPause, which republishes a
// CapsulePause/system event that would loop straight back into this consumer.
func (c *SandboxEventConsumer) handleAutoPaused(ctx context.Context, sandboxID pgtype.UUID, event events.Event) {
for _, fromStatus := range []string{"running", "pausing"} { for _, fromStatus := range []string{"running", "pausing"} {
if _, err := c.db.UpdateSandboxStatusIf(ctx, db.UpdateSandboxStatusIfParams{ if _, err := c.db.UpdateSandboxStatusIf(ctx, db.UpdateSandboxStatusIfParams{
ID: sandboxID, Status: fromStatus, Status_2: "paused", ID: sandboxID, Status: fromStatus, Status_2: "paused",
}); err == nil { }); err == nil {
slog.Debug("sandbox event consumer: auto-paused fallback applied", "sandbox_id", id.FormatSandboxID(sandboxID), "from", fromStatus) slog.Debug("sandbox event consumer: auto-paused applied", "sandbox_id", id.FormatSandboxID(sandboxID), "from", fromStatus)
reason := event.Metadata["reason"]
if reason == "" {
reason = "ttl_expired"
}
teamID, _ := id.ParseTeamID(event.TeamID)
c.audit.Log(ctx, audit.Entry{
TeamID: teamID,
ActorType: "system",
ResourceType: "sandbox",
ResourceID: id.FormatSandboxID(sandboxID),
Action: "pause",
Scope: "team",
Status: "info",
Metadata: map[string]any{"reason": reason},
})
return return
} }
} }

View File

@ -104,6 +104,14 @@ func (r *SSERelay) handleMessage(ctx context.Context, msg *redis.Message) {
if err != nil { if err != nil {
slog.Debug("sse relay: sandbox hydration failed (may be deleted)", "sandbox_id", event.Resource.ID, "error", err) slog.Debug("sse relay: sandbox hydration failed (may be deleted)", "sandbox_id", event.Resource.ID, "error", err)
} else { } else {
// Override the hydrated status with the status implied by the event
// verb. Autonomous transitions (e.g. TTL auto-pause) flip the DB row
// in a separate stream consumer that races this Pub/Sub read, so the
// hydrated row may still carry the pre-transition status. The event
// itself is authoritative for the resulting state.
if status, ok := impliedSandboxStatus(event); ok {
sb.Status = status
}
payload.Sandbox = sb payload.Sandbox = sb
} }
} }
@ -138,6 +146,25 @@ func (r *SSERelay) hydrateSandbox(ctx context.Context, sandboxIDStr string) (*sa
return &resp, nil return &resp, nil
} }
// impliedSandboxStatus reports the sandbox status that a successful capsule
// lifecycle event leads to: "paused" for a pause, "running" for a resume or
// create. The caller uses it to overwrite a hydrated DB row that may still
// show the pre-transition status, because the consumer that updates the row
// races the Pub/Sub read.
//
// The bool is false when the event does not determine a single status: any
// outcome other than success, and every other event type (destroy,
// state_changed, ...).
func impliedSandboxStatus(event events.Event) (string, bool) {
	if event.Outcome == events.OutcomeSuccess {
		switch event.Event {
		case events.CapsuleResume, events.CapsuleCreate:
			return "running", true
		case events.CapsulePause:
			return "paused", true
		}
	}
	return "", false
}
func isCapsuleEvent(eventType string) bool { func isCapsuleEvent(eventType string) bool {
switch eventType { switch eventType {
case events.CapsuleCreate, events.CapsulePause, events.CapsuleResume, events.CapsuleDestroy, events.CapsuleStateChanged: case events.CapsuleCreate, events.CapsulePause, events.CapsuleResume, events.CapsuleDestroy, events.CapsuleStateChanged:

View File

@ -25,6 +25,7 @@ type Client struct {
hostIP string hostIP string
base string base string
healthURL string healthURL string
activityURL string
httpClient *http.Client httpClient *http.Client
streamingClient *http.Client streamingClient *http.Client
@ -42,6 +43,7 @@ func New(hostIP string) *Client {
hostIP: hostIP, hostIP: hostIP,
base: base, base: base,
healthURL: base + "/health", healthURL: base + "/health",
activityURL: base + "/activity",
httpClient: httpClient, httpClient: httpClient,
streamingClient: streamingClient, streamingClient: streamingClient,
process: genconnect.NewProcessClient(streamingClient, base), process: genconnect.NewProcessClient(streamingClient, base),
@ -117,36 +119,17 @@ func (c *Client) Exec(ctx context.Context, cmd string, args []string, opts *Exec
result := &ExecResult{} result := &ExecResult{}
for stream.Receive() { for stream.Receive() {
msg := stream.Msg() ev, ok := procEventToStreamEvent(stream.Msg().GetEvent())
if msg.Event == nil { if !ok {
continue continue
} }
switch ev.Type {
event := msg.Event.GetEvent() case "stdout":
switch e := event.(type) { result.Stdout = append(result.Stdout, ev.Data...)
case *envdpb.ProcessEvent_Start: case "stderr":
slog.Debug("process started", "pid", e.Start.GetPid()) result.Stderr = append(result.Stderr, ev.Data...)
case "end":
case *envdpb.ProcessEvent_Data: result.ExitCode = ev.ExitCode
output := e.Data.GetOutput()
switch o := output.(type) {
case *envdpb.ProcessEvent_DataEvent_Stdout:
result.Stdout = append(result.Stdout, o.Stdout...)
case *envdpb.ProcessEvent_DataEvent_Stderr:
result.Stderr = append(result.Stderr, o.Stderr...)
}
case *envdpb.ProcessEvent_End:
result.ExitCode = e.End.GetExitCode()
if e.End.Error != nil {
slog.Debug("process ended with error",
"exit_code", e.End.GetExitCode(),
"error", e.End.GetError(),
)
}
case *envdpb.ProcessEvent_Keepalive:
// Ignore keepalives.
} }
} }
@ -166,6 +149,76 @@ type ExecStreamEvent struct {
Error string Error string
} }
// procEventToStreamEvent decodes one raw envd ProcessEvent into the flat
// ExecStreamEvent form: "start" (PID), "stdout"/"stderr" (Data) or "end"
// (ExitCode, plus Error when the end event carries one).
//
// The bool is false when there is nothing to forward: a nil event, a
// keepalive or other unhandled event kind, or a data event whose output is
// neither stdout nor stderr. It is the one decoder shared by Exec, ExecStream
// and ConnectProcess.
func procEventToStreamEvent(pe *envdpb.ProcessEvent) (ExecStreamEvent, bool) {
	var none ExecStreamEvent
	if pe == nil {
		return none, false
	}

	switch payload := pe.GetEvent().(type) {
	case *envdpb.ProcessEvent_Start:
		return ExecStreamEvent{Type: "start", PID: payload.Start.GetPid()}, true

	case *envdpb.ProcessEvent_End:
		out := ExecStreamEvent{Type: "end", ExitCode: payload.End.GetExitCode()}
		if payload.End.Error != nil {
			out.Error = payload.End.GetError()
		}
		return out, true

	case *envdpb.ProcessEvent_Data:
		switch chunk := payload.Data.GetOutput().(type) {
		case *envdpb.ProcessEvent_DataEvent_Stdout:
			return ExecStreamEvent{Type: "stdout", Data: chunk.Stdout}, true
		case *envdpb.ProcessEvent_DataEvent_Stderr:
			return ExecStreamEvent{Type: "stderr", Data: chunk.Stderr}, true
		}
	}

	// Keepalives, unknown event kinds and unknown data variants end up here.
	return none, false
}
// procEventStream is the subset of a Connect server-stream that pumpProcessEvents
// needs. Both *connect.ServerStreamForClient[StartResponse] and
// [ConnectResponse] satisfy it.
type procEventStream[T any] interface {
// Receive is the loop condition: the pump keeps reading while it returns true.
Receive() bool
// Msg returns the message to decode after a successful Receive.
Msg() *T
// Err is inspected once Receive has returned false.
Err() error
// Close is deferred by the pump so the stream is released on every exit path.
Close() error
}
// pumpProcessEvents forwards decoded events from a process server-stream to ch
// and is meant to run in its own goroutine.
//
// It stops when the stream has no more messages or when ctx is cancelled while
// a send on ch is pending. On every exit path the stream is closed and then ch
// is closed, so receivers see the channel close as the end-of-stream signal.
// getEvent pulls the ProcessEvent out of each message, which lets one loop
// serve both the Start and the Connect RPC. Messages that decode to nothing
// forwardable are dropped. A terminal stream error other than io.EOF is logged
// at debug level under logLabel.
func pumpProcessEvents[T any](
	ctx context.Context,
	stream procEventStream[T],
	getEvent func(*T) *envdpb.ProcessEvent,
	ch chan<- ExecStreamEvent,
	logLabel string,
) {
	// Deferred calls run last-in-first-out: stream.Close first, then close(ch).
	defer close(ch)
	defer stream.Close()

	done := ctx.Done()
	for stream.Receive() {
		decoded, forward := procEventToStreamEvent(getEvent(stream.Msg()))
		if forward {
			select {
			case ch <- decoded:
			case <-done:
				return
			}
		}
	}

	err := stream.Err()
	if err == nil || err == io.EOF {
		return
	}
	slog.Debug(logLabel, "error", err)
}
// ExecStream runs a command inside the sandbox and returns a channel of output events. // ExecStream runs a command inside the sandbox and returns a channel of output events.
// The channel is closed when the process ends or the context is cancelled. // The channel is closed when the process ends or the context is cancelled.
func (c *Client) ExecStream(ctx context.Context, cmd string, args ...string) (<-chan ExecStreamEvent, error) { func (c *Client) ExecStream(ctx context.Context, cmd string, args ...string) (<-chan ExecStreamEvent, error) {
@ -184,52 +237,7 @@ func (c *Client) ExecStream(ctx context.Context, cmd string, args ...string) (<-
} }
ch := make(chan ExecStreamEvent, 256) ch := make(chan ExecStreamEvent, 256)
go func() { go pumpProcessEvents(ctx, stream, (*envdpb.StartResponse).GetEvent, ch, "exec stream error")
defer close(ch)
defer stream.Close()
for stream.Receive() {
msg := stream.Msg()
if msg.Event == nil {
continue
}
var ev ExecStreamEvent
event := msg.Event.GetEvent()
switch e := event.(type) {
case *envdpb.ProcessEvent_Start:
ev = ExecStreamEvent{Type: "start", PID: e.Start.GetPid()}
case *envdpb.ProcessEvent_Data:
output := e.Data.GetOutput()
switch o := output.(type) {
case *envdpb.ProcessEvent_DataEvent_Stdout:
ev = ExecStreamEvent{Type: "stdout", Data: o.Stdout}
case *envdpb.ProcessEvent_DataEvent_Stderr:
ev = ExecStreamEvent{Type: "stderr", Data: o.Stderr}
}
case *envdpb.ProcessEvent_End:
ev = ExecStreamEvent{Type: "end", ExitCode: e.End.GetExitCode()}
if e.End.Error != nil {
ev.Error = e.End.GetError()
}
case *envdpb.ProcessEvent_Keepalive:
continue
}
select {
case ch <- ev:
case <-ctx.Done():
return
}
}
if err := stream.Err(); err != nil && err != io.EOF {
slog.Debug("exec stream error", "error", err)
}
}()
return ch, nil return ch, nil
} }
@ -434,7 +442,7 @@ func (c *Client) CancelMemoryPreload(ctx context.Context) error {
// post-restore initialization. sandbox_id and template_id are passed // post-restore initialization. sandbox_id and template_id are passed
// so envd can set WRENN_SANDBOX_ID and WRENN_TEMPLATE_ID env vars. // so envd can set WRENN_SANDBOX_ID and WRENN_TEMPLATE_ID env vars.
func (c *Client) PostInit(ctx context.Context) error { func (c *Client) PostInit(ctx context.Context) error {
return c.PostInitWithDefaults(ctx, "", nil, "", "") return c.PostInitWithDefaults(ctx, "", nil, "", "", "")
} }
// PostInitWithDefaults calls envd's POST /init endpoint with optional default // PostInitWithDefaults calls envd's POST /init endpoint with optional default
@ -444,7 +452,7 @@ func (c *Client) PostInit(ctx context.Context) error {
// timestamp and lifecycle_id are always populated: envd uses them to snap // timestamp and lifecycle_id are always populated: envd uses them to snap
// the guest clock to the host's wall time and to detect post-resume calls // the guest clock to the host's wall time and to detect post-resume calls
// (which trigger port-forwarder restart + NFS remount). // (which trigger port-forwarder restart + NFS remount).
func (c *Client) PostInitWithDefaults(ctx context.Context, defaultUser string, envVars map[string]string, sandboxID, templateID string) error { func (c *Client) PostInitWithDefaults(ctx context.Context, defaultUser string, envVars map[string]string, sandboxID, templateID, proxyDomain string) error {
payload := map[string]any{ payload := map[string]any{
"timestamp": time.Now().UTC().Format(time.RFC3339Nano), "timestamp": time.Now().UTC().Format(time.RFC3339Nano),
"lifecycle_id": uuid.NewString(), "lifecycle_id": uuid.NewString(),
@ -461,6 +469,9 @@ func (c *Client) PostInitWithDefaults(ctx context.Context, defaultUser string, e
if templateID != "" { if templateID != "" {
payload["template_id"] = templateID payload["template_id"] = templateID
} }
if proxyDomain != "" {
payload["proxy_domain"] = proxyDomain
}
var body io.Reader var body io.Reader
if len(payload) > 0 { if len(payload) > 0 {

View File

@ -81,6 +81,42 @@ func (c *Client) WaitUntilRPCReady(ctx context.Context) error {
} }
} }
// Activity is envd's liveness snapshot: VM-wide CPU utilisation and IO
// throughput sampled inside the guest. The host activity sampler uses it to
// decide whether a sandbox is doing real work and should keep its TTL fresh.
// It is decoded from the JSON body of envd's /activity endpoint.
type Activity struct {
// CPUCount is presumably the number of guest vCPUs — it is not read by the
// host-side busy check; confirm semantics against envd.
CPUCount uint32 `json:"cpu_count"`
// CPUUsedPct is compared against the host's CPU busy threshold, which is
// documented as a percentage of total vCPU capacity.
CPUUsedPct float32 `json:"cpu_used_pct"`
// NetBps is compared against the host's network floor, which is
// documented in bytes/sec.
NetBps uint64 `json:"net_bps"`
// DiskBps is compared against the host's disk floor, which is documented
// in bytes/sec.
DiskBps uint64 `json:"disk_bps"`
}
// FetchActivity issues a single GET against envd's /activity endpoint and
// decodes the JSON body into an Activity snapshot. The endpoint serves straight
// from in-guest atomics (no syscalls), so it is cheap to call frequently.
//
// An error is returned when the request cannot be built or sent, when envd
// answers with anything other than 200 OK, or when the body is not valid JSON
// for Activity. Cancellation and deadlines are taken from ctx.
func (c *Client) FetchActivity(ctx context.Context) (*Activity, error) {
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, c.activityURL, nil)
	if err != nil {
		return nil, fmt.Errorf("build activity request: %w", err)
	}

	resp, err := c.httpClient.Do(req)
	if err != nil {
		return nil, fmt.Errorf("fetch envd activity: %w", err)
	}
	defer resp.Body.Close()

	if code := resp.StatusCode; code != http.StatusOK {
		return nil, fmt.Errorf("activity check returned %d", code)
	}

	snapshot := new(Activity)
	if err := json.NewDecoder(resp.Body).Decode(snapshot); err != nil {
		return nil, fmt.Errorf("decode activity response: %w", err)
	}
	return snapshot, nil
}
// healthCheck sends a single GET /health request to envd. // healthCheck sends a single GET /health request to envd.
func (c *Client) healthCheck(ctx context.Context) error { func (c *Client) healthCheck(ctx context.Context) error {
req, err := http.NewRequestWithContext(ctx, http.MethodGet, c.healthURL, nil) req, err := http.NewRequestWithContext(ctx, http.MethodGet, c.healthURL, nil)

View File

@ -4,7 +4,6 @@ import (
"context" "context"
"fmt" "fmt"
"io" "io"
"log/slog"
"connectrpc.com/connect" "connectrpc.com/connect"
@ -87,52 +86,7 @@ func (c *Client) ConnectProcess(ctx context.Context, pid uint32, tag string) (<-
} }
ch := make(chan ExecStreamEvent, 16) ch := make(chan ExecStreamEvent, 16)
go func() { go pumpProcessEvents(ctx, stream, (*envdpb.ConnectResponse).GetEvent, ch, "connect process stream error")
defer close(ch)
defer stream.Close()
for stream.Receive() {
msg := stream.Msg()
if msg.Event == nil {
continue
}
var ev ExecStreamEvent
switch e := msg.Event.GetEvent().(type) {
case *envdpb.ProcessEvent_Start:
ev = ExecStreamEvent{Type: "start", PID: e.Start.GetPid()}
case *envdpb.ProcessEvent_Data:
switch o := e.Data.GetOutput().(type) {
case *envdpb.ProcessEvent_DataEvent_Stdout:
ev = ExecStreamEvent{Type: "stdout", Data: o.Stdout}
case *envdpb.ProcessEvent_DataEvent_Stderr:
ev = ExecStreamEvent{Type: "stderr", Data: o.Stderr}
default:
continue
}
case *envdpb.ProcessEvent_End:
ev = ExecStreamEvent{Type: "end", ExitCode: e.End.GetExitCode()}
if e.End.Error != nil {
ev.Error = e.End.GetError()
}
case *envdpb.ProcessEvent_Keepalive:
continue
}
select {
case ch <- ev:
case <-ctx.Done():
return
}
}
if err := stream.Err(); err != nil && err != io.EOF {
slog.Debug("connect process stream error", "error", err)
}
}()
return ch, nil return ch, nil
} }

View File

@ -253,7 +253,7 @@ func (s *Server) Exec(
result, err := s.mgr.Exec(execCtx, msg.SandboxId, msg.Cmd, msg.Args, opts) result, err := s.mgr.Exec(execCtx, msg.SandboxId, msg.Cmd, msg.Args, opts)
if err != nil { if err != nil {
return nil, connect.NewError(connect.CodeInternal, fmt.Errorf("exec: %w", err)) return nil, envdErr("exec", err)
} }
return connect.NewResponse(&pb.ExecResponse{ return connect.NewResponse(&pb.ExecResponse{
@ -395,31 +395,15 @@ func (s *Server) ExecStream(
} }
for ev := range events { for ev := range events {
start, data, end := execEventParts(ev)
var resp pb.ExecStreamResponse var resp pb.ExecStreamResponse
switch ev.Type { switch {
case "start": case start != nil:
resp.Event = &pb.ExecStreamResponse_Start{ resp.Event = &pb.ExecStreamResponse_Start{Start: start}
Start: &pb.ExecStreamStart{Pid: ev.PID}, case data != nil:
} resp.Event = &pb.ExecStreamResponse_Data{Data: data}
case "stdout": case end != nil:
resp.Event = &pb.ExecStreamResponse_Data{ resp.Event = &pb.ExecStreamResponse_End{End: end}
Data: &pb.ExecStreamData{
Output: &pb.ExecStreamData_Stdout{Stdout: ev.Data},
},
}
case "stderr":
resp.Event = &pb.ExecStreamResponse_Data{
Data: &pb.ExecStreamData{
Output: &pb.ExecStreamData_Stderr{Stderr: ev.Data},
},
}
case "end":
resp.Event = &pb.ExecStreamResponse_End{
End: &pb.ExecStreamEnd{
ExitCode: ev.ExitCode,
Error: ev.Error,
},
}
default: default:
continue continue
} }
@ -431,6 +415,24 @@ func (s *Server) ExecStream(
return nil return nil
} }
// execEventParts translates a streaming exec event into the proto message that
// goes inside a response envelope. At most one of the three results is
// non-nil: "start" yields the start message, "stdout"/"stderr" yield a data
// message carrying the respective output, and "end" yields the end message with
// exit code and error. Any other event type yields three nils, meaning there is
// nothing to forward. ExecStream and ConnectProcess share this mapping and only
// differ in the envelope they wrap the result in.
func execEventParts(ev envdclient.ExecStreamEvent) (*pb.ExecStreamStart, *pb.ExecStreamData, *pb.ExecStreamEnd) {
	var (
		start *pb.ExecStreamStart
		data  *pb.ExecStreamData
		end   *pb.ExecStreamEnd
	)
	switch ev.Type {
	case "start":
		start = &pb.ExecStreamStart{Pid: ev.PID}
	case "stdout":
		data = &pb.ExecStreamData{Output: &pb.ExecStreamData_Stdout{Stdout: ev.Data}}
	case "stderr":
		data = &pb.ExecStreamData{Output: &pb.ExecStreamData_Stderr{Stderr: ev.Data}}
	case "end":
		end = &pb.ExecStreamEnd{ExitCode: ev.ExitCode, Error: ev.Error}
	}
	return start, data, end
}
func (s *Server) WriteFileStream( func (s *Server) WriteFileStream(
ctx context.Context, ctx context.Context,
stream *connect.ClientStream[pb.WriteFileStreamRequest], stream *connect.ClientStream[pb.WriteFileStreamRequest],
@ -912,31 +914,15 @@ func (s *Server) ConnectProcess(
} }
for ev := range events { for ev := range events {
start, data, end := execEventParts(ev)
var resp pb.ConnectProcessResponse var resp pb.ConnectProcessResponse
switch ev.Type { switch {
case "start": case start != nil:
resp.Event = &pb.ConnectProcessResponse_Start{ resp.Event = &pb.ConnectProcessResponse_Start{Start: start}
Start: &pb.ExecStreamStart{Pid: ev.PID}, case data != nil:
} resp.Event = &pb.ConnectProcessResponse_Data{Data: data}
case "stdout": case end != nil:
resp.Event = &pb.ConnectProcessResponse_Data{ resp.Event = &pb.ConnectProcessResponse_End{End: end}
Data: &pb.ExecStreamData{
Output: &pb.ExecStreamData_Stdout{Stdout: ev.Data},
},
}
case "stderr":
resp.Event = &pb.ConnectProcessResponse_Data{
Data: &pb.ExecStreamData{
Output: &pb.ExecStreamData_Stderr{Stderr: ev.Data},
},
}
case "end":
resp.Event = &pb.ConnectProcessResponse_End{
End: &pb.ExecStreamEnd{
ExitCode: ev.ExitCode,
Error: ev.Error,
},
}
default: default:
continue continue
} }

View File

@ -0,0 +1,111 @@
package sandbox
import (
"testing"
"git.omukk.dev/wrenn/wrenn/internal/envdclient"
)
// TestIsBusy checks the busy predicate at and around each threshold, first with
// a zero Config (which falls back to the package defaults: cpu 5%, net 16K,
// disk 32K) and then with explicit per-field overrides.
func TestIsBusy(t *testing.T) {
	type testCase struct {
		name string
		cfg  Config
		act  envdclient.Activity
		want bool
	}

	cases := []testCase{
		// Zero config: package defaults apply.
		{name: "idle", cfg: Config{}, act: envdclient.Activity{CPUUsedPct: 0.5, NetBps: 100, DiskBps: 200}, want: false},
		{name: "cpu just below", cfg: Config{}, act: envdclient.Activity{CPUUsedPct: 4.99}, want: false},
		{name: "cpu at threshold", cfg: Config{}, act: envdclient.Activity{CPUUsedPct: 5.0}, want: true},
		{name: "cpu above", cfg: Config{}, act: envdclient.Activity{CPUUsedPct: 80.0}, want: true},
		{name: "net just below", cfg: Config{}, act: envdclient.Activity{NetBps: 16*1024 - 1}, want: false},
		{name: "net at floor", cfg: Config{}, act: envdclient.Activity{NetBps: 16 * 1024}, want: true},
		{name: "disk just below", cfg: Config{}, act: envdclient.Activity{DiskBps: 32*1024 - 1}, want: false},
		{name: "disk at floor", cfg: Config{}, act: envdclient.Activity{DiskBps: 32 * 1024}, want: true},
		{name: "download: low cpu, high net", cfg: Config{}, act: envdclient.Activity{CPUUsedPct: 1.0, NetBps: 5 * 1024 * 1024}, want: true},

		// Explicit overrides win over the defaults.
		{
			name: "custom cpu threshold met",
			cfg:  Config{CPUBusyPct: 20.0},
			act:  envdclient.Activity{CPUUsedPct: 25.0},
			want: true,
		},
		{
			name: "custom cpu threshold not met",
			cfg:  Config{CPUBusyPct: 20.0},
			act:  envdclient.Activity{CPUUsedPct: 10.0},
			want: false,
		},
		{
			name: "custom net floor not met",
			cfg:  Config{NetFloorBps: 1024 * 1024},
			act:  envdclient.Activity{NetBps: 16 * 1024},
			want: false,
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			mgr := &Manager{cfg: tc.cfg}
			got := mgr.isBusy(&tc.act)
			if got != tc.want {
				t.Errorf("isBusy(%+v) = %v, want %v", tc.act, got, tc.want)
			}
		})
	}
}
// TestApplyBusySample pins the single-step debounce transition: how one sample
// moves the streak and whether it triggers a TTL bump. The expectations are
// written for a debounce of two samples, so the test skips itself if the
// constant is ever changed.
func TestApplyBusySample(t *testing.T) {
	if busyDebounceSamples != 2 {
		t.Skip("test written for busyDebounceSamples=2")
	}

	cases := []struct {
		name        string
		startStreak int
		busy        bool
		wantStreak  int
		wantBump    bool
	}{
		{name: "first busy, no bump yet", startStreak: 0, busy: true, wantStreak: 1, wantBump: false},
		{name: "second consecutive busy, bump", startStreak: 1, busy: true, wantStreak: 2, wantBump: true},
		{name: "sustained busy keeps bumping, streak held", startStreak: 2, busy: true, wantStreak: 2, wantBump: true},
		{name: "single noise spike from idle, no bump", startStreak: 0, busy: false, wantStreak: 0, wantBump: false},
		{name: "idle resets a building streak", startStreak: 1, busy: false, wantStreak: 0, wantBump: false},
		{name: "idle resets a saturated streak", startStreak: 2, busy: false, wantStreak: 0, wantBump: false},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			streak, bump := applyBusySample(tc.startStreak, tc.busy)
			if streak == tc.wantStreak && bump == tc.wantBump {
				return
			}
			t.Errorf("applyBusySample(%d, %v) = (%d, %v), want (%d, %v)",
				tc.startStreak, tc.busy, streak, bump, tc.wantStreak, tc.wantBump)
		})
	}
}
// TestApplyBusySample_NoiseScenario feeds a realistic sample sequence through
// the debounce, carrying the streak from one step to the next: an isolated busy
// sample never bumps, sustained busy samples do from the second one onward, and
// a return to idle stops the bumps again. Written for a debounce of two
// samples; skipped otherwise.
func TestApplyBusySample_NoiseScenario(t *testing.T) {
	if busyDebounceSamples != 2 {
		t.Skip("test written for busyDebounceSamples=2")
	}

	steps := []struct {
		busy     bool
		wantBump bool
	}{
		{busy: true, wantBump: false},
		{busy: false, wantBump: false},
		{busy: false, wantBump: false},
		{busy: true, wantBump: false},
		{busy: true, wantBump: true},
		{busy: true, wantBump: true},
		{busy: false, wantBump: false},
	}

	streak := 0
	for i, step := range steps {
		next, bump := applyBusySample(streak, step.busy)
		streak = next
		if bump != step.wantBump {
			t.Errorf("sample %d (busy=%v): bump = %v, want %v (streak=%d)",
				i, step.busy, bump, step.wantBump, streak)
		}
	}
}

View File

@ -88,14 +88,47 @@ type Config struct {
EnvdTimeout time.Duration EnvdTimeout time.Duration
DefaultRootfsSizeMB int // target size for template rootfs images; 0 → DefaultDiskSizeMB DefaultRootfsSizeMB int // target size for template rootfs images; 0 → DefaultDiskSizeMB
// ProxyDomain is the public domain sandboxes are served under (e.g.
// "wrenn.dev"). Injected into envd at /init so `envd ports` can build
// {port}-{sandbox_id}.{domain} URLs.
ProxyDomain string
// Resolved at startup by the host agent. // Resolved at startup by the host agent.
KernelPath string // path to the latest vmlinux-x.y.z KernelPath string // path to the latest vmlinux-x.y.z
KernelVersion string // semver extracted from filename KernelVersion string // semver extracted from filename
VMMBin string // path to the cloud-hypervisor binary VMMBin string // path to the cloud-hypervisor binary
VMMVersion string // semver from cloud-hypervisor --version VMMVersion string // semver from cloud-hypervisor --version
AgentVersion string // host agent version (injected via ldflags) AgentVersion string // host agent version (injected via ldflags)
// Activity sampler thresholds. The sampler polls each running sandbox's
// guest liveness and refreshes its TTL when it is doing real work, so a
// long-running but non-interactive job is not mistaken for inactive. A
// sandbox counts as busy when guest CPU ≥ CPUBusyPct, or net/disk
// throughput ≥ the respective floor (bytes/sec). Zero values fall back to
// the package defaults at sampler start.
ActivitySampleInterval time.Duration
CPUBusyPct float32
NetFloorBps uint64
DiskFloorBps uint64
} }
// Activity sampler defaults. Thresholds sit clear of idle-VM background noise
// (envd's own sampler thread, guest timers) so a parked sandbox still times
// out; the debounce below guards against a lone noisy sample masquerading as
// work. The four default* values are fallbacks used when the corresponding
// Config field is zero (the host agent can override them via its environment);
// the remaining constants are fixed.
const (
// defaultActivitySampleInterval is how often the sampler ticks.
defaultActivitySampleInterval = 5 * time.Second
defaultCPUBusyPct = 5.0 // percent of total vCPU capacity
defaultNetFloorBps = 16 * 1024 // 16 KiB/s
defaultDiskFloorBps = 32 * 1024 // 32 KiB/s
// activityPollTimeout bounds a single FetchActivity call to one sandbox.
activityPollTimeout = 3 * time.Second
// activitySampleConcurrency caps how many sandboxes are polled in parallel
// within one sampler tick.
activitySampleConcurrency = 16
// busyDebounceSamples is how many consecutive busy samples are required
// before the sandbox's TTL is refreshed. With a 5s interval, real work
// registers within ~10s while isolated noise spikes are ignored.
busyDebounceSamples = 2
)
// LifecycleEvent describes an autonomous state change initiated by the agent. // LifecycleEvent describes an autonomous state change initiated by the agent.
type LifecycleEvent struct { type LifecycleEvent struct {
Event string Event string
@ -189,6 +222,12 @@ type sandboxState struct {
ring *metricsRing // tiered ring buffers for CPU/mem/disk metrics ring *metricsRing // tiered ring buffers for CPU/mem/disk metrics
samplerCancel context.CancelFunc // cancels the per-sandbox sampling goroutine samplerCancel context.CancelFunc // cancels the per-sandbox sampling goroutine
samplerDone chan struct{} // closed when the sampling goroutine exits samplerDone chan struct{} // closed when the sampling goroutine exits
// activityBusyStreak counts consecutive busy activity samples. A single
// noisy sample (idle background CPU, a stray packet) must not refresh the
// TTL, so LastActiveAt is only bumped once the streak reaches
// busyDebounceSamples. Reset to 0 by any non-busy sample. Guarded by m.mu.
activityBusyStreak int
} }
// buildMetadata constructs the metadata map with version information. // buildMetadata constructs the metadata map with version information.
@ -419,14 +458,14 @@ func (m *Manager) Create(
// Fetch envd version (best-effort). // Fetch envd version (best-effort).
envdVersion, _ := client.FetchVersion(ctx) envdVersion, _ := client.FetchVersion(ctx)
// Apply template defaults via envd /init (no-op when both empty). // Apply template defaults + sandbox identity via envd /init. Always called
if defaultUser != "" || len(defaultEnv) > 0 { // on create so envd records its sandbox ID and proxy domain (used by
// `envd ports`), even when the template specifies no user/env defaults.
initCtx, initCancel := context.WithTimeout(ctx, m.cfg.EnvdTimeout) initCtx, initCancel := context.WithTimeout(ctx, m.cfg.EnvdTimeout)
if err := client.PostInitWithDefaults(initCtx, defaultUser, defaultEnv, sandboxID, id.UUIDString(templateID)); err != nil { if err := client.PostInitWithDefaults(initCtx, defaultUser, defaultEnv, sandboxID, id.UUIDString(templateID), m.cfg.ProxyDomain); err != nil {
slog.Warn("post-create PostInit failed", "id", sandboxID, "error", err) slog.Warn("post-create PostInit failed", "id", sandboxID, "error", err)
} }
initCancel() initCancel()
}
now := time.Now() now := time.Now()
sb := &sandboxState{ sb := &sandboxState{
@ -667,7 +706,7 @@ func (m *Manager) SetDefaults(ctx context.Context, sandboxID, defaultUser string
if err != nil { if err != nil {
return err return err
} }
return c.PostInitWithDefaults(ctx, defaultUser, defaultEnv, "", "") return c.PostInitWithDefaults(ctx, defaultUser, defaultEnv, "", "", "")
} }
// PtyAttach starts a new PTY process or reconnects to an existing one. // PtyAttach starts a new PTY process or reconnects to an existing one.
@ -762,6 +801,11 @@ func (m *Manager) AcquireProxyConn(sandboxID string) (net.IP, *ConnTracker, bool
if !sb.connTracker.Acquire() { if !sb.connTracker.Acquire() {
return nil, nil, false return nil, nil, false
} }
// Inbound proxy traffic counts as activity: an idle web server reachable
// through the proxy should not be auto-paused while it is serving requests.
m.mu.Lock()
sb.LastActiveAt = time.Now()
m.mu.Unlock()
return sb.HostIP, sb.connTracker, true return sb.HostIP, sb.connTracker, true
} }
@ -872,6 +916,146 @@ func (m *Manager) reapExpired(_ context.Context) {
} }
} }
// StartActivitySampler launches the background loop that periodically samples
// guest liveness (CPU + net/disk IO) for every running sandbox and refreshes
// LastActiveAt for those doing real work. That keeps a long-running but
// non-interactive job (a build, a download) from being auto-paused by the TTL
// reaper, while an idle workload (sleep, a parked shell) still times out.
//
// The tick period is Config.ActivitySampleInterval, or the package default when
// that is zero or negative. The loop exits when ctx is cancelled or the
// manager's stop channel fires. The call itself returns immediately.
func (m *Manager) StartActivitySampler(ctx context.Context) {
	period := defaultActivitySampleInterval
	if configured := m.cfg.ActivitySampleInterval; configured > 0 {
		period = configured
	}

	go func() {
		tick := time.NewTicker(period)
		defer tick.Stop()

		for {
			select {
			case <-tick.C:
				m.sampleActivity(ctx)
			case <-m.stopCh:
				return
			case <-ctx.Done():
				return
			}
		}
	}()
}
// activityTarget pairs a sandbox ID with the envd client to poll. It is built
// while the manager lock is held so the network poll can run without it.
type activityTarget struct {
// id is the sandbox's key in the manager's sandbox map; it is used to look
// the sandbox up again once the poll has finished.
id string
// client is the envd client captured from the sandbox at snapshot time.
client *envdclient.Client
}
// sampleActivity runs one sampler tick. It first collects, under the read
// lock, every sandbox that is running, has finished any post-resume memory
// load, and currently has an envd client. It then polls those sandboxes
// concurrently (at most activitySampleConcurrency at a time) without holding
// the lock, and returns once every poll has completed.
func (m *Manager) sampleActivity(ctx context.Context) {
	m.mu.RLock()
	targets := make([]activityTarget, 0, len(m.boxes))
	for id, sb := range m.boxes {
		if sb.Status != models.StatusRunning {
			continue
		}

		// A sandbox still loading memory after a resume is not settled: its
		// IO/CPU is preload noise, not user work, so it is left out.
		loading := false
		if done := sb.memLoadDone; done != nil {
			select {
			case <-done:
			default:
				loading = true
			}
		}
		if loading {
			continue
		}

		if c := sb.client.Load(); c != nil {
			targets = append(targets, activityTarget{id: id, client: c})
		}
	}
	m.mu.RUnlock()

	if len(targets) == 0 {
		return
	}

	// slots is a counting semaphore: acquiring before spawning bounds the
	// number of in-flight polls.
	slots := make(chan struct{}, activitySampleConcurrency)
	var wg sync.WaitGroup
	for _, target := range targets {
		wg.Add(1)
		slots <- struct{}{}
		go func(target activityTarget) {
			defer func() {
				<-slots
				wg.Done()
			}()
			m.pollAndBump(ctx, target)
		}(target)
	}
	wg.Wait()
}
// pollAndBump takes one activity sample for a single sandbox and feeds it into
// that sandbox's debounce streak, refreshing LastActiveAt once the sandbox has
// been busy for busyDebounceSamples consecutive samples. The poll is bounded by
// activityPollTimeout.
//
// A failed poll counts as a non-busy sample: an unreachable envd is handled by
// the reaper / heartbeat paths, and resetting the streak is the safe default.
// If the sandbox has disappeared or is no longer running by the time the poll
// returns, the sample is discarded.
func (m *Manager) pollAndBump(ctx context.Context, t activityTarget) {
	pollCtx, cancel := context.WithTimeout(ctx, activityPollTimeout)
	defer cancel()

	busy := false
	if act, err := t.client.FetchActivity(pollCtx); err == nil {
		busy = m.isBusy(act)
	}

	m.mu.Lock()
	defer m.mu.Unlock()

	// Re-check under the lock: the sandbox may have changed while polling.
	sb, found := m.boxes[t.id]
	if !found || sb.Status != models.StatusRunning {
		return
	}

	var refresh bool
	sb.activityBusyStreak, refresh = applyBusySample(sb.activityBusyStreak, busy)
	if refresh {
		sb.LastActiveAt = time.Now()
	}
}
// applyBusySample folds the latest sample into a debounce streak and reports
// whether the TTL should be refreshed on this tick.
//
// A non-busy sample always yields (0, false). A busy sample increments the
// streak; while the result is still below busyDebounceSamples it is returned
// with bump == false. Once it reaches the threshold the function returns
// (busyDebounceSamples, true), and keeps doing so on every further busy sample,
// so the streak is held at the threshold rather than growing unbounded.
func applyBusySample(streak int, busy bool) (newStreak int, bump bool) {
	if !busy {
		return 0, false
	}
	next := streak + 1
	if next < busyDebounceSamples {
		return next, false
	}
	return busyDebounceSamples, true
}
// isBusy reports whether a guest liveness snapshot represents real work. A
// snapshot is busy when any one of its readings meets its threshold: CPU usage
// at or above the CPU threshold, or network or disk throughput at or above the
// respective floor. Thresholds come from the manager's Config; a CPU threshold
// that is zero or negative, or a floor that is zero, falls back to the package
// default.
func (m *Manager) isBusy(act *envdclient.Activity) bool {
	cpuThreshold, netFloor, diskFloor := m.cfg.CPUBusyPct, m.cfg.NetFloorBps, m.cfg.DiskFloorBps
	if cpuThreshold <= 0 {
		cpuThreshold = defaultCPUBusyPct
	}
	if netFloor == 0 {
		netFloor = defaultNetFloorBps
	}
	if diskFloor == 0 {
		diskFloor = defaultDiskFloorBps
	}

	switch {
	case act.CPUUsedPct >= cpuThreshold:
		return true
	case act.NetBps >= netFloor:
		return true
	case act.DiskBps >= diskFloor:
		return true
	}
	return false
}
// Shutdown gracefully drains the manager. Running sandboxes are paused so // Shutdown gracefully drains the manager. Running sandboxes are paused so
// their state survives across agent restarts; any sandboxes still holding // their state survives across agent restarts; any sandboxes still holding
// runtime resources after PauseAll (e.g. paused failed, or status was // runtime resources after PauseAll (e.g. paused failed, or status was

View File

@ -110,7 +110,7 @@ func (m *Manager) initAndStartMemoryLoader(ctx context.Context, sb *sandboxState
slog.Warn("post-restore PostInit skipped: envd client cleared", "id", sb.ID) slog.Warn("post-restore PostInit skipped: envd client cleared", "id", sb.ID)
return return
} }
if err := c.PostInitWithDefaults(initCtx, defaultUser, envVars, sb.ID, templateIDStr); err != nil { if err := c.PostInitWithDefaults(initCtx, defaultUser, envVars, sb.ID, templateIDStr, m.cfg.ProxyDomain); err != nil {
slog.Warn("post-restore PostInit failed", "id", sb.ID, "error", err) slog.Warn("post-restore PostInit failed", "id", sb.ID, "error", err)
} }