v0.2.1 (#55)

Co-authored-by: Tasnim Kabir Sadik <tksadik@omukk.dev> Reviewed-on: wrenn/wrenn#55 Co-authored-by: pptx704 <rafeed@omukk.dev> Co-committed-by: pptx704 <rafeed@omukk.dev>
2026-06-20 22:45:08 +00:00
parent cfc0c52010
commit a08e755e53
53 changed files with 1675 additions and 577 deletions
--- a/.env.example
+++ b/.env.example
@ -17,6 +17,21 @@ WRENN_HOST_INTERFACE=eth0
 WRENN_CP_URL=http://localhost:9725
 WRENN_DEFAULT_ROOTFS_SIZE=5Gi
 WRENN_CH_BIN=/usr/local/bin/cloud-hypervisor
+# Public domain under which sandboxes are served; injected into envd so that
+# `envd ports` can build {port}-{sandbox_id}.{domain} URLs.
+WRENN_PROXY_DOMAIN=wrenn.dev
+
+# Activity sampler for the inactivity TTL (all optional; shown values are the
+# defaults). The host polls each running sandbox's guest activity and refreshes
+# that sandbox's inactivity TTL while the guest is doing real work, so a
+# long-running but non-interactive job (build, download) is not auto-paused.
+# A sandbox counts as busy when guest CPU ≥ the threshold, or net/disk
+# throughput ≥ its floor, for 2 consecutive samples (debounced), so isolated
+# idle-noise spikes do not keep a sandbox alive.
+WRENN_ACTIVITY_SAMPLE_INTERVAL=5s
+WRENN_CPU_BUSY_THRESHOLD=5.0
+WRENN_NET_FLOOR_BPS=16384
+WRENN_DISK_FLOOR_BPS=32768

 # Auth
 JWT_SECRET=
--- a/CLAUDE.md
+++ b/CLAUDE.md
@ -4,7 +4,7 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co

 ## Project Overview

-Wrenn Sandbox is a microVM-based code execution platform. Users create isolated sandboxes (Cloud Hypervisor microVMs), run code inside them, and get output back via SDKs. Think E2B but with persistent sandboxes, pool-based pricing, and a single-binary deployment story.
+Wrenn is an open-source, self-hosted dev environment platform. Users spin up isolated sandboxes (Cloud Hypervisor microVMs), run code inside them, and get output back via SDKs. Fast boot, persistent state, and a single agent binary on each host you own.

 ## Build & Development Commands

@ -28,7 +28,7 @@ make dev-envd           # envd in debug mode (port 49983)
 make check              # fmt + vet + lint + test (CI order)
 make test               # Unit tests: go test -race -v ./internal/...
 make test-integration   # Integration tests (require host agent + Cloud Hypervisor)
-make fmt                # gofmt
+make fmt                # gofmt + cargo fmt (envd-rs)
 make vet                # go vet
 make lint               # golangci-lint

--- a/1
+++ b/1
@ -106,6 +106,7 @@ sqlc:

 fmt:
 	gofmt -w .
+	cargo fmt --manifest-path envd-rs/Cargo.toml

 lint:
 	golangci-lint run ./...
--- a/README.md
+++ b/README.md
@ -2,6 +2,8 @@

 Secure infrastructure for AI

+Wrenn is an open-source, self-hosted dev environment platform. Each capsule is a fully isolated virtual machine — booted in seconds, persistent across sessions. Run the control plane anywhere and deploy a single agent binary on each compute host.
+
 ## Prerequisites

 - Linux host with `/dev/kvm` access (bare metal or nested virt)
--- a/2
+++ b/2
@ -1 +1 @@
-0.2.0
+0.2.1
--- a/2
+++ b/2
@ -1 +1 @@
-0.2.0
+0.2.1
--- a/cmd/host-agent/main.go
+++ b/cmd/host-agent/main.go
@ -148,6 +148,13 @@ func main() {
 		VMMBin:              chBin,
 		VMMVersion:          chVersion,
 		AgentVersion:        version,
+		ProxyDomain:         envOrDefault("WRENN_PROXY_DOMAIN", "wrenn.dev"),
+
+		// Activity sampler tuning (all optional; zero → sandbox package default).
+		ActivitySampleInterval: envDuration("WRENN_ACTIVITY_SAMPLE_INTERVAL"),
+		CPUBusyPct:             envFloat32("WRENN_CPU_BUSY_THRESHOLD"),
+		NetFloorBps:            envUint64("WRENN_NET_FLOOR_BPS"),
+		DiskFloorBps:           envUint64("WRENN_DISK_FLOOR_BPS"),
 	}

 	// Remove any *.staging-* / *.trash-* directories left behind by a
@ -171,6 +178,7 @@ func main() {
 	mgr.RestorePausedSandboxes()

 	mgr.StartTTLReaper(ctx)
+	mgr.StartActivitySampler(ctx)

 	// httpServer is declared here so the shutdown func can reference it.
 	// ReadTimeout/WriteTimeout are intentionally omitted — they would kill
@ -311,6 +319,49 @@ func envOrDefault(key, def string) string {
 	return def
 }

+// envDuration reads key as an optional time.ParseDuration string (e.g. "5s").
+// Unset/empty → 0; unparsable → 0 plus a warning. 0 lets the sandbox package apply its default.
+func envDuration(key string) time.Duration {
+	v := os.Getenv(key)
+	if v == "" {
+		return 0
+	}
+	d, err := time.ParseDuration(v)
+	if err != nil {
+		slog.Warn("invalid duration env var, using default", "key", key, "value", v)
+		return 0
+	}
+	return d // NOTE(review): negative values (e.g. "-5s") parse cleanly and pass through — confirm the consumer rejects them
+}
+
+// envFloat32 parses an optional 32-bit float env var. Unset, empty, or invalid (logged) → 0.
+func envFloat32(key string) float32 {
+	v := os.Getenv(key)
+	if v == "" {
+		return 0
+	}
+	f, err := strconv.ParseFloat(v, 32) // bitSize 32: the result converts to float32 without changing its value
+	if err != nil {
+		slog.Warn("invalid float env var, using default", "key", key, "value", v)
+		return 0
+	}
+	return float32(f) // NOTE(review): "NaN", "Inf" and negatives parse without error and are returned as-is — confirm intended
+}
+
+// envUint64 parses an optional base-10 unsigned 64-bit env var. Unset, empty, or invalid (logged) → 0.
+func envUint64(key string) uint64 {
+	v := os.Getenv(key)
+	if v == "" {
+		return 0
+	}
+	n, err := strconv.ParseUint(v, 10, 64) // base 10 only: signs, "0x" prefixes and "_" separators are rejected
+	if err != nil {
+		slog.Warn("invalid uint env var, using default", "key", key, "value", v)
+		return 0
+	}
+	return n
+}
+
 // checkPrivileges verifies the process has the required Linux capabilities.
 // Always reads CapEff — even for root — because a root process inside a
 // restricted container (e.g. docker --cap-drop=all) may not have all caps.
--- a/envd-rs/Cargo.lock
+++ b/envd-rs/Cargo.lock
@ -529,7 +529,7 @@ dependencies = [

 [[package]]
 name = "envd"
-version = "0.3.0"
+version = "0.4.0"
 dependencies = [
 "async-stream",
 "axum",
--- a/envd-rs/Cargo.toml
+++ b/envd-rs/Cargo.toml
@ -1,6 +1,6 @@
 [package]
 name = "envd"
-version = "0.3.0"
+version = "0.4.0"
 edition = "2024"
 rust-version = "1.95"

--- a/envd-rs/src/auth/middleware.rs
+++ b/envd-rs/src/auth/middleware.rs
@ -14,6 +14,7 @@ const ACCESS_TOKEN_HEADER: &str = "x-access-token";
 /// Format: "METHOD/path"
 const AUTH_EXCLUDED: &[&str] = &[
    "GET/health",
+    "GET/activity",
    "GET/files",
    "POST/files",
    "POST/init",
@ -21,11 +22,7 @@ const AUTH_EXCLUDED: &[&str] = &[
 ];

 /// Axum middleware that checks X-Access-Token header.
-pub async fn auth_layer(
-    request: Request,
-    next: Next,
-    access_token: Arc<SecureToken>,
-) -> Response {
+pub async fn auth_layer(request: Request, next: Next, access_token: Arc<SecureToken>) -> Response {
    if access_token.is_set() {
        let method = request.method().as_str();
        let path = request.uri().path();
--- a/envd-rs/src/auth/mod.rs
+++ b/envd-rs/src/auth/mod.rs
@ -1,3 +1,3 @@
-pub mod token;
-pub mod signing;
 pub mod middleware;
+pub mod signing;
+pub mod token;
--- a/envd-rs/src/auth/signing.rs
+++ b/envd-rs/src/auth/signing.rs
@ -140,13 +140,32 @@ mod tests {
    #[test]
    fn validate_correct_header_token() {
        let token = test_token(b"secret");
-        assert!(validate_signing(&token, Some("secret"), None, None, "root", "/f", READ_OPERATION).is_ok());
+        assert!(
+            validate_signing(
+                &token,
+                Some("secret"),
+                None,
+                None,
+                "root",
+                "/f",
+                READ_OPERATION
+            )
+            .is_ok()
+        );
    }

    #[test]
    fn validate_wrong_header_token() {
        let token = test_token(b"secret");
-        let result = validate_signing(&token, Some("wrong"), None, None, "root", "/f", READ_OPERATION);
+        let result = validate_signing(
+            &token,
+            Some("wrong"),
+            None,
+            None,
+            "root",
+            "/f",
+            READ_OPERATION,
+        );
        assert!(result.is_err());
        assert!(result.unwrap_err().contains("does not match"));
    }
@ -156,13 +175,32 @@ mod tests {
        let token = test_token(b"secret");
        let exp = far_future();
        let sig = generate_signature(&token, "/file", "root", READ_OPERATION, Some(exp)).unwrap();
-        assert!(validate_signing(&token, None, Some(&sig), Some(exp), "root", "/file", READ_OPERATION).is_ok());
+        assert!(
+            validate_signing(
+                &token,
+                None,
+                Some(&sig),
+                Some(exp),
+                "root",
+                "/file",
+                READ_OPERATION
+            )
+            .is_ok()
+        );
    }

    #[test]
    fn validate_invalid_signature() {
        let token = test_token(b"secret");
-        let result = validate_signing(&token, None, Some("v1_bad"), Some(far_future()), "root", "/f", READ_OPERATION);
+        let result = validate_signing(
+            &token,
+            None,
+            Some("v1_bad"),
+            Some(far_future()),
+            "root",
+            "/f",
+            READ_OPERATION,
+        );
        assert!(result.is_err());
        assert!(result.unwrap_err().contains("invalid signature"));
    }
@ -172,7 +210,15 @@ mod tests {
        let token = test_token(b"secret");
        let expired: i64 = 1_000_000;
        let sig = generate_signature(&token, "/f", "root", READ_OPERATION, Some(expired)).unwrap();
-        let result = validate_signing(&token, None, Some(&sig), Some(expired), "root", "/f", READ_OPERATION);
+        let result = validate_signing(
+            &token,
+            None,
+            Some(&sig),
+            Some(expired),
+            "root",
+            "/f",
+            READ_OPERATION,
+        );
        assert!(result.is_err());
        assert!(result.unwrap_err().contains("expired"));
    }
@ -197,7 +243,18 @@ mod tests {
    fn validate_valid_signature_no_expiration() {
        let token = test_token(b"secret");
        let sig = generate_signature(&token, "/file", "root", READ_OPERATION, None).unwrap();
-        assert!(validate_signing(&token, None, Some(&sig), None, "root", "/file", READ_OPERATION).is_ok());
+        assert!(
+            validate_signing(
+                &token,
+                None,
+                Some(&sig),
+                None,
+                "root",
+                "/file",
+                READ_OPERATION
+            )
+            .is_ok()
+        );
    }

    #[test]
--- a/envd-rs/src/cgroups/mod.rs
+++ b/envd-rs/src/cgroups/mod.rs
@ -19,20 +19,25 @@ pub struct Cgroup2Manager {
 }

 impl Cgroup2Manager {
-    pub fn new(root: &str, configs: &[(ProcessType, &str, &[(&str, &str)])]) -> Result<Self, String> {
+    pub fn new(
+        root: &str,
+        configs: &[(ProcessType, &str, &[(&str, &str)])],
+    ) -> Result<Self, String> {
        let mut fds = HashMap::new();

        for (proc_type, sub_path, properties) in configs {
            let full_path = PathBuf::from(root).join(sub_path);

-            fs::create_dir_all(&full_path).map_err(|e| {
-                format!("failed to create cgroup {}: {e}", full_path.display())
-            })?;
+            fs::create_dir_all(&full_path)
+                .map_err(|e| format!("failed to create cgroup {}: {e}", full_path.display()))?;

            for (name, value) in *properties {
                let prop_path = full_path.join(name);
                fs::write(&prop_path, value).map_err(|e| {
-                    format!("failed to write cgroup property {}: {e}", prop_path.display())
+                    format!(
+                        "failed to write cgroup property {}: {e}",
+                        prop_path.display()
+                    )
                })?;
            }

--- a/envd-rs/src/cmd/mod.rs
+++ b/envd-rs/src/cmd/mod.rs
@ -0,0 +1,5 @@
+//! Client subcommands for the `envd` binary. These run as short-lived
+//! invocations (e.g. `envd ports`) inside the guest, separate from the
+//! long-running daemon, and exit when done.
+
+pub mod ports;
--- a/envd-rs/src/cmd/ports.rs
+++ b/envd-rs/src/cmd/ports.rs
@ -0,0 +1,164 @@
+//! `envd ports` — list the open ports inside the sandbox that are reachable
+//! from outside, alongside the URL each is served at.
+//!
+//! Runs as a one-shot client (not the daemon): it scans `/proc/net/tcp[6]`
+//! directly via the shared port helper and reads the sandbox identity that the
+//! daemon recorded under /run/wrenn at /init time. It refuses to run outside a
+//! wrenn sandbox.
+
+use std::fs;
+use std::path::Path;
+
+use crate::config::{DEFAULT_PORT, DEFAULT_PROXY_DOMAIN, WRENN_RUN_DIR};
+use crate::port::conn::reachable_listening_ports;
+
+/// Arguments for the `envd ports` subcommand.
+#[derive(clap::Args)] // NOTE: with clap's derive, the `///` comments on the fields below are the user-visible --help text
+pub struct PortsArgs {
+    /// Override the proxy domain used to build URLs (default: the domain
+    /// injected by the host, falling back to the built-in default).
+    #[arg(long)]
+    domain: Option<String>,
+
+    /// Emit JSON instead of a table.
+    #[arg(long)]
+    json: bool,
+}
+
+#[derive(serde::Serialize)]
+struct PortEntry { // one output row; with --json the field names become the object keys "port" and "url"
+    port: u32, // listening port as returned by reachable_listening_ports
+    url: String, // public URL for that port, as produced by build_url
+}
+
+/// Runs `envd ports`; returns the exit code (0 = success, 1 = not in a sandbox or JSON encoding failed).
+pub fn run(args: &PortsArgs) -> i32 {
+    if !inside_sandbox() {
+        eprintln!("envd ports: not running inside a wrenn sandbox");
+        return 1;
+    }
+
+    let sandbox_id = read_identity("WRENN_SANDBOX_ID", ".WRENN_SANDBOX_ID"); // None is tolerated: build_url substitutes a placeholder
+    let domain = args
+        .domain
+        .clone()
+        .filter(|d| !d.is_empty()) // an empty --domain is treated as not given
+        .or_else(|| read_identity("WRENN_PROXY_DOMAIN", ".WRENN_PROXY_DOMAIN")) // next: env var, then the /run/wrenn file
+        .unwrap_or_else(|| DEFAULT_PROXY_DOMAIN.to_string()); // last resort: compiled-in default
+
+    let entries: Vec<PortEntry> = reachable_listening_ports(DEFAULT_PORT as u32) // NOTE(review): presumably DEFAULT_PORT (envd's own) is excluded — confirm in port::conn
+        .into_iter()
+        .map(|port| PortEntry {
+            url: build_url(port, sandbox_id.as_deref(), &domain),
+            port,
+        })
+        .collect();
+
+    if args.json {
+        match serde_json::to_string_pretty(&entries) {
+            Ok(s) => println!("{s}"),
+            Err(e) => {
+                eprintln!("envd ports: failed to encode JSON: {e}");
+                return 1;
+            }
+        }
+        return 0; // JSON mode returns before the empty check, so "no ports" is printed as [] rather than a message
+    }
+
+    if entries.is_empty() {
+        println!("No open ports.");
+        return 0;
+    }
+
+    println!("{:<6} {}", "PORT", "URL"); // header row; {:<6} left-aligns the port column to 6 characters
+    for e in &entries {
+        println!("{:<6} {}", e.port, e.url);
+    }
+    0
+}
+
+/// True when either sandbox signal is present: the `WRENN_SANDBOX` env var
+/// equal to "true" (exported by the daemon into spawned processes) or the
+/// marker file `/run/wrenn/.WRENN_SANDBOX` (written by the daemon at startup).
+/// A normal host has neither, so `envd ports` is refused there.
+fn inside_sandbox() -> bool {
+    if std::env::var("WRENN_SANDBOX").as_deref() == Ok("true") {
+        return true;
+    }
+    Path::new(WRENN_RUN_DIR).join(".WRENN_SANDBOX").exists()
+}
+
+/// Looks up an identity value: a non-blank `env_key` env var wins (trimmed), else the trimmed
+/// contents of WRENN_RUN_DIR/`file_name`. Returns None when both are unset, blank, or unreadable.
+fn read_identity(env_key: &str, file_name: &str) -> Option<String> {
+    if let Ok(v) = std::env::var(env_key) {
+        let v = v.trim().to_string();
+        if !v.is_empty() {
+            return Some(v);
+        }
+    }
+    match fs::read_to_string(Path::new(WRENN_RUN_DIR).join(file_name)) {
+        Ok(v) => {
+            let v = v.trim().to_string();
+            if v.is_empty() { None } else { Some(v) }
+        }
+        Err(_) => None, // a missing or unreadable file is treated the same as "not set"
+    }
+}
+
+/// Formats the public URL for `port` as `https://{port}-{id}.{domain}`. When
+/// `sandbox_id` is None (identity not yet injected), the literal text
+/// `<sandbox-id>` stands in for the ID, so the output stays informative but
+/// the URL is not usable as-is.
+fn build_url(port: u32, sandbox_id: Option<&str>, domain: &str) -> String {
+    let id = sandbox_id.unwrap_or("<sandbox-id>");
+    format!("https://{port}-{id}.{domain}")
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn url_with_sandbox_id() {
+        assert_eq!(
+            build_url(8000, Some("cl-abcd1234"), "wrenn.dev"),
+            "https://8000-cl-abcd1234.wrenn.dev"
+        );
+    }
+
+    #[test]
+    fn url_without_sandbox_id_uses_placeholder() {
+        assert_eq!(
+            build_url(5173, None, "wrenn.dev"),
+            "https://5173-<sandbox-id>.wrenn.dev"
+        );
+    }
+
+    #[test]
+    fn url_honors_custom_domain() {
+        assert_eq!(
+            build_url(3000, Some("cl-deadbeef"), "sandbox.example.com"),
+            "https://3000-cl-deadbeef.sandbox.example.com"
+        );
+    }
+
+    #[test]
+    fn read_identity_prefers_env() {
+        // SAFETY: key is private to this test. NOTE(review): sibling tests run on other threads — confirm none bypass std::env.
+        unsafe { std::env::set_var("ENVD_PORTS_TEST_ID", "  cl-fromenv  ") };
+        assert_eq!(
+            read_identity("ENVD_PORTS_TEST_ID", ".nonexistent-file"),
+            Some("cl-fromenv".to_string())
+        );
+        unsafe { std::env::remove_var("ENVD_PORTS_TEST_ID") }; // clean up so the key does not outlive this test
+    }
+
+    #[test]
+    fn read_identity_none_when_unset() {
+        assert_eq!(
+            read_identity("ENVD_PORTS_TEST_UNSET", ".nonexistent-file"),
+            None
+        );
+    }
+}
--- a/envd-rs/src/config.rs
+++ b/envd-rs/src/config.rs
@ -7,5 +7,10 @@ pub const PORT_SCANNER_INTERVAL: Duration = Duration::from_millis(1000);
 pub const DEFAULT_USER: &str = "root";
 pub const WRENN_RUN_DIR: &str = "/run/wrenn";

+/// Fallback proxy domain used by `envd ports` to build URLs when the host has
+/// not injected one via /init. Matches the host agent's WRENN_PROXY_DOMAIN
+/// default.
+pub const DEFAULT_PROXY_DOMAIN: &str = "wrenn.dev";
+
 pub const KILOBYTE: u64 = 1024;
 pub const MEGABYTE: u64 = 1024 * KILOBYTE;
--- a/envd-rs/src/crypto/mod.rs
+++ b/envd-rs/src/crypto/mod.rs
@ -1,3 +1,3 @@
+pub mod hmac_sha256;
 pub mod sha256;
 pub mod sha512;
-pub mod hmac_sha256;
--- a/envd-rs/src/crypto/sha256.rs
+++ b/envd-rs/src/crypto/sha256.rs
@ -20,14 +20,22 @@ mod tests {
    const VECTORS: &[(&[u8], &str)] = &[
        (b"", "47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU"),
        (b"abc", "ungWv48Bz+pBQUDeXa4iI7ADYaOWF3qctBD/YfIAFa0"),
-        (b"abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq", "JI1qYdIGOLjlwCaTDD5gOaM85Flk/yFn9uzt1BnbBsE"),
+        (
+            b"abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq",
+            "JI1qYdIGOLjlwCaTDD5gOaM85Flk/yFn9uzt1BnbBsE",
+        ),
    ];

    #[test]
    fn known_answer_with_prefix() {
        for (input, expected_b64) in VECTORS {
            let result = hash(input);
-            assert_eq!(result, format!("$sha256${expected_b64}"), "input: {:?}", String::from_utf8_lossy(input));
+            assert_eq!(
+                result,
+                format!("$sha256${expected_b64}"),
+                "input: {:?}",
+                String::from_utf8_lossy(input)
+            );
        }
    }

@ -35,7 +43,12 @@ mod tests {
    fn known_answer_without_prefix() {
        for (input, expected_b64) in VECTORS {
            let result = hash_without_prefix(input);
-            assert_eq!(result, *expected_b64, "input: {:?}", String::from_utf8_lossy(input));
+            assert_eq!(
+                result,
+                *expected_b64,
+                "input: {:?}",
+                String::from_utf8_lossy(input)
+            );
        }
    }

--- a/envd-rs/src/crypto/sha512.rs
+++ b/envd-rs/src/crypto/sha512.rs
@ -15,9 +15,18 @@ mod tests {
    use super::*;

    const VECTORS: &[(&str, &str)] = &[
-        ("", "cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e"),
-        ("abc", "ddaf35a193617abacc417349ae20413112e6fa4e89a97ea20a9eeee64b55d39a2192992a274fc1a836ba3c23a3feebbd454d4423643ce80e2a9ac94fa54ca49f"),
-        ("abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq", "204a8fc6dda82f0a0ced7beb8e08a41657c16ef468b228a8279be331a703c33596fd15c13b1b07f9aa1d3bea57789ca031ad85c7a71dd70354ec631238ca3445"),
+        (
+            "",
+            "cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e",
+        ),
+        (
+            "abc",
+            "ddaf35a193617abacc417349ae20413112e6fa4e89a97ea20a9eeee64b55d39a2192992a274fc1a836ba3c23a3feebbd454d4423643ce80e2a9ac94fa54ca49f",
+        ),
+        (
+            "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq",
+            "204a8fc6dda82f0a0ced7beb8e08a41657c16ef468b228a8279be331a703c33596fd15c13b1b07f9aa1d3bea57789ca031ad85c7a71dd70354ec631238ca3445",
+        ),
    ];

    #[test]
@ -30,7 +39,10 @@ mod tests {
    #[test]
    fn str_and_bytes_agree() {
        for (input, _) in VECTORS {
-            assert_eq!(hash_access_token(input), hash_access_token_bytes(input.as_bytes()));
+            assert_eq!(
+                hash_access_token(input),
+                hash_access_token_bytes(input.as_bytes())
+            );
        }
    }

@ -38,6 +50,9 @@ mod tests {
    fn output_is_lowercase_hex_128_chars() {
        let h = hash_access_token("anything");
        assert_eq!(h.len(), 128);
-        assert!(h.chars().all(|c| c.is_ascii_hexdigit() && !c.is_ascii_uppercase()));
+        assert!(
+            h.chars()
+                .all(|c| c.is_ascii_hexdigit() && !c.is_ascii_uppercase())
+        );
    }
 }
--- a/envd-rs/src/execcontext.rs
+++ b/envd-rs/src/execcontext.rs
@ -62,7 +62,10 @@ mod tests {

    #[test]
    fn workdir_explicit_overrides_default() {
-        assert_eq!(resolve_default_workdir("/explicit", Some("/default")), "/explicit");
+        assert_eq!(
+            resolve_default_workdir("/explicit", Some("/default")),
+            "/explicit"
+        );
    }

    #[test]
@ -82,7 +85,10 @@ mod tests {

    #[test]
    fn username_explicit_returns_explicit() {
-        assert_eq!(resolve_default_username(Some("root"), "wrenn").unwrap(), "root");
+        assert_eq!(
+            resolve_default_username(Some("root"), "wrenn").unwrap(),
+            "root"
+        );
    }

    #[test]
--- a/envd-rs/src/http/activity.rs
+++ b/envd-rs/src/http/activity.rs
@ -0,0 +1,37 @@
+use std::sync::Arc;
+
+use axum::Json;
+use axum::extract::State;
+use axum::http::header;
+use axum::response::IntoResponse;
+use serde::Serialize;
+
+use crate::state::AppState;
+
+/// Liveness snapshot the host activity sampler polls to decide whether a
+/// sandbox is doing real work. All fields are served straight from atomics
+/// updated by the 1s sampler thread — no syscalls per request, so the host
+/// can poll cheaply at a few-second cadence.
+#[derive(Serialize)]
+pub struct Activity {
+    cpu_count: u32, // filled from AppState::cpu_count()
+    cpu_used_pct: f32, // NOTE(review): scale (0–100 overall vs. per-CPU) is not visible here — confirm with the sampler
+    net_bps: u64, // NOTE(review): presumably bytes/sec, compared against WRENN_NET_FLOOR_BPS — confirm
+    disk_bps: u64, // NOTE(review): presumably bytes/sec, compared against WRENN_DISK_FLOOR_BPS — confirm
+}
+
+pub async fn get_activity(State(state): State<Arc<AppState>>) -> impl IntoResponse {
+    tracing::trace!("get activity"); // trace level: this endpoint is hit on every host sampler poll
+
+    let body = Activity { // copied field-for-field from the AppState accessors
+        cpu_count: state.cpu_count(),
+        cpu_used_pct: state.cpu_used_pct(),
+        net_bps: state.net_bps(),
+        disk_bps: state.disk_bps(),
+    };
+
+    (
+        [(header::CACHE_CONTROL, "no-store")], // forbid caching: each poll must reflect the current sample
+        Json(body),
+    )
+}
--- a/envd-rs/src/http/encoding.rs
+++ b/envd-rs/src/http/encoding.rs
@ -20,7 +20,10 @@ fn parse_encoding_with_quality(value: &str) -> EncodingWithQuality {
        let enc = value[..idx].trim();
        for param in params.split(';') {
            let param = param.trim();
-            if let Some(stripped) = param.strip_prefix("q=").or_else(|| param.strip_prefix("Q=")) {
+            if let Some(stripped) = param
+                .strip_prefix("q=")
+                .or_else(|| param.strip_prefix("Q="))
+            {
                if let Ok(q) = stripped.parse::<f64>() {
                    quality = q;
                }
@ -43,8 +46,10 @@ fn parse_accept_encoding_header(header: &str) -> (Vec<EncodingWithQuality>, bool
        return (Vec::new(), false);
    }

-    let encodings: Vec<EncodingWithQuality> =
-        header.split(',').map(|v| parse_encoding_with_quality(v)).collect();
+    let encodings: Vec<EncodingWithQuality> = header
+        .split(',')
+        .map(|v| parse_encoding_with_quality(v))
+        .collect();

    let mut identity_rejected = false;
    let mut identity_explicitly_accepted = false;
@ -97,7 +102,11 @@ pub fn parse_accept_encoding<B>(r: &Request<B>) -> Result<&'static str, String>
    }

    let (mut encodings, identity_rejected) = parse_accept_encoding_header(header);
-    encodings.sort_by(|a, b| b.quality.partial_cmp(&a.quality).unwrap_or(std::cmp::Ordering::Equal));
+    encodings.sort_by(|a, b| {
+        b.quality
+            .partial_cmp(&a.quality)
+            .unwrap_or(std::cmp::Ordering::Equal)
+    });

    for eq in &encodings {
        if eq.quality == 0.0 {
@ -121,7 +130,9 @@ pub fn parse_accept_encoding<B>(r: &Request<B>) -> Result<&'static str, String>
        return Ok(ENCODING_IDENTITY);
    }

-    Err(format!("no acceptable encoding found, supported: {SUPPORTED_ENCODINGS:?}"))
+    Err(format!(
+        "no acceptable encoding found, supported: {SUPPORTED_ENCODINGS:?}"
+    ))
 }

 pub fn parse_content_encoding<B>(r: &Request<B>) -> Result<&'static str, String> {
@ -143,7 +154,9 @@ pub fn parse_content_encoding<B>(r: &Request<B>) -> Result<&'static str, String>
        return Ok(ENCODING_GZIP);
    }

-    Err(format!("unsupported Content-Encoding: {header}, supported: {SUPPORTED_ENCODINGS:?}"))
+    Err(format!(
+        "unsupported Content-Encoding: {header}, supported: {SUPPORTED_ENCODINGS:?}"
+    ))
 }

 #[cfg(test)]
@ -236,17 +249,26 @@ mod tests {

    #[test]
    fn accept_encoding_no_header_returns_identity() {
-        assert_eq!(parse_accept_encoding(&req_no_headers()).unwrap(), "identity");
+        assert_eq!(
+            parse_accept_encoding(&req_no_headers()).unwrap(),
+            "identity"
+        );
    }

    #[test]
    fn accept_encoding_gzip() {
-        assert_eq!(parse_accept_encoding(&req_with_accept("gzip")).unwrap(), "gzip");
+        assert_eq!(
+            parse_accept_encoding(&req_with_accept("gzip")).unwrap(),
+            "gzip"
+        );
    }

    #[test]
    fn accept_encoding_identity_explicit() {
-        assert_eq!(parse_accept_encoding(&req_with_accept("identity")).unwrap(), "identity");
+        assert_eq!(
+            parse_accept_encoding(&req_with_accept("identity")).unwrap(),
+            "identity"
+        );
    }

    #[test]
@ -259,7 +281,10 @@ mod tests {

    #[test]
    fn accept_encoding_wildcard_returns_identity() {
-        assert_eq!(parse_accept_encoding(&req_with_accept("*")).unwrap(), "identity");
+        assert_eq!(
+            parse_accept_encoding(&req_with_accept("*")).unwrap(),
+            "identity"
+        );
    }

    #[test]
@ -277,7 +302,10 @@ mod tests {

    #[test]
    fn accept_encoding_unsupported_only_falls_to_identity() {
-        assert_eq!(parse_accept_encoding(&req_with_accept("br")).unwrap(), "identity");
+        assert_eq!(
+            parse_accept_encoding(&req_with_accept("br")).unwrap(),
+            "identity"
+        );
    }

    // is_identity_acceptable
@ -311,17 +339,26 @@ mod tests {

    #[test]
    fn content_encoding_empty_returns_identity() {
-        assert_eq!(parse_content_encoding(&req_no_headers()).unwrap(), "identity");
+        assert_eq!(
+            parse_content_encoding(&req_no_headers()).unwrap(),
+            "identity"
+        );
    }

    #[test]
    fn content_encoding_gzip() {
-        assert_eq!(parse_content_encoding(&req_with_content("gzip")).unwrap(), "gzip");
+        assert_eq!(
+            parse_content_encoding(&req_with_content("gzip")).unwrap(),
+            "gzip"
+        );
    }

    #[test]
    fn content_encoding_identity_explicit() {
-        assert_eq!(parse_content_encoding(&req_with_content("identity")).unwrap(), "identity");
+        assert_eq!(
+            parse_content_encoding(&req_with_content("identity")).unwrap(),
+            "identity"
+        );
    }

    #[test]
@ -331,6 +368,9 @@ mod tests {

    #[test]
    fn content_encoding_case_insensitive() {
-        assert_eq!(parse_content_encoding(&req_with_content("GZIP")).unwrap(), "gzip");
+        assert_eq!(
+            parse_content_encoding(&req_with_content("GZIP")).unwrap(),
+            "gzip"
+        );
    }
 }
--- a/envd-rs/src/http/envs.rs
+++ b/envd-rs/src/http/envs.rs
@ -18,8 +18,5 @@ pub async fn get_envs(State(state): State<Arc<AppState>>) -> impl IntoResponse {
        .map(|entry| (entry.key().clone(), entry.value().clone()))
        .collect();

-    (
-        [(header::CACHE_CONTROL, "no-store")],
-        Json(envs),
-    )
+    ([(header::CACHE_CONTROL, "no-store")], Json(envs))
 }
--- a/envd-rs/src/http/files.rs
+++ b/envd-rs/src/http/files.rs
@ -72,13 +72,11 @@ pub async fn get_files(
    let header_token = extract_header_token(&req);

    let default_user = state.defaults.user();
-    let username = match execcontext::resolve_default_username(
-        params.username.as_deref(),
-        &default_user,
-    ) {
-        Ok(u) => u.to_string(),
-        Err(e) => return json_error(StatusCode::BAD_REQUEST, e),
-    };
+    let username =
+        match execcontext::resolve_default_username(params.username.as_deref(), &default_user) {
+            Ok(u) => u.to_string(),
+            Err(e) => return json_error(StatusCode::BAD_REQUEST, e),
+        };

    if let Err(e) = validate_file_signing(
        &state,
@ -98,8 +96,7 @@ pub async fn get_files(

    let home_dir = user.dir.to_string_lossy().to_string();
    let default_workdir = state.defaults.workdir();
-    let resolved = match expand_and_resolve(path_str, &home_dir, default_workdir.as_deref())
-    {
+    let resolved = match expand_and_resolve(path_str, &home_dir, default_workdir.as_deref()) {
        Ok(p) => p,
        Err(e) => return json_error(StatusCode::BAD_REQUEST, &e),
    };
@ -177,8 +174,7 @@ pub async fn get_files(
        .unwrap_or("application/octet-stream");

    if use_encoding == "gzip" {
-        let mut encoder =
-            flate2::write::GzEncoder::new(Vec::new(), flate2::Compression::default());
+        let mut encoder = flate2::write::GzEncoder::new(Vec::new(), flate2::Compression::default());
        if let Err(e) = encoder.write_all(&file_data) {
            return json_error(
                StatusCode::INTERNAL_SERVER_ERROR,
@ -225,13 +221,11 @@ pub async fn post_files(
    let header_token = extract_header_token(&req);

    let default_user = state.defaults.user();
-    let username = match execcontext::resolve_default_username(
-        params.username.as_deref(),
-        &default_user,
-    ) {
-        Ok(u) => u.to_string(),
-        Err(e) => return json_error(StatusCode::BAD_REQUEST, e),
-    };
+    let username =
+        match execcontext::resolve_default_username(params.username.as_deref(), &default_user) {
+            Ok(u) => u.to_string(),
+            Err(e) => return json_error(StatusCode::BAD_REQUEST, e),
+        };

    if let Err(e) = validate_file_signing(
        &state,
@ -283,10 +277,7 @@ pub async fn post_files(
                Err(e) => return json_error(StatusCode::BAD_REQUEST, &e),
            }
        } else {
-            let fname = field
-                .file_name()
-                .unwrap_or("upload")
-                .to_string();
+            let fname = field.file_name().unwrap_or("upload").to_string();
            match expand_and_resolve(&fname, &home_dir, default_workdir.as_deref()) {
                Ok(p) => p,
                Err(e) => return json_error(StatusCode::BAD_REQUEST, &e),
@ -382,7 +373,7 @@ fn process_file(
            return Err((
                StatusCode::INTERNAL_SERVER_ERROR,
                format!("error getting file info: {e}"),
-            ))
+            ));
        }
    };

@ -395,7 +386,7 @@ fn process_file(
                return Err((
                    StatusCode::INTERNAL_SERVER_ERROR,
                    format!("error changing ownership: {e}"),
-                ))
+                ));
            }
        }
    }
--- a/envd-rs/src/http/init.rs
+++ b/envd-rs/src/http/init.rs
@ -26,6 +26,9 @@ pub struct InitRequest {
    pub volume_mounts: Option<Vec<VolumeMount>>,
    pub sandbox_id: Option<String>,
    pub template_id: Option<String>,
+    /// Public proxy domain (e.g. "wrenn.dev"). Used by `envd ports` to build
+    /// the {port}-{sandbox_id}.{domain} URLs.
+    pub proxy_domain: Option<String>,
    /// New lifecycle identifier for this resume. When it changes between
    /// /init calls, envd treats the call as a post-resume hook: port
    /// forwarder is restarted and NFS mounts are refreshed.
@ -183,14 +186,32 @@ pub async fn post_init(
        // SAFETY: envd is single-threaded at init time; no concurrent env reads.
        unsafe { std::env::set_var("WRENN_SANDBOX_ID", id) };
        write_run_file(".WRENN_SANDBOX_ID", id);
-        state.defaults.env_vars.insert("WRENN_SANDBOX_ID".into(), id.clone());
+        state
+            .defaults
+            .env_vars
+            .insert("WRENN_SANDBOX_ID".into(), id.clone());
    }
    if let Some(ref id) = init_req.template_id {
        tracing::debug!(template_id = %id, "setting template ID from init request");
        // SAFETY: envd is single-threaded at init time; no concurrent env reads.
        unsafe { std::env::set_var("WRENN_TEMPLATE_ID", id) };
        write_run_file(".WRENN_TEMPLATE_ID", id);
-        state.defaults.env_vars.insert("WRENN_TEMPLATE_ID".into(), id.clone());
+        state
+            .defaults
+            .env_vars
+            .insert("WRENN_TEMPLATE_ID".into(), id.clone());
+    }
+    if let Some(ref domain) = init_req.proxy_domain {
+        if !domain.is_empty() {
+            tracing::debug!(proxy_domain = %domain, "setting proxy domain from init request");
+            // SAFETY: envd is single-threaded at init time; no concurrent env reads.
+            unsafe { std::env::set_var("WRENN_PROXY_DOMAIN", domain) };
+            write_run_file(".WRENN_PROXY_DOMAIN", domain);
+            state
+                .defaults
+                .env_vars
+                .insert("WRENN_PROXY_DOMAIN".into(), domain.clone());
+        }
    }

    (
@ -202,7 +223,10 @@ pub async fn post_init(

 async fn validate_init_access_token(state: &AppState, request_token: &str) -> Result<(), String> {
    // Fast path: matches existing token
-    if state.access_token.is_set() && !request_token.is_empty() && state.access_token.equals(request_token) {
+    if state.access_token.is_set()
+        && !request_token.is_empty()
+        && state.access_token.equals(request_token)
+    {
        return Ok(());
    }

@ -241,10 +265,7 @@ async fn setup_hyperloop(address: &str, env_vars: &dashmap::DashMap<String, Stri
        }
    }

-    env_vars.insert(
-        "WRENN_EVENTS_ADDRESS".into(),
-        format!("http://{address}"),
-    );
+    env_vars.insert("WRENN_EVENTS_ADDRESS".into(), format!("http://{address}"));
 }

 async fn setup_nfs(nfs_target: &str, path: &str) {
@ -287,7 +308,7 @@ async fn setup_nfs(nfs_target: &str, path: &str) {
 }

 fn write_run_file(name: &str, value: &str) {
-    let dir = std::path::Path::new("/run/wrenn");
+    let dir = std::path::Path::new(crate::config::WRENN_RUN_DIR);
    if let Err(e) = std::fs::create_dir_all(dir) {
        tracing::warn!(error = %e, "failed to create /run/wrenn");
        return;
@ -309,4 +330,3 @@ fn parse_timestamp_to_nanos(ts: &str) -> Result<i64, ()> {
    }
    Err(())
 }
-
--- a/envd-rs/src/http/mod.rs
+++ b/envd-rs/src/http/mod.rs
@ -1,3 +1,4 @@
+pub mod activity;
 pub mod encoding;
 pub mod envs;
 pub mod error;
@ -13,8 +14,8 @@ use std::time::Duration;

 use axum::Router;
 use axum::routing::{get, post};
-use http::header::{CACHE_CONTROL, HeaderName};
 use http::Method;
+use http::header::{CACHE_CONTROL, HeaderName};
 use tower_http::cors::{AllowHeaders, AllowMethods, AllowOrigin, CorsLayer};

 use crate::config::CORS_MAX_AGE;
@ -47,6 +48,7 @@ pub fn router(state: Arc<AppState>) -> Router {

    Router::new()
        .route("/health", get(health::get_health))
+        .route("/activity", get(activity::get_activity))
        .route("/metrics", get(metrics::get_metrics))
        .route("/envs", get(envs::get_envs))
        .route("/init", post(init::post_init))
--- a/envd-rs/src/main.rs
+++ b/envd-rs/src/main.rs
@ -2,6 +2,7 @@

 mod auth;
 mod cgroups;
+mod cmd;
 mod config;
 mod conntracker;
 mod crypto;
@ -39,6 +40,10 @@ const COMMIT: &str = {
 #[derive(Parser)]
 #[command(name = "envd", about = "Wrenn guest agent daemon")]
 struct Cli {
+    /// Client subcommand. When omitted, envd runs as the guest daemon.
+    #[command(subcommand)]
+    command: Option<Commands>,
+
    #[arg(long, default_value_t = DEFAULT_PORT)]
    port: u16,

@ -55,6 +60,12 @@ struct Cli {
    cgroup_root: String,
 }

+#[derive(clap::Subcommand)]
+enum Commands {
+    /// List externally-reachable open ports and the URL each is served at.
+    Ports(cmd::ports::PortsArgs),
+}
+
 #[tokio::main]
 async fn main() {
    let cli = Cli::parse();
@ -68,6 +79,11 @@ async fn main() {
        return;
    }

+    // Client subcommands are short-lived: run and exit before any daemon setup.
+    if let Some(Commands::Ports(args)) = &cli.command {
+        std::process::exit(cmd::ports::run(args));
+    }
+
    logging::init(true);

    if let Err(e) = fs::create_dir_all(WRENN_RUN_DIR) {
@ -85,36 +101,35 @@ async fn main() {
    }

    // Cgroup manager
-    let cgroup_manager: Arc<dyn cgroups::CgroupManager> =
-        match cgroups::Cgroup2Manager::new(
-            &cli.cgroup_root,
-            &[
-                (
-                    cgroups::ProcessType::Pty,
-                    "wrenn/pty",
-                    &[] as &[(&str, &str)],
-                ),
-                (
-                    cgroups::ProcessType::User,
-                    "wrenn/user",
-                    &[] as &[(&str, &str)],
-                ),
-                (
-                    cgroups::ProcessType::Socat,
-                    "wrenn/socat",
-                    &[] as &[(&str, &str)],
-                ),
-            ],
-        ) {
-            Ok(m) => {
-                tracing::info!("cgroup2 manager initialized");
-                Arc::new(m)
-            }
-            Err(e) => {
-                tracing::warn!(error = %e, "cgroup2 init failed, using noop");
-                Arc::new(cgroups::NoopCgroupManager)
-            }
-        };
+    let cgroup_manager: Arc<dyn cgroups::CgroupManager> = match cgroups::Cgroup2Manager::new(
+        &cli.cgroup_root,
+        &[
+            (
+                cgroups::ProcessType::Pty,
+                "wrenn/pty",
+                &[] as &[(&str, &str)],
+            ),
+            (
+                cgroups::ProcessType::User,
+                "wrenn/user",
+                &[] as &[(&str, &str)],
+            ),
+            (
+                cgroups::ProcessType::Socat,
+                "wrenn/socat",
+                &[] as &[(&str, &str)],
+            ),
+        ],
+    ) {
+        Ok(m) => {
+            tracing::info!("cgroup2 manager initialized");
+            Arc::new(m)
+        }
+        Err(e) => {
+            tracing::warn!(error = %e, "cgroup2 init failed, using noop");
+            Arc::new(cgroups::NoopCgroupManager)
+        }
+    };

    // Port subsystem
    let port_subsystem = Arc::new(PortSubsystem::new(Arc::clone(&cgroup_manager)));
@ -138,8 +153,7 @@ async fn main() {
    // RPC services (Connect protocol — serves Connect + gRPC + gRPC-Web on same port)
    let connect_router = rpc::rpc_router(Arc::clone(&state));

-    let app = http::router(Arc::clone(&state))
-        .fallback_service(connect_router.into_axum_service());
+    let app = http::router(Arc::clone(&state)).fallback_service(connect_router.into_axum_service());

    // --cmd: spawn initial process if specified
    if !cli.start_cmd.is_empty() {
@ -151,7 +165,12 @@ async fn main() {
    }

    let addr = SocketAddr::from(([0, 0, 0, 0], cli.port));
-    tracing::info!(port = cli.port, version = VERSION, commit = COMMIT, "envd starting");
+    tracing::info!(
+        port = cli.port,
+        version = VERSION,
+        commit = COMMIT,
+        "envd starting"
+    );

    let listener = TcpListener::bind(addr).await.expect("failed to bind");

@ -186,9 +205,7 @@ fn spawn_initial_command(cmd: &str, state: &AppState) {

    let home = user.dir.to_string_lossy().to_string();
    let default_workdir = state.defaults.workdir();
-    let cwd = default_workdir
-        .as_deref()
-        .unwrap_or(&home);
+    let cwd = default_workdir.as_deref().unwrap_or(&home);

    match process_handler::spawn_process(
        cmd,
@ -235,8 +252,7 @@ fn memory_reclaimer(_state: Arc<AppState>) {
            } else {
                let mut sys2 = sysinfo::System::new();
                sys2.refresh_memory();
-                let freed_mb =
-                    sys2.available_memory().saturating_sub(available) / (1024 * 1024);
+                let freed_mb = sys2.available_memory().saturating_sub(available) / (1024 * 1024);
                tracing::info!(used_pct, freed_mb, "page cache dropped");
            }
        }
--- a/envd-rs/src/permissions/mod.rs
+++ b/envd-rs/src/permissions/mod.rs
@ -1,2 +1,2 @@
-pub mod user;
 pub mod path;
+pub mod user;
--- a/envd-rs/src/permissions/path.rs
+++ b/envd-rs/src/permissions/path.rs
@ -94,7 +94,10 @@ mod tests {

    #[test]
    fn tilde_slash_path() {
-        assert_eq!(expand_tilde("~/docs", "/home/user").unwrap(), "/home/user/docs");
+        assert_eq!(
+            expand_tilde("~/docs", "/home/user").unwrap(),
+            "/home/user/docs"
+        );
    }

    #[test]
@ -109,12 +112,18 @@ mod tests {

    #[test]
    fn tilde_relative_no_tilde() {
-        assert_eq!(expand_tilde("relative/path", "/home/u").unwrap(), "relative/path");
+        assert_eq!(
+            expand_tilde("relative/path", "/home/u").unwrap(),
+            "relative/path"
+        );
    }

    #[test]
    fn tilde_cmd_like() {
-        assert_eq!(expand_tilde("~/bin/myapp", "/home/user").unwrap(), "/home/user/bin/myapp");
+        assert_eq!(
+            expand_tilde("~/bin/myapp", "/home/user").unwrap(),
+            "/home/user/bin/myapp"
+        );
    }

    #[test]
@ -144,12 +153,18 @@ mod tests {

    #[test]
    fn resolve_absolute_passthrough() {
-        assert_eq!(expand_and_resolve("/abs/path", "/home", None).unwrap(), "/abs/path");
+        assert_eq!(
+            expand_and_resolve("/abs/path", "/home", None).unwrap(),
+            "/abs/path"
+        );
    }

    #[test]
    fn resolve_empty_uses_default() {
-        assert_eq!(expand_and_resolve("", "/home", Some("/default")).unwrap(), "/default");
+        assert_eq!(
+            expand_and_resolve("", "/home", Some("/default")).unwrap(),
+            "/default"
+        );
    }

    #[test]
@ -161,7 +176,10 @@ mod tests {

    #[test]
    fn resolve_tilde_expands() {
-        assert_eq!(expand_and_resolve("~/dir", "/home/u", None).unwrap(), "/home/u/dir");
+        assert_eq!(
+            expand_and_resolve("~/dir", "/home/u", None).unwrap(),
+            "/home/u/dir"
+        );
    }

    #[test]
--- a/envd-rs/src/port/conn.rs
+++ b/envd-rs/src/port/conn.rs
@ -37,6 +37,36 @@ pub fn read_tcp_connections() -> Vec<ConnStat> {
    conns
 }

+/// Lists the guest's listening TCP ports that the host proxy can reach.
+///
+/// Takes a fresh connection snapshot and keeps the sockets in LISTEN state
+/// that are bound either to a wildcard address (`0.0.0.0`/`::`, directly
+/// reachable on the TAP interface) or to loopback (`127.0.0.1`/`::1`, bridged
+/// to the TAP IP by the socat forwarder). Sockets bound to any other specific
+/// address are not routable from the host and are left out, and so is
+/// `exclude_port` (envd's own control port).
+///
+/// The returned ports are sorted ascending and contain no duplicates.
+pub fn reachable_listening_ports(exclude_port: u32) -> Vec<u32> {
+    let snapshot = read_tcp_connections();
+    filter_reachable_ports(&snapshot, exclude_port)
+}
+
+/// Pure filtering step behind `reachable_listening_ports`: from `conns`, keep
+/// the ports of LISTEN sockets on a reachable bind address, drop
+/// `exclude_port`, and return what is left sorted ascending without duplicates.
+fn filter_reachable_ports(conns: &[ConnStat], exclude_port: u32) -> Vec<u32> {
+    // An ordered set yields the ports ascending and collapses a port that is
+    // bound on several addresses into a single entry.
+    let mut unique = std::collections::BTreeSet::new();
+    for c in conns {
+        if c.status != "LISTEN" || !is_reachable_bind(&c.local_ip) {
+            continue;
+        }
+        if c.local_port != exclude_port {
+            unique.insert(c.local_port);
+        }
+    }
+    unique.into_iter().collect()
+}
+
+/// Reports whether a socket bound to `ip` can be reached from the host: true
+/// for the wildcard addresses (routed directly via the TAP interface) and for
+/// the loopback addresses (socat-forwarded to the TAP IP), false otherwise.
+fn is_reachable_bind(ip: &str) -> bool {
+    const REACHABLE: [&str; 4] = ["0.0.0.0", "::", "127.0.0.1", "::1"];
+    REACHABLE.iter().any(|known| *known == ip)
+}
+
 fn parse_proc_net_tcp(path: &str, family: u32) -> io::Result<Vec<ConnStat>> {
    let file = std::fs::File::open(path)?;
    let reader = io::BufReader::new(file);
@ -92,7 +122,10 @@ fn parse_hex_addr(s: &str, family: u32) -> Option<(String, u32)> {
        if ip_bytes.len() != 4 {
            return None;
        }
-        format!("{}.{}.{}.{}", ip_bytes[3], ip_bytes[2], ip_bytes[1], ip_bytes[0])
+        format!(
+            "{}.{}.{}.{}",
+            ip_bytes[3], ip_bytes[2], ip_bytes[1], ip_bytes[0]
+        )
    } else {
        if ip_bytes.len() != 16 {
            return None;
@ -257,4 +290,76 @@ mod tests {
    fn parse_nonexistent_file_errors() {
        assert!(parse_proc_net_tcp("/nonexistent/path", libc::AF_INET as u32).is_err());
    }
+
+    // reachable port filtering
+
+    fn conn(ip: &str, port: u32, status: &str) -> ConnStat {
+        ConnStat {
+            local_ip: ip.to_string(),
+            local_port: port,
+            status: status.to_string(),
+            family: libc::AF_INET as u32,
+            inode: 0,
+        }
+    }
+
+    #[test]
+    fn reachable_bind_accepts_wildcard_and_loopback() {
+        assert!(is_reachable_bind("0.0.0.0"));
+        assert!(is_reachable_bind("::"));
+        assert!(is_reachable_bind("127.0.0.1"));
+        assert!(is_reachable_bind("::1"));
+    }
+
+    #[test]
+    fn reachable_bind_rejects_specific_address() {
+        assert!(!is_reachable_bind("192.168.1.5"));
+        assert!(!is_reachable_bind("169.254.0.21"));
+        assert!(!is_reachable_bind("10.0.0.1"));
+    }
+
+    #[test]
+    fn filter_keeps_only_listen_state() {
+        let conns = vec![
+            conn("0.0.0.0", 8000, "LISTEN"),
+            conn("0.0.0.0", 9000, "ESTABLISHED"),
+        ];
+        assert_eq!(filter_reachable_ports(&conns, 49983), vec![8000]);
+    }
+
+    #[test]
+    fn filter_excludes_unreachable_binds() {
+        let conns = vec![
+            conn("127.0.0.1", 8000, "LISTEN"),
+            conn("169.254.0.21", 8001, "LISTEN"), // socat's own listener
+            conn("192.168.1.5", 8002, "LISTEN"),
+        ];
+        assert_eq!(filter_reachable_ports(&conns, 49983), vec![8000]);
+    }
+
+    #[test]
+    fn filter_excludes_envd_control_port() {
+        let conns = vec![
+            conn("0.0.0.0", 49983, "LISTEN"),
+            conn("0.0.0.0", 8000, "LISTEN"),
+        ];
+        assert_eq!(filter_reachable_ports(&conns, 49983), vec![8000]);
+    }
+
+    #[test]
+    fn filter_dedups_and_sorts() {
+        // Same port on IPv4 wildcard and IPv6 loopback collapses to one entry.
+        let conns = vec![
+            conn("::1", 8000, "LISTEN"),
+            conn("0.0.0.0", 8000, "LISTEN"),
+            conn("0.0.0.0", 3000, "LISTEN"),
+        ];
+        assert_eq!(filter_reachable_ports(&conns, 49983), vec![3000, 8000]);
+    }
+
+    #[test]
+    fn filter_empty_when_no_listeners() {
+        let conns = vec![conn("0.0.0.0", 8000, "ESTABLISHED")];
+        assert!(filter_reachable_ports(&conns, 49983).is_empty());
+    }
 }
--- a/envd-rs/src/rpc/entry.rs
+++ b/envd-rs/src/rpc/entry.rs
@ -53,9 +53,7 @@ pub fn build_entry_info(path: &str) -> Result<EntryInfo, ConnectError> {
            Err(_) => FileType::FILE_TYPE_UNSPECIFIED,
        };

-        let target_mode = std::fs::metadata(p)
-            .map(|m| m.mode() & 0o7777)
-            .unwrap_or(0);
+        let target_mode = std::fs::metadata(p).map(|m| m.mode() & 0o7777).unwrap_or(0);

        (target_type, target_mode, Some(target))
    } else {
--- a/envd-rs/src/rpc/filesystem_service.rs
+++ b/envd-rs/src/rpc/filesystem_service.rs
@ -98,8 +98,7 @@ impl Filesystem for FilesystemServiceImpl {
        }

        let username = extract_username(&ctx).unwrap_or_else(|| self.state.defaults.user());
-        let user =
-            lookup_user(&username).map_err(|e| ConnectError::new(ErrorCode::Internal, e))?;
+        let user = lookup_user(&username).map_err(|e| ConnectError::new(ErrorCode::Internal, e))?;

        ensure_dirs(&path, user.uid, user.gid)
            .map_err(|e| ConnectError::new(ErrorCode::Internal, e))?;
@ -123,8 +122,7 @@ impl Filesystem for FilesystemServiceImpl {
        let destination = self.resolve_path(request.destination, &ctx)?;

        let username = extract_username(&ctx).unwrap_or_else(|| self.state.defaults.user());
-        let user =
-            lookup_user(&username).map_err(|e| ConnectError::new(ErrorCode::Internal, e))?;
+        let user = lookup_user(&username).map_err(|e| ConnectError::new(ErrorCode::Internal, e))?;

        if let Some(parent) = Path::new(&destination).parent() {
            ensure_dirs(&parent.to_string_lossy(), user.uid, user.gid)
@ -206,7 +204,12 @@ impl Filesystem for FilesystemServiceImpl {
            }
        }

-        Ok((RemoveResponse { ..Default::default() }, ctx))
+        Ok((
+            RemoveResponse {
+                ..Default::default()
+            },
+            ctx,
+        ))
    }

    async fn watch_dir(
@ -247,8 +250,8 @@ impl Filesystem for FilesystemServiceImpl {
        let events: Arc<Mutex<Vec<FilesystemEvent>>> = Arc::new(Mutex::new(Vec::new()));
        let events_cb = Arc::clone(&events);

-        let mut watcher = notify::recommended_watcher(
-            move |res: Result<notify::Event, notify::Error>| {
+        let mut watcher =
+            notify::recommended_watcher(move |res: Result<notify::Event, notify::Error>| {
                if let Ok(event) = res {
                    let event_type = match event.kind {
                        notify::EventKind::Create(_) => EventType::EVENT_TYPE_CREATE,
@ -275,11 +278,13 @@ impl Filesystem for FilesystemServiceImpl {
                        }
                    }
                }
-            },
-        )
-        .map_err(|e| {
-            ConnectError::new(ErrorCode::Internal, format!("failed to create watcher: {e}"))
-        })?;
+            })
+            .map_err(|e| {
+                ConnectError::new(
+                    ErrorCode::Internal,
+                    format!("failed to create watcher: {e}"),
+                )
+            })?;

        let mode = if recursive {
            RecursiveMode::Recursive
@ -342,7 +347,12 @@ impl Filesystem for FilesystemServiceImpl {
    ) -> Result<(RemoveWatcherResponse, Context), ConnectError> {
        let watcher_id: &str = request.watcher_id;
        self.watchers.remove(watcher_id);
-        Ok((RemoveWatcherResponse { ..Default::default() }, ctx))
+        Ok((
+            RemoveWatcherResponse {
+                ..Default::default()
+            },
+            ctx,
+        ))
    }
 }

--- a/envd-rs/src/rpc/mod.rs
+++ b/envd-rs/src/rpc/mod.rs
@ -1,17 +1,17 @@
-pub mod pb;
 pub mod entry;
+pub mod filesystem_service;
+pub mod pb;
 pub mod process_handler;
 pub mod process_service;
-pub mod filesystem_service;

 use std::sync::Arc;

-use crate::rpc::process_service::ProcessServiceImpl;
 use crate::rpc::filesystem_service::FilesystemServiceImpl;
+use crate::rpc::process_service::ProcessServiceImpl;
 use crate::state::AppState;

-use pb::process::ProcessExt;
 use pb::filesystem::FilesystemExt;
+use pb::process::ProcessExt;

 /// Build the connect-rust Router with both RPC services registered.
 pub fn rpc_router(state: Arc<AppState>) -> connectrpc::Router {
--- a/envd-rs/src/rpc/pb.rs
+++ b/envd-rs/src/rpc/pb.rs
@ -1,4 +1,9 @@
-#![allow(dead_code, non_camel_case_types, unused_imports, clippy::derivable_impls)]
+#![allow(
+    dead_code,
+    non_camel_case_types,
+    unused_imports,
+    clippy::derivable_impls
+)]

 use ::buffa;
 use ::buffa_types;
--- a/envd-rs/src/rpc/process_handler.rs
+++ b/envd-rs/src/rpc/process_handler.rs
@ -1,10 +1,11 @@
+use std::collections::VecDeque;
 use std::io::Read;
 use std::os::unix::process::CommandExt;
 use std::process::Stdio;
 use std::sync::{Arc, Mutex};

 use connectrpc::{ConnectError, ErrorCode};
-use nix::pty::{openpty, Winsize};
+use nix::pty::{Winsize, openpty};
 use nix::sys::signal::{self, Signal};
 use nix::unistd::Pid;
 use tokio::sync::broadcast;
@ -15,6 +16,11 @@ const STD_CHUNK_SIZE: usize = 32768;
 const PTY_CHUNK_SIZE: usize = 16384;
 const BROADCAST_CAPACITY: usize = 4096;

+// Upper bound on the per-process output kept for replay. A late Connect gets
+// at most the most recent OUTPUT_LOG_CAPACITY bytes: whole chunks are evicted
+// oldest-first once the cap is exceeded, so the buffer can never grow without
+// bound for a chatty long-running process.
+const OUTPUT_LOG_CAPACITY: usize = 256 * 1024;
+
 #[derive(Clone)]
 pub enum DataEvent {
    Stdout(Vec<u8>),
@ -30,6 +36,37 @@ pub struct EndEvent {
    pub error: Option<String>,
 }

+/// Size-capped replay buffer holding a process's most recent output chunks.
+/// A Connect that attaches late reads it to catch up on what it missed; once
+/// the retained byte total passes `OUTPUT_LOG_CAPACITY`, the oldest chunks are
+/// discarded.
+#[derive(Default)]
+struct OutputLog {
+    // Retained chunks, oldest at the front.
+    events: VecDeque<DataEvent>,
+    // Sum of the payload lengths of everything currently in `events`.
+    bytes: usize,
+}
+
+impl OutputLog {
+    /// Stores a copy of `ev`, then evicts from the front until the retained
+    /// total is back within `OUTPUT_LOG_CAPACITY`.
+    fn push(&mut self, ev: &DataEvent) {
+        self.bytes += ev_len(ev);
+        self.events.push_back(ev.clone());
+        while self.bytes > OUTPUT_LOG_CAPACITY {
+            let Some(evicted) = self.events.pop_front() else {
+                break;
+            };
+            self.bytes -= ev_len(&evicted);
+        }
+    }
+
+    /// Returns copies of the retained chunks, oldest first.
+    fn snapshot(&self) -> Vec<DataEvent> {
+        let mut replay = Vec::with_capacity(self.events.len());
+        replay.extend(self.events.iter().cloned());
+        replay
+    }
+}
+
+/// Payload size, in bytes, of a single output event.
+fn ev_len(ev: &DataEvent) -> usize {
+    let payload = match ev {
+        DataEvent::Stdout(bytes) => bytes,
+        DataEvent::Stderr(bytes) => bytes,
+        DataEvent::Pty(bytes) => bytes,
+    };
+    payload.len()
+}
+
 pub struct ProcessHandle {
    pub config: ProcessConfig,
    pub tag: Option<String>,
@ -38,6 +75,7 @@ pub struct ProcessHandle {
    data_tx: broadcast::Sender<DataEvent>,
    end_tx: broadcast::Sender<EndEvent>,
    ended: Mutex<Option<EndEvent>>,
+    output_log: Mutex<OutputLog>,

    stdin: Mutex<Option<std::process::ChildStdin>>,
    pty_master: Mutex<Option<std::fs::File>>,
@ -48,6 +86,26 @@ impl ProcessHandle {
        self.data_tx.subscribe()
    }

+    /// Records `ev` in the replay buffer and broadcasts it to live
+    /// subscribers while holding the replay-buffer lock for both steps.
+    ///
+    /// [`Self::subscribe_data_replay`] takes the same lock, so an attach that
+    /// races with this call observes the chunk exactly once: either in its
+    /// snapshot or on its live receiver.
+    pub fn publish_data(&self, ev: DataEvent) {
+        let mut replay = self.output_log.lock().unwrap();
+        replay.push(&ev);
+        // The send result is deliberately ignored; the chunk is already in
+        // the replay buffer regardless of the outcome.
+        let _ = self.data_tx.send(ev);
+    }
+
+    /// Returns the buffered output together with a receiver for live output.
+    ///
+    /// Both are obtained while the replay-buffer lock is held, so a late
+    /// Connect can replay the snapshot and then continue on the receiver with
+    /// no gap and no duplicate across the handoff.
+    pub fn subscribe_data_replay(&self) -> (Vec<DataEvent>, broadcast::Receiver<DataEvent>) {
+        let replay = self.output_log.lock().unwrap();
+        // Tuple fields evaluate left to right: snapshot first, then subscribe,
+        // both before the guard is released at the end of the function.
+        (replay.snapshot(), self.data_tx.subscribe())
+    }
+
    pub fn subscribe_end(&self) -> broadcast::Receiver<EndEvent> {
        self.end_tx.subscribe()
    }
@ -160,6 +218,9 @@ pub fn spawn_process(
    env.push(("HOME".into(), home));
    env.push(("USER".into(), user.name.clone()));
    env.push(("LOGNAME".into(), user.name.clone()));
+    if !user.shell.as_os_str().is_empty() {
+        env.push(("SHELL".into(), user.shell.to_string_lossy().to_string()));
+    }

    default_env_vars.iter().for_each(|entry| {
        env.push((entry.key().clone(), entry.value().clone()));
@ -179,21 +240,40 @@ pub fn spawn_process(
    let nice_delta = 0 - current_nice();
    let profile_source = r#"test -f /etc/profile && . /etc/profile
 test -f "${HOME}/.bashrc" && . "${HOME}/.bashrc""#;
-    let oom_script = if nice_delta > 0 {
-        format!(
-            r#"echo 100 > /proc/$$/oom_score_adj
-{}
-exec /usr/bin/nice -n {} "${{@}}""#,
-            profile_source, nice_delta,
-        )
-    } else {
-        format!(
-            r#"echo 100 > /proc/$$/oom_score_adj
-{}
-exec "$@""#,
-            profile_source
-        )
+
+    // Resolve the user's login shell, falling back to /bin/sh. Commands without
+    // explicit args are interpreted by this shell so pipes, quoting, escape
+    // sequences, backslash line-continuations, and other shell syntax work
+    // without the caller having to wrap them in `sh -c` themselves.
+    let shell = {
+        let s = user.shell.to_string_lossy();
+        if s.is_empty() {
+            "/bin/sh".to_string()
+        } else {
+            s.to_string()
+        }
    };
+
+    // What the wrapper finally exec's, after the optional `nice` prefix.
+    //   - no args: run cmd_str as a shell command line via the login shell
+    //     ($1 is cmd_str; $0 of the inner shell is the shell path).
+    //   - with args: exec the program + args directly, no shell interpretation
+    //     (backward-compatible program/argv form).
+    let target = if args.is_empty() {
+        format!(r#""{shell}" -c "$1" "{shell}""#)
+    } else {
+        r#""$@""#.to_string()
+    };
+    let nice_prefix = if nice_delta > 0 {
+        format!("/usr/bin/nice -n {nice_delta} ")
+    } else {
+        String::new()
+    };
+    let oom_script = format!(
+        r#"echo 100 > /proc/$$/oom_score_adj
+{profile_source}
+exec {nice_prefix}{target}"#
+    );
    let mut wrapper_args = vec![
        "-c".to_string(),
        oom_script,
@ -264,7 +344,10 @@ exec "$@""#,
        command.stderr(Stdio::null());

        let child = command.spawn().map_err(|e| {
-            ConnectError::new(ErrorCode::Internal, format!("error starting pty process: {e}"))
+            ConnectError::new(
+                ErrorCode::Internal,
+                format!("error starting pty process: {e}"),
+            )
        })?;

        drop(slave_fd);
@ -280,6 +363,7 @@ exec "$@""#,
            data_tx: data_tx.clone(),
            end_tx: end_tx.clone(),
            ended: Mutex::new(None),
+            output_log: Mutex::new(OutputLog::default()),
            stdin: Mutex::new(None),
            pty_master: Mutex::new(Some(master_file)),
        });
@ -287,7 +371,7 @@ exec "$@""#,
        let data_rx = handle.subscribe_data();
        let end_rx = handle.subscribe_end();

-        let data_tx_clone = data_tx.clone();
+        let handle_for_reader = Arc::clone(&handle);
        let pty_reader = std::thread::spawn(move || {
            let mut master = master_clone;
            let mut buf = vec![0u8; PTY_CHUNK_SIZE];
@ -295,7 +379,7 @@ exec "$@""#,
                match master.read(&mut buf) {
                    Ok(0) => break,
                    Ok(n) => {
-                        let _ = data_tx_clone.send(DataEvent::Pty(buf[..n].to_vec()));
+                        handle_for_reader.publish_data(DataEvent::Pty(buf[..n].to_vec()));
                    }
                    Err(_) => break,
                }
@ -329,7 +413,11 @@ exec "$@""#,
        });

        tracing::info!(pid, cmd = cmd_str, "process started (pty)");
-        Ok(SpawnedProcess { handle, data_rx, end_rx })
+        Ok(SpawnedProcess {
+            handle,
+            data_rx,
+            end_rx,
+        })
    } else {
        let mut command = std::process::Command::new("/bin/bash");
        command
@ -375,6 +463,7 @@ exec "$@""#,
            data_tx: data_tx.clone(),
            end_tx: end_tx.clone(),
            ended: Mutex::new(None),
+            output_log: Mutex::new(OutputLog::default()),
            stdin: Mutex::new(stdin),
            pty_master: Mutex::new(None),
        });
@ -385,14 +474,14 @@ exec "$@""#,
        let mut output_readers: Vec<std::thread::JoinHandle<()>> = Vec::new();

        if let Some(mut out) = stdout {
-            let tx = data_tx.clone();
+            let handle_for_reader = Arc::clone(&handle);
            output_readers.push(std::thread::spawn(move || {
                let mut buf = vec![0u8; STD_CHUNK_SIZE];
                loop {
                    match out.read(&mut buf) {
                        Ok(0) => break,
                        Ok(n) => {
-                            let _ = tx.send(DataEvent::Stdout(buf[..n].to_vec()));
+                            handle_for_reader.publish_data(DataEvent::Stdout(buf[..n].to_vec()));
                        }
                        Err(_) => break,
                    }
@ -401,14 +490,14 @@ exec "$@""#,
        }

        if let Some(mut err_pipe) = stderr {
-            let tx = data_tx.clone();
+            let handle_for_reader = Arc::clone(&handle);
            output_readers.push(std::thread::spawn(move || {
                let mut buf = vec![0u8; STD_CHUNK_SIZE];
                loop {
                    match err_pipe.read(&mut buf) {
                        Ok(0) => break,
                        Ok(n) => {
-                            let _ = tx.send(DataEvent::Stderr(buf[..n].to_vec()));
+                            handle_for_reader.publish_data(DataEvent::Stderr(buf[..n].to_vec()));
                        }
                        Err(_) => break,
                    }
@ -444,7 +533,11 @@ exec "$@""#,
        });

        tracing::info!(pid, cmd = cmd_str, "process started (pipe)");
-        Ok(SpawnedProcess { handle, data_rx, end_rx })
+        Ok(SpawnedProcess {
+            handle,
+            data_rx,
+            end_rx,
+        })
    }
 }

--- a/envd-rs/src/rpc/process_service.rs
+++ b/envd-rs/src/rpc/process_service.rs
@ -4,7 +4,8 @@ use std::sync::Arc;

 use connectrpc::{ConnectError, Context, ErrorCode};
 use dashmap::DashMap;
-use futures::Stream;
+use futures::{Stream, StreamExt};
+use tokio::sync::broadcast;

 use crate::permissions::path::{expand_and_resolve, expand_tilde};
 use crate::permissions::user::lookup_user;
@ -72,8 +73,7 @@ impl ProcessServiceImpl {
        })?;

        let username = self.state.defaults.user();
-        let user =
-            lookup_user(&username).map_err(|e| ConnectError::new(ErrorCode::Internal, e))?;
+        let user = lookup_user(&username).map_err(|e| ConnectError::new(ErrorCode::Internal, e))?;

        let cmd_raw: &str = proc_config.cmd;
        let args_raw: Vec<String> = proc_config.args.iter().map(|s| s.to_string()).collect();
@ -87,7 +87,8 @@ impl ProcessServiceImpl {

        let cmd = expand_tilde(cmd_raw, &home_dir)
            .map_err(|e| ConnectError::new(ErrorCode::InvalidArgument, e))?;
-        let args: Vec<String> = args_raw.into_iter()
+        let args: Vec<String> = args_raw
+            .into_iter()
            .map(|a| expand_tilde(&a, &home_dir).unwrap_or(a))
            .collect();

@ -136,7 +137,8 @@ impl ProcessServiceImpl {
            &self.state.defaults.env_vars,
        )?;

-        self.processes.insert(spawned.handle.pid, Arc::clone(&spawned.handle));
+        self.processes
+            .insert(spawned.handle.pid, Arc::clone(&spawned.handle));

        let processes = Arc::clone(&self.processes);
        let pid = spawned.handle.pid;
@ -203,50 +205,10 @@ impl Process for ProcessServiceImpl {
        let spawned = self.spawn_from_request(&request)?;
        let pid = spawned.handle.pid;

-        let mut data_rx = spawned.data_rx;
-        let mut end_rx = spawned.end_rx;
-
-        let stream = async_stream::stream! {
-            yield Ok(make_start_response(pid));
-
-            loop {
-                tokio::select! {
-                    biased;
-                    data = data_rx.recv() => {
-                        match data {
-                            Ok(ev) => yield Ok(make_data_start_response(ev)),
-                            Err(tokio::sync::broadcast::error::RecvError::Lagged(_)) => continue,
-                            Err(tokio::sync::broadcast::error::RecvError::Closed) => {
-                                // Data channel closed: the process ended and its
-                                // handle was dropped. The end event is published
-                                // before the handle drop, so it is still buffered
-                                // — emit it rather than losing the exit code.
-                                if let Ok(end) = end_rx.try_recv() {
-                                    yield Ok(make_end_start_response(end));
-                                }
-                                break;
-                            }
-                        }
-                    }
-                    end = end_rx.recv() => {
-                        // Process ended. The waiter joins the output readers
-                        // before sending this event, so every byte is already
-                        // in the data channel — drain it fully before the end.
-                        loop {
-                            match data_rx.try_recv() {
-                                Ok(ev) => yield Ok(make_data_start_response(ev)),
-                                Err(tokio::sync::broadcast::error::TryRecvError::Lagged(_)) => continue,
-                                Err(_) => break,
-                            }
-                        }
-                        if let Ok(end) = end {
-                            yield Ok(make_end_start_response(end));
-                        }
-                        break;
-                    }
-                }
-            }
-        };
+        // Start subscribes before any output is produced, so there is nothing to
+        // replay and the process cannot have ended yet.
+        let stream = process_event_stream(pid, Vec::new(), spawned.data_rx, spawned.end_rx, None)
+            .map(|r| r.map(wrap_start_response));

        Ok((Box::pin(stream), ctx))
    }
@ -268,81 +230,17 @@ impl Process for ProcessServiceImpl {
        let handle = self.get_process_by_selector(selector)?;
        let pid = handle.pid;

-        let mut data_rx = handle.subscribe_data();
-        let mut end_rx = handle.subscribe_end();
+        // Snapshot buffered output + subscribe live atomically, then read the
+        // exit state. Ordering matters: end_rx must be subscribed before
+        // cached_end is read so a process that exits in the window is still
+        // observed (via the channel if subscribed in time, via cached_end
+        // otherwise).
+        let (replay, data_rx) = handle.subscribe_data_replay();
+        let end_rx = handle.subscribe_end();
        let cached_end = handle.cached_end();

-        let stream = async_stream::stream! {
-            yield Ok(ConnectResponse {
-                event: buffa::MessageField::some(ProcessEvent {
-                    event: Some(process_event::Event::Start(Box::new(
-                        process_event::StartEvent { pid, ..Default::default() },
-                    ))),
-                    ..Default::default()
-                }),
-                ..Default::default()
-            });
-
-            if let Some(end) = cached_end {
-                yield Ok(ConnectResponse {
-                    event: buffa::MessageField::some(make_end_event(end)),
-                    ..Default::default()
-                });
-            } else {
-                loop {
-                    tokio::select! {
-                        biased;
-                        data = data_rx.recv() => {
-                            match data {
-                                Ok(ev) => {
-                                    yield Ok(ConnectResponse {
-                                        event: buffa::MessageField::some(make_data_event(ev)),
-                                        ..Default::default()
-                                    });
-                                }
-                                Err(tokio::sync::broadcast::error::RecvError::Lagged(_)) => continue,
-                                Err(tokio::sync::broadcast::error::RecvError::Closed) => {
-                                    // Data channel closed: the process ended and
-                                    // its handle was dropped. The end event is
-                                    // published before the handle drop, so it is
-                                    // still buffered — emit it rather than losing
-                                    // the exit code.
-                                    if let Ok(end) = end_rx.try_recv() {
-                                        yield Ok(ConnectResponse {
-                                            event: buffa::MessageField::some(make_end_event(end)),
-                                            ..Default::default()
-                                        });
-                                    }
-                                    break;
-                                }
-                            }
-                        }
-                        end = end_rx.recv() => {
-                            // Process ended. The waiter joins the output readers
-                            // before sending this event, so every byte is already
-                            // in the data channel — drain it fully before the end.
-                            loop {
-                                match data_rx.try_recv() {
-                                    Ok(ev) => yield Ok(ConnectResponse {
-                                        event: buffa::MessageField::some(make_data_event(ev)),
-                                        ..Default::default()
-                                    }),
-                                    Err(tokio::sync::broadcast::error::TryRecvError::Lagged(_)) => continue,
-                                    Err(_) => break,
-                                }
-                            }
-                            if let Ok(end) = end {
-                                yield Ok(ConnectResponse {
-                                    event: buffa::MessageField::some(make_end_event(end)),
-                                    ..Default::default()
-                                });
-                            }
-                            break;
-                        }
-                    }
-                }
-            }
-        };
+        let stream = process_event_stream(pid, replay, data_rx, end_rx, cached_end)
+            .map(|r| r.map(wrap_connect_response));

        Ok((Box::pin(stream), ctx))
    }
@ -363,7 +261,12 @@ impl Process for ProcessServiceImpl {
            }
        }

-        Ok((UpdateResponse { ..Default::default() }, ctx))
+        Ok((
+            UpdateResponse {
+                ..Default::default()
+            },
+            ctx,
+        ))
    }

    async fn stream_input(
@ -372,11 +275,11 @@ impl Process for ProcessServiceImpl {
        mut requests: Pin<
            Box<
                dyn Stream<
-                    Item = Result<
-                        buffa::view::OwnedView<StreamInputRequestView<'static>>,
-                        ConnectError,
-                    >,
-                > + Send,
+                        Item = Result<
+                            buffa::view::OwnedView<StreamInputRequestView<'static>>,
+                            ConnectError,
+                        >,
+                    > + Send,
            >,
        >,
    ) -> Result<(StreamInputResponse, Context), ConnectError> {
@ -405,7 +308,12 @@ impl Process for ProcessServiceImpl {
            }
        }

-        Ok((StreamInputResponse { ..Default::default() }, ctx))
+        Ok((
+            StreamInputResponse {
+                ..Default::default()
+            },
+            ctx,
+        ))
    }

    async fn send_input(
@ -422,7 +330,12 @@ impl Process for ProcessServiceImpl {
            write_input(&handle, input)?;
        }

-        Ok((SendInputResponse { ..Default::default() }, ctx))
+        Ok((
+            SendInputResponse {
+                ..Default::default()
+            },
+            ctx,
+        ))
    }

    async fn send_signal(
@ -442,12 +355,17 @@ impl Process for ProcessServiceImpl {
                return Err(ConnectError::new(
                    ErrorCode::InvalidArgument,
                    "invalid or unspecified signal",
-                ))
+                ));
            }
        };

        handle.send_signal(sig)?;
-        Ok((SendSignalResponse { ..Default::default() }, ctx))
+        Ok((
+            SendSignalResponse {
+                ..Default::default()
+            },
+            ctx,
+        ))
    }

    async fn close_stdin(
@ -460,7 +378,12 @@ impl Process for ProcessServiceImpl {
        })?;
        let handle = self.get_process_by_selector(selector)?;
        handle.close_stdin()?;
-        Ok((CloseStdinResponse { ..Default::default() }, ctx))
+        Ok((
+            CloseStdinResponse {
+                ..Default::default()
+            },
+            ctx,
+        ))
    }
 }

@ -472,17 +395,106 @@ fn write_input(handle: &ProcessHandle, input: &ProcessInputView) -> Result<(), C
    }
 }

-fn make_start_response(pid: u32) -> StartResponse {
+/// Shared event pump for `Start` and `Connect`. Yields a leading start event,
+/// replays any buffered output (empty for `Start`), then forwards live output
+/// and the final exit event. The caller wraps each `ProcessEvent` into its own
+/// response envelope, so the streaming logic lives in exactly one place.
+///
+/// * `pid` — reported in the leading start event.
+/// * `replay` — already-buffered output, emitted in order right after the
+///   start event.
+/// * `data_rx` / `end_rx` — receivers for live output and the exit event.
+/// * `cached_end` — exit event of a process that finished before attach; when
+///   set, the stream drains `data_rx` without blocking, emits this event, and
+///   ends without ever awaiting `end_rx`.
+fn process_event_stream(
+    pid: u32,
+    replay: Vec<DataEvent>,
+    mut data_rx: broadcast::Receiver<DataEvent>,
+    mut end_rx: broadcast::Receiver<process_handler::EndEvent>,
+    cached_end: Option<process_handler::EndEvent>,
+) -> impl Stream<Item = Result<ProcessEvent, ConnectError>> {
+    use broadcast::error::{RecvError, TryRecvError};
+
+    async_stream::stream! {
+        yield Ok(make_start_event(pid));
+
+        for ev in replay {
+            yield Ok(make_data_event(ev));
+        }
+
+        // Process already exited before we attached. The snapshot above covers
+        // output up to the attach point; drain anything the live receiver
+        // buffered after the snapshot, then emit the cached exit. end_rx may
+        // never deliver here — a broadcast receiver only sees events sent after
+        // it subscribed, and the exit can predate that — so cached_end is the
+        // source of truth.
+        if let Some(end) = cached_end {
+            loop {
+                match data_rx.try_recv() {
+                    Ok(ev) => yield Ok(make_data_event(ev)),
+                    Err(TryRecvError::Lagged(_)) => continue,
+                    Err(_) => break,
+                }
+            }
+            yield Ok(make_end_event(end));
+            return;
+        }
+
+        loop {
+            tokio::select! {
+                // `biased` polls the arms in written order, so pending output
+                // is always taken before the exit event.
+                biased;
+                data = data_rx.recv() => {
+                    match data {
+                        Ok(ev) => yield Ok(make_data_event(ev)),
+                        Err(RecvError::Lagged(_)) => continue,
+                        Err(RecvError::Closed) => {
+                            // Data channel closed: the process ended and its
+                            // handle was dropped. The end event is published
+                            // before the handle drop, so it is still buffered —
+                            // emit it rather than losing the exit code.
+                            if let Ok(end) = end_rx.try_recv() {
+                                yield Ok(make_end_event(end));
+                            }
+                            break;
+                        }
+                    }
+                }
+                end = end_rx.recv() => {
+                    // Process ended. The waiter joins the output readers before
+                    // sending this event, so every byte is already in the data
+                    // channel — drain it fully before the end.
+                    loop {
+                        match data_rx.try_recv() {
+                            Ok(ev) => yield Ok(make_data_event(ev)),
+                            Err(TryRecvError::Lagged(_)) => continue,
+                            Err(_) => break,
+                        }
+                    }
+                    if let Ok(end) = end {
+                        yield Ok(make_end_event(end));
+                    }
+                    break;
+                }
+            }
+        }
+    }
+}
+
+fn wrap_start_response(event: ProcessEvent) -> StartResponse {
    StartResponse {
-        event: buffa::MessageField::some(ProcessEvent {
-            event: Some(process_event::Event::Start(Box::new(
-                process_event::StartEvent {
-                    pid,
-                    ..Default::default()
-                },
-            ))),
-            ..Default::default()
-        }),
+        event: buffa::MessageField::some(event),
+        ..Default::default()
+    }
+}
+
+/// Wraps a `ProcessEvent` in the `ConnectResponse` envelope; every other
+/// field of the response keeps its default value.
+fn wrap_connect_response(event: ProcessEvent) -> ConnectResponse {
+    ConnectResponse {
+        event: buffa::MessageField::some(event),
+        ..Default::default()
+    }
+}
+
+fn make_start_event(pid: u32) -> ProcessEvent {
+    ProcessEvent {
+        event: Some(process_event::Event::Start(Box::new(
+            process_event::StartEvent {
+                pid,
+                ..Default::default()
+            },
+        ))),
        ..Default::default()
    }
 }
@ -504,13 +516,6 @@ fn make_data_event(ev: DataEvent) -> ProcessEvent {
    }
 }

-fn make_data_start_response(ev: DataEvent) -> StartResponse {
-    StartResponse {
-        event: buffa::MessageField::some(make_data_event(ev)),
-        ..Default::default()
-    }
-}
-
 fn make_end_event(end: process_handler::EndEvent) -> ProcessEvent {
    ProcessEvent {
        event: Some(process_event::Event::End(Box::new(
@ -526,13 +531,6 @@ fn make_end_event(end: process_handler::EndEvent) -> ProcessEvent {
    }
 }

-fn make_end_start_response(end: process_handler::EndEvent) -> StartResponse {
-    StartResponse {
-        event: buffa::MessageField::some(make_end_event(end)),
-        ..Default::default()
-    }
-}
-
 #[cfg(test)]
 mod tests {
    use super::*;
@ -589,7 +587,8 @@ mod tests {
    fn args_other_user_left_literal() {
        let home_dir = "/home/testuser";
        let args_raw = vec!["~other".to_string(), "~other/path".to_string()];
-        let args: Vec<String> = args_raw.into_iter()
+        let args: Vec<String> = args_raw
+            .into_iter()
            .map(|a| expand_tilde(&a, home_dir).unwrap_or(a))
            .collect();
        assert_eq!(args, vec!["~other", "~other/path"]);
@ -618,17 +617,22 @@ mod tests {
            "/tmp/out".to_string(),
            "~other".to_string(),
        ];
-        let args: Vec<String> = args_raw.into_iter()
+        let args: Vec<String> = args_raw
+            .into_iter()
            .map(|a| expand_tilde(&a, home_dir).unwrap_or(a))
            .collect();
-        assert_eq!(args, vec!["-p", "/home/testuser/data", "/tmp/out", "~other"]);
+        assert_eq!(
+            args,
+            vec!["-p", "/home/testuser/data", "/tmp/out", "~other"]
+        );
    }

    #[test]
    fn args_empty_passthrough() {
        let home_dir = "/home/testuser";
        let args_raw: Vec<String> = vec![];
-        let args: Vec<String> = args_raw.into_iter()
+        let args: Vec<String> = args_raw
+            .into_iter()
            .map(|a| expand_tilde(&a, home_dir).unwrap_or(a))
            .collect();
        assert!(args.is_empty());
--- a/envd-rs/src/state.rs
+++ b/envd-rs/src/state.rs
@ -1,4 +1,4 @@
-use std::sync::atomic::{AtomicBool, AtomicU32, AtomicU64, AtomicU8, Ordering};
+use std::sync::atomic::{AtomicBool, AtomicU8, AtomicU32, AtomicU64, Ordering};
 use std::sync::{Arc, Mutex};

 use crate::auth::token::SecureToken;
@ -17,6 +17,11 @@ pub struct AppState {
    pub port_subsystem: Option<Arc<PortSubsystem>>,
    pub cpu_used_pct: AtomicU32,
    pub cpu_count: AtomicU32,
+    /// Whole-VM IO throughput, bytes/sec, sampled over the last 1s tick. Used
+    /// by the host activity sampler to keep IO-bound-but-CPU-idle workloads
+    /// (e.g. a long download) from being mistaken for inactive.
+    pub net_bps: AtomicU64,
+    pub disk_bps: AtomicU64,

    /// Memory preload coordination. The host agent POSTs /memory/preload after
    /// a snapshot restore to materialise every physical page (so the next
@ -56,6 +61,8 @@ impl AppState {
            port_subsystem,
            cpu_used_pct: AtomicU32::new(0),
            cpu_count: AtomicU32::new(0),
+            net_bps: AtomicU64::new(0),
+            disk_bps: AtomicU64::new(0),
            mem_preload_started: AtomicBool::new(false),
            mem_preload_done: AtomicBool::new(false),
            mem_preload_cancel: AtomicBool::new(false),
@ -70,7 +77,7 @@ impl AppState {

        let state_clone = Arc::clone(&state);
        std::thread::spawn(move || {
-            cpu_sampler(state_clone);
+            activity_sampler(state_clone);
        });

        state
@ -84,6 +91,14 @@ impl AppState {
        self.cpu_count.load(Ordering::Relaxed)
    }

+    /// Latest whole-VM network throughput in bytes/sec, as last stored in the
+    /// `net_bps` atomic (relaxed load).
+    pub fn net_bps(&self) -> u64 {
+        self.net_bps.load(Ordering::Relaxed)
+    }
+
+    /// Latest whole-VM disk throughput in bytes/sec, as last stored in the
+    /// `disk_bps` atomic (relaxed load).
+    pub fn disk_bps(&self) -> u64 {
+        self.disk_bps.load(Ordering::Relaxed)
+    }
+
    /// Records a new lifecycle ID, returning true if it changed (i.e. this
    /// is the first /init since a resume). First-ever call returns false:
    /// boot-time /init doesn't need port-subsystem restart since the
@ -99,12 +114,16 @@ impl AppState {
    }
 }

-fn cpu_sampler(state: Arc<AppState>) {
+fn activity_sampler(state: Arc<AppState>) {
    use sysinfo::System;

    let mut sys = System::new();
    sys.refresh_cpu_all();

+    // Cumulative IO counters from the previous tick; None if that read failed.
+    let mut prev_net: Option<u64> = read_net_bytes();
+    let mut prev_disk: Option<u64> = read_disk_bytes();
+
    loop {
        std::thread::sleep(std::time::Duration::from_secs(1));

@ -123,5 +142,73 @@ fn cpu_sampler(state: Arc<AppState>) {
        state
            .cpu_count
            .store(sys.cpus().len() as u32, Ordering::Relaxed);
+
+        // Throughput = cumulative-counter delta over the ~1s tick. Counters can
+        // reset across a snapshot restore; a wrapped/negative delta reads as 0.
+        let cur_net = read_net_bytes();
+        let net_bps = match (prev_net, cur_net) {
+            (Some(p), Some(c)) => c.saturating_sub(p),
+            _ => 0,
+        };
+        prev_net = cur_net;
+
+        let cur_disk = read_disk_bytes();
+        let disk_bps = match (prev_disk, cur_disk) {
+            (Some(p), Some(c)) => c.saturating_sub(p),
+            _ => 0,
+        };
+        prev_disk = cur_disk;
+
+        state.net_bps.store(net_bps, Ordering::Relaxed);
+        state.disk_bps.store(disk_bps, Ordering::Relaxed);
    }
 }
+
+/// Total rx+tx byte count over every interface except loopback, taken from
+/// /proc/net/dev. Yields None only when the file cannot be read; lines without
+/// an interface separator and counters that fail to parse are skipped.
+fn read_net_bytes() -> Option<u64> {
+    let content = std::fs::read_to_string("/proc/net/dev").ok()?;
+    // The per-interface table begins after two header lines.
+    let total = content
+        .lines()
+        .skip(2)
+        .filter_map(|line| line.split_once(':'))
+        .filter(|(iface, _)| iface.trim() != "lo")
+        .fold(0u64, |acc, (_, counters)| {
+            let mut cols = counters.split_whitespace();
+            // Column 0 = rx bytes; after consuming it, nth(7) lands on
+            // column 8 = tx bytes.
+            let rx = cols.next().and_then(|v| v.parse::<u64>().ok()).unwrap_or(0);
+            let tx = cols.nth(7).and_then(|v| v.parse::<u64>().ok()).unwrap_or(0);
+            acc.saturating_add(rx).saturating_add(tx)
+        });
+    Some(total)
+}
+
+/// Sum of sectors read+written across whole block devices, ×512, from
+/// /proc/diskstats. Loop/ram devices are skipped, and so are partitions
+/// (detected through the sysfs `partition` attribute): a partition's IO is
+/// already included in its parent disk's counters, so counting both rows
+/// would double the reported throughput. Returns None only if the file can't
+/// be read; counters that fail to parse count as 0.
+fn read_disk_bytes() -> Option<u64> {
+    let content = std::fs::read_to_string("/proc/diskstats").ok()?;
+    let mut sectors: u64 = 0;
+    for line in content.lines() {
+        let fields: Vec<&str> = line.split_whitespace().collect();
+        // 0=major 1=minor 2=name ... 5=sectors read ... 9=sectors written.
+        if fields.len() < 10 {
+            continue;
+        }
+        let name = fields[2];
+        if name.starts_with("loop") || name.starts_with("ram") {
+            continue;
+        }
+        // sysfs exposes a `partition` attribute only for partitions. Device
+        // names containing '/' are spelled with '!' in sysfs. When sysfs is
+        // not available the check is false and the row is counted, which is
+        // the previous behaviour.
+        let is_partition = std::path::Path::new("/sys/class/block")
+            .join(name.replace('/', "!"))
+            .join("partition")
+            .exists();
+        if is_partition {
+            continue;
+        }
+        let read = fields[5].parse::<u64>().unwrap_or(0);
+        let written = fields[9].parse::<u64>().unwrap_or(0);
+        sectors = sectors.saturating_add(read).saturating_add(written);
+    }
+    // Linux reports diskstats sectors in fixed 512-byte units.
+    Some(sectors.saturating_mul(512))
+}
--- a/envd-rs/src/util.rs
+++ b/envd-rs/src/util.rs
@ -23,12 +23,10 @@ impl AtomicMax {
            if new <= current {
                return false;
            }
-            match self.val.compare_exchange_weak(
-                current,
-                new,
-                Ordering::Release,
-                Ordering::Relaxed,
-            ) {
+            match self
+                .val
+                .compare_exchange_weak(current, new, Ordering::Release, Ordering::Relaxed)
+            {
                Ok(_) => return true,
                Err(_) => continue,
            }
--- a/frontend/src/routes/admin/hosts/+page.svelte
+++ b/frontend/src/routes/admin/hosts/+page.svelte
@ -53,14 +53,15 @@
 	let byocPageCount = $derived(Math.max(1, Math.ceil(flatByocHosts.length / PAGE_SIZE)));
 	let byocPageHosts = $derived(flatByocHosts.slice(byocPage * PAGE_SIZE, (byocPage + 1) * PAGE_SIZE));

-	// Stats across all hosts
-	let onlineCount = $derived(allHosts.filter((h) => h.status === 'online').length);
-	let pendingCount = $derived(allHosts.filter((h) => h.status === 'pending').length);
-	let totalCount = $derived(allHosts.length);
-	let totalCpuCores = $derived(allHosts.reduce((sum, h) => sum + (h.cpu_cores ?? 0), 0));
-	let totalMemoryMb = $derived(allHosts.reduce((sum, h) => sum + (h.memory_mb ?? 0), 0));
-	let totalRunningVcpus = $derived(allHosts.reduce((sum, h) => sum + h.running_vcpus, 0));
-	let totalRunningMemoryMb = $derived(allHosts.reduce((sum, h) => sum + h.running_memory_mb, 0));
+	// Aggregated stats — platform hosts only (admin needs a heads-up on
+	// platform capacity; BYOC capacity belongs to individual teams).
+	let onlineCount = $derived(platformHosts.filter((h) => h.status === 'online').length);
+	let pendingCount = $derived(platformHosts.filter((h) => h.status === 'pending').length);
+	let totalCount = $derived(platformHosts.length);
+	let totalCpuCores = $derived(platformHosts.reduce((sum, h) => sum + (h.cpu_cores ?? 0), 0));
+	let totalMemoryMb = $derived(platformHosts.reduce((sum, h) => sum + (h.memory_mb ?? 0), 0));
+	let totalRunningVcpus = $derived(platformHosts.reduce((sum, h) => sum + h.running_vcpus, 0));
+	let totalRunningMemoryMb = $derived(platformHosts.reduce((sum, h) => sum + h.running_memory_mb, 0));

 	function formatMem(mb: number): string {
 		return mb >= 1024 ? `${(mb / 1024).toFixed(0)} GB` : `${mb} MB`;
--- a/frontend/src/routes/dashboard/capsules/+layout.svelte
+++ b/frontend/src/routes/dashboard/capsules/+layout.svelte
@ -6,10 +6,6 @@
 	let { children } = $props();
 </script>

-<svelte:head>
-	<title>Wrenn — Capsules</title>
-</svelte:head>
-
 <main class="flex flex-1 flex-col overflow-y-auto bg-[var(--color-bg-0)]">
 	<!-- Header area -->
 	{#if $page.params.id}
--- a/frontend/src/routes/dashboard/capsules/+page.svelte
+++ b/frontend/src/routes/dashboard/capsules/+page.svelte
@ -256,6 +256,10 @@
 	});
 </script>

+<svelte:head>
+	<title>Wrenn — Capsules</title>
+</svelte:head>
+
 <style>
 	@keyframes capsule-born {
 		0%, 25% { background-color: rgba(94, 140, 88, 0.1); }
--- a/internal/api/handlers_exec_stream.go
+++ b/internal/api/handlers_exec_stream.go
@ -130,22 +130,8 @@ func (h *execStreamHandler) runExecStream(ctx context.Context, conn *websocket.C

 	// Forward stream events to WebSocket.
 	for stream.Receive() {
-		resp := stream.Msg()
-		switch ev := resp.Event.(type) {
-		case *pb.ExecStreamResponse_Start:
-			writeWSJSON(conn, wsOutMsg{Type: "start", PID: ev.Start.Pid})
-
-		case *pb.ExecStreamResponse_Data:
-			switch o := ev.Data.Output.(type) {
-			case *pb.ExecStreamData_Stdout:
-				writeWSJSON(conn, wsOutMsg{Type: "stdout", Data: string(o.Stdout)})
-			case *pb.ExecStreamData_Stderr:
-				writeWSJSON(conn, wsOutMsg{Type: "stderr", Data: string(o.Stderr)})
-			}
-
-		case *pb.ExecStreamResponse_End:
-			exitCode := ev.End.ExitCode
-			writeWSJSON(conn, wsOutMsg{Type: "exit", ExitCode: &exitCode})
+		if m, ok := procRespToWSMsg(stream.Msg()); ok {
+			writeWSJSON(conn, m)
 		}
 	}

@ -159,6 +145,38 @@ func (h *execStreamHandler) runExecStream(ctx context.Context, conn *websocket.C
 	updateLastActive(h.db, sandboxID, sandboxIDStr)
 }

+// procStreamResp is satisfied by both *pb.ExecStreamResponse and
+// *pb.ConnectProcessResponse: their oneof events carry the same inner messages,
+// so the wire-to-WS mapping below is shared between the exec-stream and
+// connect-process handlers.
+//
+// procRespToWSMsg nil-checks each accessor in turn (start, data, end) to find
+// which event a response carries.
+type procStreamResp interface {
+	GetStart() *pb.ExecStreamStart
+	GetData() *pb.ExecStreamData
+	GetEnd() *pb.ExecStreamEnd
+}
+
+// procRespToWSMsg maps one process stream response to the WS message to send.
+// The bool is false when the response carries nothing to forward.
+func procRespToWSMsg(resp procStreamResp) (wsOutMsg, bool) {
+	if s := resp.GetStart(); s != nil {
+		return wsOutMsg{Type: "start", PID: s.Pid}, true
+	}
+	if d := resp.GetData(); d != nil {
+		switch o := d.Output.(type) {
+		case *pb.ExecStreamData_Stdout:
+			return wsOutMsg{Type: "stdout", Data: string(o.Stdout)}, true
+		case *pb.ExecStreamData_Stderr:
+			return wsOutMsg{Type: "stderr", Data: string(o.Stderr)}, true
+		}
+		return wsOutMsg{}, false
+	}
+	if e := resp.GetEnd(); e != nil {
+		exitCode := e.ExitCode
+		return wsOutMsg{Type: "exit", ExitCode: &exitCode}, true
+	}
+	return wsOutMsg{}, false
+}
+
 func sendWSError(conn *websocket.Conn, msg string) {
 	writeWSJSON(conn, wsOutMsg{Type: "error", Data: msg})
 }
--- a/internal/api/handlers_process.go
+++ b/internal/api/handlers_process.go
@ -192,22 +192,8 @@ func (h *processHandler) runConnectProcess(ctx context.Context, conn *websocket.

 	// Forward stream events to WebSocket.
 	for stream.Receive() {
-		resp := stream.Msg()
-		switch ev := resp.Event.(type) {
-		case *pb.ConnectProcessResponse_Start:
-			writeWSJSON(conn, wsOutMsg{Type: "start", PID: ev.Start.Pid})
-
-		case *pb.ConnectProcessResponse_Data:
-			switch o := ev.Data.Output.(type) {
-			case *pb.ExecStreamData_Stdout:
-				writeWSJSON(conn, wsOutMsg{Type: "stdout", Data: string(o.Stdout)})
-			case *pb.ExecStreamData_Stderr:
-				writeWSJSON(conn, wsOutMsg{Type: "stderr", Data: string(o.Stderr)})
-			}
-
-		case *pb.ConnectProcessResponse_End:
-			exitCode := ev.End.ExitCode
-			writeWSJSON(conn, wsOutMsg{Type: "exit", ExitCode: &exitCode})
+		if m, ok := procRespToWSMsg(stream.Msg()); ok {
+			writeWSJSON(conn, m)
 		}
 	}

--- a/internal/api/middleware.go
+++ b/internal/api/middleware.go
@ -60,6 +60,10 @@ func agentErrToHTTP(err error) (int, string, string) {
 		return http.StatusServiceUnavailable, "no_hosts_available", "no servers available — try again later"
 	case connect.CodeUnimplemented:
 		return http.StatusNotImplemented, "agent_error", err.Error()
+	case connect.CodeDeadlineExceeded:
+		return http.StatusGatewayTimeout, "timeout", "command timed out"
+	case connect.CodeInternal:
+		return http.StatusInternalServerError, "agent_error", err.Error()
 	default:
 		return http.StatusBadGateway, "agent_error", err.Error()
 	}
--- a/internal/api/sandbox_event_consumer.go
+++ b/internal/api/sandbox_event_consumer.go
@ -144,7 +144,7 @@ func (c *SandboxEventConsumer) handleMessage(ctx context.Context, msg redis.XMes
 		}
 	case events.CapsulePause:
 		if event.Outcome == events.OutcomeSuccess {
-			c.handleAutoPaused(ctx, sandboxID)
+			c.handleAutoPaused(ctx, sandboxID, event)
 		}
 	case events.CapsuleDestroy:
 		if event.Outcome == events.OutcomeSuccess {
@ -226,12 +226,35 @@ func (c *SandboxEventConsumer) handleStarted(ctx context.Context, sandboxID pgty
 	}
 }

-func (c *SandboxEventConsumer) handleAutoPaused(ctx context.Context, sandboxID pgtype.UUID) {
+// handleAutoPaused reflects an autonomous (TTL reaper / shutdown) pause in the
+// DB and writes the audit row for it. The audit write happens only when the
+// status flip actually applied, so a stream redelivery does not double-count,
+// and so the HostMonitor host_state_sync fallback (which audits the
+// callback-lost case) stays mutually exclusive with this path.
+//
+// Uses audit.Log (row only) — NOT LogSandboxAutoPause, which republishes a
+// CapsulePause/system event that would loop straight back into this consumer.
+func (c *SandboxEventConsumer) handleAutoPaused(ctx context.Context, sandboxID pgtype.UUID, event events.Event) {
 	for _, fromStatus := range []string{"running", "pausing"} {
 		if _, err := c.db.UpdateSandboxStatusIf(ctx, db.UpdateSandboxStatusIfParams{
 			ID: sandboxID, Status: fromStatus, Status_2: "paused",
 		}); err == nil {
-			slog.Debug("sandbox event consumer: auto-paused fallback applied", "sandbox_id", id.FormatSandboxID(sandboxID), "from", fromStatus)
+			slog.Debug("sandbox event consumer: auto-paused applied", "sandbox_id", id.FormatSandboxID(sandboxID), "from", fromStatus)
+			reason := event.Metadata["reason"]
+			if reason == "" {
+				reason = "ttl_expired"
+			}
+			teamID, _ := id.ParseTeamID(event.TeamID)
+			c.audit.Log(ctx, audit.Entry{
+				TeamID:       teamID,
+				ActorType:    "system",
+				ResourceType: "sandbox",
+				ResourceID:   id.FormatSandboxID(sandboxID),
+				Action:       "pause",
+				Scope:        "team",
+				Status:       "info",
+				Metadata:     map[string]any{"reason": reason},
+			})
 			return
 		}
 	}
--- a/internal/api/sse_relay.go
+++ b/internal/api/sse_relay.go
@ -104,6 +104,14 @@ func (r *SSERelay) handleMessage(ctx context.Context, msg *redis.Message) {
 		if err != nil {
 			slog.Debug("sse relay: sandbox hydration failed (may be deleted)", "sandbox_id", event.Resource.ID, "error", err)
 		} else {
+			// Override the hydrated status with the status implied by the event
+			// verb. Autonomous transitions (e.g. TTL auto-pause) flip the DB row
+			// in a separate stream consumer that races this Pub/Sub read, so the
+			// hydrated row may still carry the pre-transition status. The event
+			// itself is authoritative for the resulting state.
+			if status, ok := impliedSandboxStatus(event); ok {
+				sb.Status = status
+			}
 			payload.Sandbox = sb
 		}
 	}
@ -138,6 +146,25 @@ func (r *SSERelay) hydrateSandbox(ctx context.Context, sandboxIDStr string) (*sa
 	return &resp, nil
 }

+// impliedSandboxStatus maps a successful capsule lifecycle event to the
+// sandbox status it results in: pause → "paused", resume/create → "running".
+// It is used to override a hydrated DB row that may still carry the
+// pre-transition status because the consumer that flips it races this read.
+// Returns ("", false) for anything else (failures, destroy, state_changed).
+func impliedSandboxStatus(event events.Event) (string, bool) {
+	if event.Outcome != events.OutcomeSuccess { // only successful transitions imply a status
+		return "", false
+	}
+	switch event.Event {
+	case events.CapsulePause:
+		return "paused", true
+	case events.CapsuleResume, events.CapsuleCreate:
+		return "running", true
+	default: // e.g. destroy / state_changed: no single resulting status
+		return "", false
+	}
+}
+
 func isCapsuleEvent(eventType string) bool {
 	switch eventType {
 	case events.CapsuleCreate, events.CapsulePause, events.CapsuleResume, events.CapsuleDestroy, events.CapsuleStateChanged:
--- a/internal/envdclient/client.go
+++ b/internal/envdclient/client.go
@ -25,6 +25,7 @@ type Client struct {
 	hostIP          string
 	base            string
 	healthURL       string
+	activityURL     string
 	httpClient      *http.Client
 	streamingClient *http.Client

@ -42,6 +43,7 @@ func New(hostIP string) *Client {
 		hostIP:          hostIP,
 		base:            base,
 		healthURL:       base + "/health",
+		activityURL:     base + "/activity",
 		httpClient:      httpClient,
 		streamingClient: streamingClient,
 		process:         genconnect.NewProcessClient(streamingClient, base),
@ -117,36 +119,17 @@ func (c *Client) Exec(ctx context.Context, cmd string, args []string, opts *Exec
 	result := &ExecResult{}

 	for stream.Receive() {
-		msg := stream.Msg()
-		if msg.Event == nil {
+		ev, ok := procEventToStreamEvent(stream.Msg().GetEvent())
+		if !ok {
 			continue
 		}
-
-		event := msg.Event.GetEvent()
-		switch e := event.(type) {
-		case *envdpb.ProcessEvent_Start:
-			slog.Debug("process started", "pid", e.Start.GetPid())
-
-		case *envdpb.ProcessEvent_Data:
-			output := e.Data.GetOutput()
-			switch o := output.(type) {
-			case *envdpb.ProcessEvent_DataEvent_Stdout:
-				result.Stdout = append(result.Stdout, o.Stdout...)
-			case *envdpb.ProcessEvent_DataEvent_Stderr:
-				result.Stderr = append(result.Stderr, o.Stderr...)
-			}
-
-		case *envdpb.ProcessEvent_End:
-			result.ExitCode = e.End.GetExitCode()
-			if e.End.Error != nil {
-				slog.Debug("process ended with error",
-					"exit_code", e.End.GetExitCode(),
-					"error", e.End.GetError(),
-				)
-			}
-
-		case *envdpb.ProcessEvent_Keepalive:
-			// Ignore keepalives.
+		switch ev.Type {
+		case "stdout":
+			result.Stdout = append(result.Stdout, ev.Data...)
+		case "stderr":
+			result.Stderr = append(result.Stderr, ev.Data...)
+		case "end":
+			result.ExitCode = ev.ExitCode
 		}
 	}

@ -166,6 +149,76 @@ type ExecStreamEvent struct {
 	Error    string
 }

+// procEventToStreamEvent converts a raw envd ProcessEvent into an
+// ExecStreamEvent whose Type is "start", "stdout", "stderr" or "end". The
+// second return is false for events with no payload to forward (nil event,
+// keepalive, unknown data variant) so callers can skip them. This is the
+// single decoder shared by Exec, ExecStream and ConnectProcess.
+func procEventToStreamEvent(pe *envdpb.ProcessEvent) (ExecStreamEvent, bool) {
+	if pe == nil {
+		return ExecStreamEvent{}, false
+	}
+	switch e := pe.GetEvent().(type) {
+	case *envdpb.ProcessEvent_Start:
+		return ExecStreamEvent{Type: "start", PID: e.Start.GetPid()}, true
+	case *envdpb.ProcessEvent_Data:
+		switch o := e.Data.GetOutput().(type) {
+		case *envdpb.ProcessEvent_DataEvent_Stdout:
+			return ExecStreamEvent{Type: "stdout", Data: o.Stdout}, true
+		case *envdpb.ProcessEvent_DataEvent_Stderr:
+			return ExecStreamEvent{Type: "stderr", Data: o.Stderr}, true
+		}
+		return ExecStreamEvent{}, false // data event that is neither stdout nor stderr
+	case *envdpb.ProcessEvent_End:
+		ev := ExecStreamEvent{Type: "end", ExitCode: e.End.GetExitCode()}
+		if e.End.Error != nil { // Error is only copied when the End message carries one
+			ev.Error = e.End.GetError()
+		}
+		return ev, true
+	}
+	return ExecStreamEvent{}, false // no case matched (keepalive or unrecognised event kind)
+}
+
+// procEventStream is the subset of a Connect server-stream that pumpProcessEvents
+// needs. *connect.ServerStreamForClient[T] satisfies it for both message types
+// used here: T = StartResponse and T = ConnectResponse.
+type procEventStream[T any] interface {
+	Receive() bool // loop condition in pumpProcessEvents
+	Msg() *T       // current message, passed to getEvent
+	Err() error    // checked once after Receive returns false
+	Close() error  // deferred by pumpProcessEvents
+}
+
+// pumpProcessEvents drains a process server-stream into ch until the stream ends
+// or ctx is cancelled; it closes the stream and then ch on exit. getEvent extracts
+// the ProcessEvent from each message so one loop serves both the Start and Connect RPCs.
+func pumpProcessEvents[T any](
+	ctx context.Context,
+	stream procEventStream[T],
+	getEvent func(*T) *envdpb.ProcessEvent,
+	ch chan<- ExecStreamEvent,
+	logLabel string, // slog.Debug message used when the stream ends with a non-EOF error
+) {
+	defer close(ch) // deferred first, so it runs after stream.Close()
+	defer stream.Close()
+
+	for stream.Receive() {
+		ev, ok := procEventToStreamEvent(getEvent(stream.Msg()))
+		if !ok { // nothing to forward for this message
+			continue
+		}
+		select {
+		case ch <- ev:
+		case <-ctx.Done(): // ctx cancelled: give up rather than block on a full channel
+			return
+		}
+	}
+
+	if err := stream.Err(); err != nil && err != io.EOF {
+		slog.Debug(logLabel, "error", err)
+	}
+}
+
 // ExecStream runs a command inside the sandbox and returns a channel of output events.
 // The channel is closed when the process ends or the context is cancelled.
 func (c *Client) ExecStream(ctx context.Context, cmd string, args ...string) (<-chan ExecStreamEvent, error) {
@ -184,52 +237,7 @@ func (c *Client) ExecStream(ctx context.Context, cmd string, args ...string) (<-
 	}

 	ch := make(chan ExecStreamEvent, 256)
-	go func() {
-		defer close(ch)
-		defer stream.Close()
-
-		for stream.Receive() {
-			msg := stream.Msg()
-			if msg.Event == nil {
-				continue
-			}
-
-			var ev ExecStreamEvent
-			event := msg.Event.GetEvent()
-			switch e := event.(type) {
-			case *envdpb.ProcessEvent_Start:
-				ev = ExecStreamEvent{Type: "start", PID: e.Start.GetPid()}
-
-			case *envdpb.ProcessEvent_Data:
-				output := e.Data.GetOutput()
-				switch o := output.(type) {
-				case *envdpb.ProcessEvent_DataEvent_Stdout:
-					ev = ExecStreamEvent{Type: "stdout", Data: o.Stdout}
-				case *envdpb.ProcessEvent_DataEvent_Stderr:
-					ev = ExecStreamEvent{Type: "stderr", Data: o.Stderr}
-				}
-
-			case *envdpb.ProcessEvent_End:
-				ev = ExecStreamEvent{Type: "end", ExitCode: e.End.GetExitCode()}
-				if e.End.Error != nil {
-					ev.Error = e.End.GetError()
-				}
-
-			case *envdpb.ProcessEvent_Keepalive:
-				continue
-			}
-
-			select {
-			case ch <- ev:
-			case <-ctx.Done():
-				return
-			}
-		}
-
-		if err := stream.Err(); err != nil && err != io.EOF {
-			slog.Debug("exec stream error", "error", err)
-		}
-	}()
+	go pumpProcessEvents(ctx, stream, (*envdpb.StartResponse).GetEvent, ch, "exec stream error")

 	return ch, nil
 }
@ -434,7 +442,7 @@ func (c *Client) CancelMemoryPreload(ctx context.Context) error {
 // post-restore initialization. sandbox_id and template_id are passed
 // so envd can set WRENN_SANDBOX_ID and WRENN_TEMPLATE_ID env vars.
 func (c *Client) PostInit(ctx context.Context) error {
-	return c.PostInitWithDefaults(ctx, "", nil, "", "")
+	return c.PostInitWithDefaults(ctx, "", nil, "", "", "")
 }

 // PostInitWithDefaults calls envd's POST /init endpoint with optional default
@ -444,7 +452,7 @@ func (c *Client) PostInit(ctx context.Context) error {
 // timestamp and lifecycle_id are always populated: envd uses them to snap
 // the guest clock to the host's wall time and to detect post-resume calls
 // (which trigger port-forwarder restart + NFS remount).
-func (c *Client) PostInitWithDefaults(ctx context.Context, defaultUser string, envVars map[string]string, sandboxID, templateID string) error {
+func (c *Client) PostInitWithDefaults(ctx context.Context, defaultUser string, envVars map[string]string, sandboxID, templateID, proxyDomain string) error {
 	payload := map[string]any{
 		"timestamp":    time.Now().UTC().Format(time.RFC3339Nano),
 		"lifecycle_id": uuid.NewString(),
@ -461,6 +469,9 @@ func (c *Client) PostInitWithDefaults(ctx context.Context, defaultUser string, e
 	if templateID != "" {
 		payload["template_id"] = templateID
 	}
+	if proxyDomain != "" {
+		payload["proxy_domain"] = proxyDomain
+	}

 	var body io.Reader
 	if len(payload) > 0 {
--- a/internal/envdclient/health.go
+++ b/internal/envdclient/health.go
@ -81,6 +81,42 @@ func (c *Client) WaitUntilRPCReady(ctx context.Context) error {
 	}
 }

+// Activity is envd's liveness snapshot: VM-wide CPU utilisation and IO
+// throughput sampled inside the guest, decoded from the /activity JSON reply.
+// The host activity sampler uses it to decide whether a sandbox is doing real work.
+type Activity struct {
+	CPUCount   uint32  `json:"cpu_count"`
+	CPUUsedPct float32 `json:"cpu_used_pct"` // presumably a 0–100 percentage — confirm against envd
+	NetBps     uint64  `json:"net_bps"` // presumably bytes/sec (not bits) — confirm against envd
+	DiskBps    uint64  `json:"disk_bps"` // presumably bytes/sec (not bits) — confirm against envd
+}
+
+// FetchActivity polls envd's /activity endpoint and decodes the JSON reply into an Activity.
+// The endpoint serves straight from in-guest atomics (no syscalls), so it is cheap to call frequently.
+func (c *Client) FetchActivity(ctx context.Context) (*Activity, error) {
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, c.activityURL, nil)
+	if err != nil {
+		return nil, fmt.Errorf("build activity request: %w", err)
+	}
+
+	resp, err := c.httpClient.Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("fetch envd activity: %w", err)
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK { // any non-200 reply is reported as an error
+		return nil, fmt.Errorf("activity check returned %d", resp.StatusCode)
+	}
+
+	var data Activity
+	if err := json.NewDecoder(resp.Body).Decode(&data); err != nil {
+		return nil, fmt.Errorf("decode activity response: %w", err)
+	}
+
+	return &data, nil
+}
+
 // healthCheck sends a single GET /health request to envd.
 func (c *Client) healthCheck(ctx context.Context) error {
 	req, err := http.NewRequestWithContext(ctx, http.MethodGet, c.healthURL, nil)
--- a/internal/envdclient/process.go
+++ b/internal/envdclient/process.go
@ -4,7 +4,6 @@ import (
 	"context"
 	"fmt"
 	"io"
-	"log/slog"

 	"connectrpc.com/connect"

@ -87,52 +86,7 @@ func (c *Client) ConnectProcess(ctx context.Context, pid uint32, tag string) (<-
 	}

 	ch := make(chan ExecStreamEvent, 16)
-	go func() {
-		defer close(ch)
-		defer stream.Close()
-
-		for stream.Receive() {
-			msg := stream.Msg()
-			if msg.Event == nil {
-				continue
-			}
-
-			var ev ExecStreamEvent
-			switch e := msg.Event.GetEvent().(type) {
-			case *envdpb.ProcessEvent_Start:
-				ev = ExecStreamEvent{Type: "start", PID: e.Start.GetPid()}
-
-			case *envdpb.ProcessEvent_Data:
-				switch o := e.Data.GetOutput().(type) {
-				case *envdpb.ProcessEvent_DataEvent_Stdout:
-					ev = ExecStreamEvent{Type: "stdout", Data: o.Stdout}
-				case *envdpb.ProcessEvent_DataEvent_Stderr:
-					ev = ExecStreamEvent{Type: "stderr", Data: o.Stderr}
-				default:
-					continue
-				}
-
-			case *envdpb.ProcessEvent_End:
-				ev = ExecStreamEvent{Type: "end", ExitCode: e.End.GetExitCode()}
-				if e.End.Error != nil {
-					ev.Error = e.End.GetError()
-				}
-
-			case *envdpb.ProcessEvent_Keepalive:
-				continue
-			}
-
-			select {
-			case ch <- ev:
-			case <-ctx.Done():
-				return
-			}
-		}
-
-		if err := stream.Err(); err != nil && err != io.EOF {
-			slog.Debug("connect process stream error", "error", err)
-		}
-	}()
+	go pumpProcessEvents(ctx, stream, (*envdpb.ConnectResponse).GetEvent, ch, "connect process stream error")

 	return ch, nil
 }
--- a/internal/hostagent/server.go
+++ b/internal/hostagent/server.go
@ -253,7 +253,7 @@ func (s *Server) Exec(

 	result, err := s.mgr.Exec(execCtx, msg.SandboxId, msg.Cmd, msg.Args, opts)
 	if err != nil {
-		return nil, connect.NewError(connect.CodeInternal, fmt.Errorf("exec: %w", err))
+		return nil, envdErr("exec", err)
 	}

 	return connect.NewResponse(&pb.ExecResponse{
@ -395,31 +395,15 @@ func (s *Server) ExecStream(
 	}

 	for ev := range events {
+		start, data, end := execEventParts(ev)
 		var resp pb.ExecStreamResponse
-		switch ev.Type {
-		case "start":
-			resp.Event = &pb.ExecStreamResponse_Start{
-				Start: &pb.ExecStreamStart{Pid: ev.PID},
-			}
-		case "stdout":
-			resp.Event = &pb.ExecStreamResponse_Data{
-				Data: &pb.ExecStreamData{
-					Output: &pb.ExecStreamData_Stdout{Stdout: ev.Data},
-				},
-			}
-		case "stderr":
-			resp.Event = &pb.ExecStreamResponse_Data{
-				Data: &pb.ExecStreamData{
-					Output: &pb.ExecStreamData_Stderr{Stderr: ev.Data},
-				},
-			}
-		case "end":
-			resp.Event = &pb.ExecStreamResponse_End{
-				End: &pb.ExecStreamEnd{
-					ExitCode: ev.ExitCode,
-					Error:    ev.Error,
-				},
-			}
+		switch {
+		case start != nil:
+			resp.Event = &pb.ExecStreamResponse_Start{Start: start}
+		case data != nil:
+			resp.Event = &pb.ExecStreamResponse_Data{Data: data}
+		case end != nil:
+			resp.Event = &pb.ExecStreamResponse_End{End: end}
 		default:
 			continue
 		}
@ -431,6 +415,24 @@ func (s *Server) ExecStream(
 	return nil
 }

+// execEventParts maps a streaming exec event to its proto inner message.
+// At most one return value is non-nil; all-nil means the event type is not
+// recognised and there is nothing to forward. Shared by ExecStream and
+// ConnectProcess, which differ only in the response envelope they wrap it in.
+func execEventParts(ev envdclient.ExecStreamEvent) (*pb.ExecStreamStart, *pb.ExecStreamData, *pb.ExecStreamEnd) {
+	switch ev.Type {
+	case "start":
+		return &pb.ExecStreamStart{Pid: ev.PID}, nil, nil
+	case "stdout":
+		return nil, &pb.ExecStreamData{Output: &pb.ExecStreamData_Stdout{Stdout: ev.Data}}, nil
+	case "stderr":
+		return nil, &pb.ExecStreamData{Output: &pb.ExecStreamData_Stderr{Stderr: ev.Data}}, nil
+	case "end":
+		return nil, nil, &pb.ExecStreamEnd{ExitCode: ev.ExitCode, Error: ev.Error}
+	}
+	return nil, nil, nil // unknown Type: callers skip the event
+}
+
 func (s *Server) WriteFileStream(
 	ctx context.Context,
 	stream *connect.ClientStream[pb.WriteFileStreamRequest],
@ -912,31 +914,15 @@ func (s *Server) ConnectProcess(
 	}

 	for ev := range events {
+		start, data, end := execEventParts(ev)
 		var resp pb.ConnectProcessResponse
-		switch ev.Type {
-		case "start":
-			resp.Event = &pb.ConnectProcessResponse_Start{
-				Start: &pb.ExecStreamStart{Pid: ev.PID},
-			}
-		case "stdout":
-			resp.Event = &pb.ConnectProcessResponse_Data{
-				Data: &pb.ExecStreamData{
-					Output: &pb.ExecStreamData_Stdout{Stdout: ev.Data},
-				},
-			}
-		case "stderr":
-			resp.Event = &pb.ConnectProcessResponse_Data{
-				Data: &pb.ExecStreamData{
-					Output: &pb.ExecStreamData_Stderr{Stderr: ev.Data},
-				},
-			}
-		case "end":
-			resp.Event = &pb.ConnectProcessResponse_End{
-				End: &pb.ExecStreamEnd{
-					ExitCode: ev.ExitCode,
-					Error:    ev.Error,
-				},
-			}
+		switch {
+		case start != nil:
+			resp.Event = &pb.ConnectProcessResponse_Start{Start: start}
+		case data != nil:
+			resp.Event = &pb.ConnectProcessResponse_Data{Data: data}
+		case end != nil:
+			resp.Event = &pb.ConnectProcessResponse_End{End: end}
 		default:
 			continue
 		}
--- a/internal/sandbox/activity_test.go
+++ b/internal/sandbox/activity_test.go
@ -0,0 +1,111 @@
+package sandbox
+
+import (
+	"testing"
+
+	"git.omukk.dev/wrenn/wrenn/internal/envdclient"
+)
+
+func TestIsBusy(t *testing.T) { // table-driven check of Manager.isBusy with default and overridden thresholds
+	tests := []struct {
+		name string
+		cfg  Config
+		act  envdclient.Activity
+		want bool
+	}{
+		// Default thresholds (zero cfg → defaults: cpu 5%, net 16 KiB/s, disk 32 KiB/s).
+		{"idle", Config{}, envdclient.Activity{CPUUsedPct: 0.5, NetBps: 100, DiskBps: 200}, false},
+		{"cpu just below", Config{}, envdclient.Activity{CPUUsedPct: 4.99}, false},
+		{"cpu at threshold", Config{}, envdclient.Activity{CPUUsedPct: 5.0}, true},
+		{"cpu above", Config{}, envdclient.Activity{CPUUsedPct: 80.0}, true},
+		{"net just below", Config{}, envdclient.Activity{NetBps: 16*1024 - 1}, false},
+		{"net at floor", Config{}, envdclient.Activity{NetBps: 16 * 1024}, true},
+		{"disk just below", Config{}, envdclient.Activity{DiskBps: 32*1024 - 1}, false},
+		{"disk at floor", Config{}, envdclient.Activity{DiskBps: 32 * 1024}, true},
+		{"download: low cpu, high net", Config{}, envdclient.Activity{CPUUsedPct: 1.0, NetBps: 5 * 1024 * 1024}, true},
+
+		// Explicit overrides take precedence over defaults.
+		{
+			"custom cpu threshold met",
+			Config{CPUBusyPct: 20.0},
+			envdclient.Activity{CPUUsedPct: 25.0},
+			true,
+		},
+		{
+			"custom cpu threshold not met",
+			Config{CPUBusyPct: 20.0},
+			envdclient.Activity{CPUUsedPct: 10.0},
+			false,
+		},
+		{
+			"custom net floor not met",
+			Config{NetFloorBps: 1024 * 1024},
+			envdclient.Activity{NetBps: 16 * 1024}, // would be busy under the default floor
+			false,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			m := &Manager{cfg: tt.cfg} // isBusy only reads m.cfg, so a bare Manager suffices
+			if got := m.isBusy(&tt.act); got != tt.want {
+				t.Errorf("isBusy(%+v) = %v, want %v", tt.act, got, tt.want)
+			}
+		})
+	}
+}
+
+func TestApplyBusySample(t *testing.T) {
+	// Debounce requires busyDebounceSamples consecutive busy samples before the
+	// first bump. Verify the streak math and bump timing for single samples.
+	if busyDebounceSamples != 2 { // expectations below are hard-coded for a debounce of 2
+		t.Skip("test written for busyDebounceSamples=2")
+	}
+
+	tests := []struct {
+		name        string
+		startStreak int
+		busy        bool
+		wantStreak  int
+		wantBump    bool
+	}{
+		{"first busy, no bump yet", 0, true, 1, false},
+		{"second consecutive busy, bump", 1, true, 2, true},
+		{"sustained busy keeps bumping, streak held", 2, true, 2, true},
+		{"single noise spike from idle, no bump", 0, false, 0, false}, // NOTE(review): busy=false, so this exercises an idle sample, not a spike
+		{"idle resets a building streak", 1, false, 0, false},
+		{"idle resets a saturated streak", 2, false, 0, false},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			gotStreak, gotBump := applyBusySample(tt.startStreak, tt.busy)
+			if gotStreak != tt.wantStreak || gotBump != tt.wantBump {
+				t.Errorf("applyBusySample(%d, %v) = (%d, %v), want (%d, %v)",
+					tt.startStreak, tt.busy, gotStreak, gotBump, tt.wantStreak, tt.wantBump)
+			}
+		})
+	}
+}
+
+// TestApplyBusySample_NoiseScenario feeds a sample sequence through
+// applyBusySample, carrying the streak forward: a lone busy sample never bumps,
+// sustained busy samples bump from the second one on, and idle resets the streak.
+func TestApplyBusySample_NoiseScenario(t *testing.T) {
+	if busyDebounceSamples != 2 { // wantBumps below assumes a debounce of 2
+		t.Skip("test written for busyDebounceSamples=2")
+	}
+
+	samples := []bool{true, false, false, true, true, true, false} // spike, idle, idle, three busy in a row, idle
+	wantBumps := []bool{false, false, false, false, true, true, false}
+
+	streak := 0
+	for i, busy := range samples {
+		var bump bool
+		streak, bump = applyBusySample(streak, busy)
+		if bump != wantBumps[i] {
+			t.Errorf("sample %d (busy=%v): bump = %v, want %v (streak=%d)",
+				i, busy, bump, wantBumps[i], streak)
+		}
+	}
+}
--- a/internal/sandbox/manager.go
+++ b/internal/sandbox/manager.go
@ -88,14 +88,47 @@ type Config struct {
 	EnvdTimeout         time.Duration
 	DefaultRootfsSizeMB int // target size for template rootfs images; 0 → DefaultDiskSizeMB

+	// ProxyDomain is the public domain sandboxes are served under (e.g.
+	// "wrenn.dev"). Injected into envd at /init so `envd ports` can build
+	// {port}-{sandbox_id}.{domain} URLs.
+	ProxyDomain string
+
 	// Resolved at startup by the host agent.
 	KernelPath    string // path to the latest vmlinux-x.y.z
 	KernelVersion string // semver extracted from filename
 	VMMBin        string // path to the cloud-hypervisor binary
 	VMMVersion    string // semver from cloud-hypervisor --version
 	AgentVersion  string // host agent version (injected via ldflags)
+
+	// Activity sampler thresholds. The sampler polls each running sandbox's
+	// guest liveness and refreshes its TTL when it is doing real work, so a
+	// long-running but non-interactive job is not mistaken for inactive. A
+	// sandbox counts as busy when guest CPU ≥ CPUBusyPct, or net/disk
+	// throughput ≥ the respective floor (bytes/sec). Zero values fall back to
+	// the package defaults (the interval at sampler start, the thresholds on each sample).
+	ActivitySampleInterval time.Duration
+	CPUBusyPct             float32
+	NetFloorBps            uint64
+	DiskFloorBps           uint64
 }

+// Activity sampler defaults. Thresholds sit clear of idle-VM background noise
+// (envd's own sampler thread, guest timers) so a parked sandbox still times
+// out; the debounce below guards against a lone noisy sample masquerading as
+// work. Only the default* values can be overridden (via Config / host-agent env).
+const (
+	defaultActivitySampleInterval = 5 * time.Second
+	defaultCPUBusyPct             = 5.0       // percent of total vCPU capacity
+	defaultNetFloorBps            = 16 * 1024 // 16 KB/s
+	defaultDiskFloorBps           = 32 * 1024 // 32 KB/s
+	activityPollTimeout           = 3 * time.Second // deadline for one sandbox's /activity poll
+	activitySampleConcurrency     = 16 // max polls in flight during one sampling tick
+	// busyDebounceSamples is how many consecutive busy samples are required
+	// before the sandbox's TTL is refreshed. With a 5s interval, real work
+	// registers within ~10s while isolated noise spikes are ignored.
+	busyDebounceSamples = 2
+)
+
 // LifecycleEvent describes an autonomous state change initiated by the agent.
 type LifecycleEvent struct {
 	Event     string
@ -189,6 +222,12 @@ type sandboxState struct {
 	ring          *metricsRing       // tiered ring buffers for CPU/mem/disk metrics
 	samplerCancel context.CancelFunc // cancels the per-sandbox sampling goroutine
 	samplerDone   chan struct{}      // closed when the sampling goroutine exits
+
+	// activityBusyStreak counts consecutive busy activity samples. A single
+	// noisy sample (idle background CPU, a stray packet) must not refresh the
+	// TTL, so LastActiveAt is only bumped once the streak reaches
+	// busyDebounceSamples. Reset to 0 by any non-busy sample. Guarded by m.mu.
+	activityBusyStreak int
 }

 // buildMetadata constructs the metadata map with version information.
@ -419,14 +458,14 @@ func (m *Manager) Create(
 	// Fetch envd version (best-effort).
 	envdVersion, _ := client.FetchVersion(ctx)

-	// Apply template defaults via envd /init (no-op when both empty).
-	if defaultUser != "" || len(defaultEnv) > 0 {
-		initCtx, initCancel := context.WithTimeout(ctx, m.cfg.EnvdTimeout)
-		if err := client.PostInitWithDefaults(initCtx, defaultUser, defaultEnv, sandboxID, id.UUIDString(templateID)); err != nil {
-			slog.Warn("post-create PostInit failed", "id", sandboxID, "error", err)
-		}
-		initCancel()
+	// Apply template defaults + sandbox identity via envd /init. Always called
+	// on create so envd records its sandbox ID and proxy domain (used by
+	// `envd ports`), even when the template specifies no user/env defaults.
+	initCtx, initCancel := context.WithTimeout(ctx, m.cfg.EnvdTimeout)
+	if err := client.PostInitWithDefaults(initCtx, defaultUser, defaultEnv, sandboxID, id.UUIDString(templateID), m.cfg.ProxyDomain); err != nil {
+		slog.Warn("post-create PostInit failed", "id", sandboxID, "error", err)
 	}
+	initCancel()

 	now := time.Now()
 	sb := &sandboxState{
@ -667,7 +706,7 @@ func (m *Manager) SetDefaults(ctx context.Context, sandboxID, defaultUser string
 	if err != nil {
 		return err
 	}
-	return c.PostInitWithDefaults(ctx, defaultUser, defaultEnv, "", "")
+	return c.PostInitWithDefaults(ctx, defaultUser, defaultEnv, "", "", "")
 }

 // PtyAttach starts a new PTY process or reconnects to an existing one.
@ -762,6 +801,11 @@ func (m *Manager) AcquireProxyConn(sandboxID string) (net.IP, *ConnTracker, bool
 	if !sb.connTracker.Acquire() {
 		return nil, nil, false
 	}
+	// Inbound proxy traffic counts as activity: an idle web server reachable
+	// through the proxy should not be auto-paused while it is serving requests.
+	m.mu.Lock()
+	sb.LastActiveAt = time.Now()
+	m.mu.Unlock()
 	return sb.HostIP, sb.connTracker, true
 }

@ -872,6 +916,146 @@ func (m *Manager) reapExpired(_ context.Context) {
 	}
 }

+// StartActivitySampler starts a background goroutine that polls each running
+// sandbox's guest liveness (CPU + net/disk IO) every ActivitySampleInterval and
+// refreshes LastActiveAt when the sandbox is doing real work. This keeps a
+// long-running but non-interactive job (a build, a download) from being auto-paused by the TTL
+// reaper, while an idle workload (sleep, a parked shell) still times out. It stops on ctx.Done or m.stopCh.
+func (m *Manager) StartActivitySampler(ctx context.Context) {
+	interval := m.cfg.ActivitySampleInterval
+	if interval <= 0 { // unset or non-positive → package default
+		interval = defaultActivitySampleInterval
+	}
+
+	go func() {
+		ticker := time.NewTicker(interval)
+		defer ticker.Stop()
+
+		for {
+			select {
+			case <-ctx.Done():
+				return
+			case <-m.stopCh:
+				return
+			case <-ticker.C:
+				m.sampleActivity(ctx) // returns only after every poll of this tick has finished
+			}
+		}
+	}()
+}
+
+// activityTarget pairs a sandbox ID with the envd client to poll for it.
+type activityTarget struct {
+	id     string // key into m.boxes, re-checked after the poll
+	client *envdclient.Client // client captured when the snapshot was taken
+}
+
+func (m *Manager) sampleActivity(ctx context.Context) { // one sampling tick: poll every eligible sandbox and wait for all polls
+	// Snapshot the running sandboxes and their clients under the read lock,
+	// then poll over the network without holding it.
+	m.mu.RLock()
+	targets := make([]activityTarget, 0, len(m.boxes))
+	for id, sb := range m.boxes {
+		if sb.Status != models.StatusRunning {
+			continue
+		}
+		// Skip sandboxes still loading memory after a resume — they are not
+		// settled and their IO/CPU is preload noise, not user work.
+		if sb.memLoadDone != nil {
+			select {
+			case <-sb.memLoadDone: // readable without blocking: treated as "load finished"
+			default:
+				continue
+			}
+		}
+		c := sb.client.Load()
+		if c == nil { // no envd client to poll
+			continue
+		}
+		targets = append(targets, activityTarget{id: id, client: c})
+	}
+	m.mu.RUnlock()
+
+	if len(targets) == 0 {
+		return
+	}
+
+	sem := make(chan struct{}, activitySampleConcurrency)
+	var wg sync.WaitGroup
+	for _, t := range targets {
+		wg.Add(1)
+		sem <- struct{}{} // blocks while activitySampleConcurrency polls are already in flight
+		go func(t activityTarget) {
+			defer wg.Done()
+			defer func() { <-sem }()
+			m.pollAndBump(ctx, t)
+		}(t)
+	}
+	wg.Wait()
+}
+
+// pollAndBump fetches one sandbox's activity (bounded by activityPollTimeout)
+// and refreshes its TTL once it has been busy for busyDebounceSamples
+// consecutive samples. Poll failures are treated as a non-busy sample: an unreachable envd
+// is handled by the reaper / heartbeat paths, and resetting the streak is the safe default.
+func (m *Manager) pollAndBump(ctx context.Context, t activityTarget) {
+	pollCtx, cancel := context.WithTimeout(ctx, activityPollTimeout)
+	defer cancel()
+
+	act, err := t.client.FetchActivity(pollCtx)
+	busy := err == nil && m.isBusy(act) // act is only inspected when the fetch succeeded
+
+	m.mu.Lock()
+	defer m.mu.Unlock()
+
+	sb, ok := m.boxes[t.id]
+	if !ok || sb.Status != models.StatusRunning { // removed or no longer running since the snapshot
+		return
+	}
+
+	streak, bump := applyBusySample(sb.activityBusyStreak, busy)
+	sb.activityBusyStreak = streak
+	if bump {
+		sb.LastActiveAt = time.Now()
+	}
+}
+
+// applyBusySample advances a debounce streak with the latest sample and
+// reports whether the TTL should be refreshed this tick. A non-busy sample
+// resets the streak to 0; the bump fires once the streak reaches
+// busyDebounceSamples and on every busy tick thereafter (the returned streak is
+// capped at the threshold rather than growing unbounded).
+func applyBusySample(streak int, busy bool) (newStreak int, bump bool) {
+	if !busy {
+		return 0, false
+	}
+	streak++
+	if streak >= busyDebounceSamples {
+		return busyDebounceSamples, true // cap the streak at the threshold
+	}
+	return streak, false // still building towards the threshold
+}
+
+// isBusy reports whether a guest liveness snapshot represents real work: CPU, net or disk at or above its threshold. act must be non-nil.
+func (m *Manager) isBusy(act *envdclient.Activity) bool {
+	cpuThreshold := m.cfg.CPUBusyPct
+	if cpuThreshold <= 0 { // unset or non-positive → package default
+		cpuThreshold = defaultCPUBusyPct
+	}
+	netFloor := m.cfg.NetFloorBps
+	if netFloor == 0 { // unset → package default
+		netFloor = defaultNetFloorBps
+	}
+	diskFloor := m.cfg.DiskFloorBps
+	if diskFloor == 0 { // unset → package default
+		diskFloor = defaultDiskFloorBps
+	}
+
+	return act.CPUUsedPct >= cpuThreshold || // any single signal is enough
+		act.NetBps >= netFloor ||
+		act.DiskBps >= diskFloor
+}
+
 // Shutdown gracefully drains the manager. Running sandboxes are paused so
 // their state survives across agent restarts; any sandboxes still holding
 // runtime resources after PauseAll (e.g. paused failed, or status was
--- a/internal/sandbox/restore.go
+++ b/internal/sandbox/restore.go
@ -110,7 +110,7 @@ func (m *Manager) initAndStartMemoryLoader(ctx context.Context, sb *sandboxState
 		slog.Warn("post-restore PostInit skipped: envd client cleared", "id", sb.ID)
 		return
 	}
-	if err := c.PostInitWithDefaults(initCtx, defaultUser, envVars, sb.ID, templateIDStr); err != nil {
+	if err := c.PostInitWithDefaults(initCtx, defaultUser, envVars, sb.ID, templateIDStr, m.cfg.ProxyDomain); err != nil {
 		slog.Warn("post-restore PostInit failed", "id", sb.ID, "error", err)
 	}
 @ -1 +1 @@
 .2.0
 .2.1