forked from wrenn/wrenn
v0.2.1 (#55)
Co-authored-by: Tasnim Kabir Sadik <tksadik@omukk.dev> Reviewed-on: wrenn/wrenn#55 Co-authored-by: pptx704 <rafeed@omukk.dev> Co-committed-by: pptx704 <rafeed@omukk.dev>
This commit is contained in:
15
.env.example
15
.env.example
@ -17,6 +17,21 @@ WRENN_HOST_INTERFACE=eth0
|
||||
WRENN_CP_URL=http://localhost:9725
|
||||
WRENN_DEFAULT_ROOTFS_SIZE=5Gi
|
||||
WRENN_CH_BIN=/usr/local/bin/cloud-hypervisor
|
||||
# Public domain sandboxes are served under; injected into envd so `envd ports`
|
||||
# can build {port}-{sandbox_id}.{domain} URLs.
|
||||
WRENN_PROXY_DOMAIN=wrenn.dev
|
||||
|
||||
# Activity sampler for the inactivity TTL (all optional; shown values are the defaults).
|
||||
# The host polls each running sandbox's guest liveness and refreshes its
|
||||
# inactivity TTL when it is doing real work, so a long-running but
|
||||
# non-interactive job (build, download) is not auto-paused. A sandbox counts
|
||||
# as busy when guest CPU usage (percent) ≥ threshold, or net/disk throughput ≥ the floor.
|
||||
# Busy requires the threshold to hold for 2 consecutive samples (debounced),
|
||||
# so isolated idle-noise spikes do not keep a sandbox alive.
|
||||
WRENN_ACTIVITY_SAMPLE_INTERVAL=5s
|
||||
WRENN_CPU_BUSY_THRESHOLD=5.0
|
||||
WRENN_NET_FLOOR_BPS=16384
|
||||
WRENN_DISK_FLOOR_BPS=32768
|
||||
|
||||
# Auth
|
||||
JWT_SECRET=
|
||||
|
||||
@ -4,7 +4,7 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co
|
||||
|
||||
## Project Overview
|
||||
|
||||
Wrenn Sandbox is a microVM-based code execution platform. Users create isolated sandboxes (Cloud Hypervisor microVMs), run code inside them, and get output back via SDKs. Think E2B but with persistent sandboxes, pool-based pricing, and a single-binary deployment story.
|
||||
Wrenn is an open-source, self-hosted dev environment platform. Users spin up isolated sandboxes (Cloud Hypervisor microVMs), run code inside them, and get output back via SDKs. Fast boot, persistent state, and a single agent binary on each host you own.
|
||||
|
||||
## Build & Development Commands
|
||||
|
||||
@ -28,7 +28,7 @@ make dev-envd # envd in debug mode (port 49983)
|
||||
make check # fmt + vet + lint + test (CI order)
|
||||
make test # Unit tests: go test -race -v ./internal/...
|
||||
make test-integration # Integration tests (require host agent + Cloud Hypervisor)
|
||||
make fmt # gofmt
|
||||
make fmt # gofmt and cargo fmt (envd-rs)
|
||||
make vet # go vet
|
||||
make lint # golangci-lint
|
||||
|
||||
|
||||
1
Makefile
1
Makefile
@ -106,6 +106,7 @@ sqlc:
|
||||
|
||||
fmt:
|
||||
gofmt -w .
|
||||
cargo fmt --manifest-path envd-rs/Cargo.toml
|
||||
|
||||
lint:
|
||||
golangci-lint run ./...
|
||||
|
||||
@ -2,6 +2,8 @@
|
||||
|
||||
Secure infrastructure for AI
|
||||
|
||||
Wrenn is an open-source, self-hosted dev environment platform. Each capsule is a fully isolated virtual machine — booted in seconds, persistent across sessions. Run the control plane anywhere, and deploy a single agent binary on each compute host.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
- Linux host with `/dev/kvm` access (bare metal or nested virt)
|
||||
|
||||
@ -1 +1 @@
|
||||
0.2.0
|
||||
0.2.1
|
||||
|
||||
@ -1 +1 @@
|
||||
0.2.0
|
||||
0.2.1
|
||||
|
||||
@ -148,6 +148,13 @@ func main() {
|
||||
VMMBin: chBin,
|
||||
VMMVersion: chVersion,
|
||||
AgentVersion: version,
|
||||
ProxyDomain: envOrDefault("WRENN_PROXY_DOMAIN", "wrenn.dev"),
|
||||
|
||||
// Activity sampler tuning (all optional; zero → sandbox package default).
|
||||
ActivitySampleInterval: envDuration("WRENN_ACTIVITY_SAMPLE_INTERVAL"),
|
||||
CPUBusyPct: envFloat32("WRENN_CPU_BUSY_THRESHOLD"),
|
||||
NetFloorBps: envUint64("WRENN_NET_FLOOR_BPS"),
|
||||
DiskFloorBps: envUint64("WRENN_DISK_FLOOR_BPS"),
|
||||
}
|
||||
|
||||
// Remove any *.staging-* / *.trash-* directories left behind by a
|
||||
@ -171,6 +178,7 @@ func main() {
|
||||
mgr.RestorePausedSandboxes()
|
||||
|
||||
mgr.StartTTLReaper(ctx)
|
||||
mgr.StartActivitySampler(ctx)
|
||||
|
||||
// httpServer is declared here so the shutdown func can reference it.
|
||||
// ReadTimeout/WriteTimeout are intentionally omitted — they would kill
|
||||
@ -311,6 +319,49 @@ func envOrDefault(key, def string) string {
|
||||
return def
|
||||
}
|
||||
|
||||
// envDuration reads the environment variable named key and interprets it as a
// Go duration string such as "5s". It returns zero when the variable is unset,
// empty, or not a valid duration; an invalid value is additionally logged as a
// warning. Callers treat zero as "use the built-in default".
func envDuration(key string) time.Duration {
	raw := os.Getenv(key)
	if len(raw) == 0 {
		return 0
	}

	if parsed, err := time.ParseDuration(raw); err == nil {
		return parsed
	}

	slog.Warn("invalid duration env var, using default", "key", key, "value", raw)
	return 0
}
|
||||
|
||||
// envFloat32 parses an optional float env var. It returns 0 when the variable
// is unset or empty, when it is not a valid float32, or when it parses to a
// non-finite value (NaN or ±Inf). Invalid and non-finite values are logged as
// a warning. Callers treat 0 as "use the built-in default".
func envFloat32(key string) float32 {
	v := os.Getenv(key)
	if v == "" {
		return 0
	}

	f, err := strconv.ParseFloat(v, 32)
	if err != nil {
		slog.Warn("invalid float env var, using default", "key", key, "value", v)
		return 0
	}

	// strconv.ParseFloat accepts "NaN" and "Inf" without error, but a
	// non-finite value is never a meaningful setting: comparisons against NaN
	// are always false and nothing reaches +Inf. f-f is 0 for every finite f
	// and NaN otherwise, so this detects both without importing math.
	if f-f != 0 {
		slog.Warn("invalid float env var, using default", "key", key, "value", v)
		return 0
	}

	return float32(f)
}
|
||||
|
||||
// envUint64 reads the environment variable named key as a base-10 unsigned
// 64-bit integer. It returns 0 when the variable is unset, empty, or cannot be
// parsed (including negative or out-of-range input); a parse failure is logged
// as a warning.
func envUint64(key string) uint64 {
	raw := os.Getenv(key)
	if len(raw) == 0 {
		return 0
	}

	parsed, err := strconv.ParseUint(raw, 10, 64)
	if err == nil {
		return parsed
	}

	slog.Warn("invalid uint env var, using default", "key", key, "value", raw)
	return 0
}
|
||||
|
||||
// checkPrivileges verifies the process has the required Linux capabilities.
|
||||
// Always reads CapEff — even for root — because a root process inside a
|
||||
// restricted container (e.g. docker --cap-drop=all) may not have all caps.
|
||||
|
||||
2
envd-rs/Cargo.lock
generated
2
envd-rs/Cargo.lock
generated
@ -529,7 +529,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "envd"
|
||||
version = "0.3.0"
|
||||
version = "0.4.0"
|
||||
dependencies = [
|
||||
"async-stream",
|
||||
"axum",
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "envd"
|
||||
version = "0.3.0"
|
||||
version = "0.4.0"
|
||||
edition = "2024"
|
||||
rust-version = "1.95"
|
||||
|
||||
|
||||
@ -14,6 +14,7 @@ const ACCESS_TOKEN_HEADER: &str = "x-access-token";
|
||||
/// Format: "METHOD/path"
|
||||
const AUTH_EXCLUDED: &[&str] = &[
|
||||
"GET/health",
|
||||
"GET/activity",
|
||||
"GET/files",
|
||||
"POST/files",
|
||||
"POST/init",
|
||||
@ -21,11 +22,7 @@ const AUTH_EXCLUDED: &[&str] = &[
|
||||
];
|
||||
|
||||
/// Axum middleware that checks X-Access-Token header.
|
||||
pub async fn auth_layer(
|
||||
request: Request,
|
||||
next: Next,
|
||||
access_token: Arc<SecureToken>,
|
||||
) -> Response {
|
||||
pub async fn auth_layer(request: Request, next: Next, access_token: Arc<SecureToken>) -> Response {
|
||||
if access_token.is_set() {
|
||||
let method = request.method().as_str();
|
||||
let path = request.uri().path();
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
pub mod token;
|
||||
pub mod signing;
|
||||
pub mod middleware;
|
||||
pub mod signing;
|
||||
pub mod token;
|
||||
|
||||
@ -140,13 +140,32 @@ mod tests {
|
||||
#[test]
|
||||
fn validate_correct_header_token() {
|
||||
let token = test_token(b"secret");
|
||||
assert!(validate_signing(&token, Some("secret"), None, None, "root", "/f", READ_OPERATION).is_ok());
|
||||
assert!(
|
||||
validate_signing(
|
||||
&token,
|
||||
Some("secret"),
|
||||
None,
|
||||
None,
|
||||
"root",
|
||||
"/f",
|
||||
READ_OPERATION
|
||||
)
|
||||
.is_ok()
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn validate_wrong_header_token() {
|
||||
let token = test_token(b"secret");
|
||||
let result = validate_signing(&token, Some("wrong"), None, None, "root", "/f", READ_OPERATION);
|
||||
let result = validate_signing(
|
||||
&token,
|
||||
Some("wrong"),
|
||||
None,
|
||||
None,
|
||||
"root",
|
||||
"/f",
|
||||
READ_OPERATION,
|
||||
);
|
||||
assert!(result.is_err());
|
||||
assert!(result.unwrap_err().contains("does not match"));
|
||||
}
|
||||
@ -156,13 +175,32 @@ mod tests {
|
||||
let token = test_token(b"secret");
|
||||
let exp = far_future();
|
||||
let sig = generate_signature(&token, "/file", "root", READ_OPERATION, Some(exp)).unwrap();
|
||||
assert!(validate_signing(&token, None, Some(&sig), Some(exp), "root", "/file", READ_OPERATION).is_ok());
|
||||
assert!(
|
||||
validate_signing(
|
||||
&token,
|
||||
None,
|
||||
Some(&sig),
|
||||
Some(exp),
|
||||
"root",
|
||||
"/file",
|
||||
READ_OPERATION
|
||||
)
|
||||
.is_ok()
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn validate_invalid_signature() {
|
||||
let token = test_token(b"secret");
|
||||
let result = validate_signing(&token, None, Some("v1_bad"), Some(far_future()), "root", "/f", READ_OPERATION);
|
||||
let result = validate_signing(
|
||||
&token,
|
||||
None,
|
||||
Some("v1_bad"),
|
||||
Some(far_future()),
|
||||
"root",
|
||||
"/f",
|
||||
READ_OPERATION,
|
||||
);
|
||||
assert!(result.is_err());
|
||||
assert!(result.unwrap_err().contains("invalid signature"));
|
||||
}
|
||||
@ -172,7 +210,15 @@ mod tests {
|
||||
let token = test_token(b"secret");
|
||||
let expired: i64 = 1_000_000;
|
||||
let sig = generate_signature(&token, "/f", "root", READ_OPERATION, Some(expired)).unwrap();
|
||||
let result = validate_signing(&token, None, Some(&sig), Some(expired), "root", "/f", READ_OPERATION);
|
||||
let result = validate_signing(
|
||||
&token,
|
||||
None,
|
||||
Some(&sig),
|
||||
Some(expired),
|
||||
"root",
|
||||
"/f",
|
||||
READ_OPERATION,
|
||||
);
|
||||
assert!(result.is_err());
|
||||
assert!(result.unwrap_err().contains("expired"));
|
||||
}
|
||||
@ -197,7 +243,18 @@ mod tests {
|
||||
fn validate_valid_signature_no_expiration() {
|
||||
let token = test_token(b"secret");
|
||||
let sig = generate_signature(&token, "/file", "root", READ_OPERATION, None).unwrap();
|
||||
assert!(validate_signing(&token, None, Some(&sig), None, "root", "/file", READ_OPERATION).is_ok());
|
||||
assert!(
|
||||
validate_signing(
|
||||
&token,
|
||||
None,
|
||||
Some(&sig),
|
||||
None,
|
||||
"root",
|
||||
"/file",
|
||||
READ_OPERATION
|
||||
)
|
||||
.is_ok()
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
@ -19,20 +19,25 @@ pub struct Cgroup2Manager {
|
||||
}
|
||||
|
||||
impl Cgroup2Manager {
|
||||
pub fn new(root: &str, configs: &[(ProcessType, &str, &[(&str, &str)])]) -> Result<Self, String> {
|
||||
pub fn new(
|
||||
root: &str,
|
||||
configs: &[(ProcessType, &str, &[(&str, &str)])],
|
||||
) -> Result<Self, String> {
|
||||
let mut fds = HashMap::new();
|
||||
|
||||
for (proc_type, sub_path, properties) in configs {
|
||||
let full_path = PathBuf::from(root).join(sub_path);
|
||||
|
||||
fs::create_dir_all(&full_path).map_err(|e| {
|
||||
format!("failed to create cgroup {}: {e}", full_path.display())
|
||||
})?;
|
||||
fs::create_dir_all(&full_path)
|
||||
.map_err(|e| format!("failed to create cgroup {}: {e}", full_path.display()))?;
|
||||
|
||||
for (name, value) in *properties {
|
||||
let prop_path = full_path.join(name);
|
||||
fs::write(&prop_path, value).map_err(|e| {
|
||||
format!("failed to write cgroup property {}: {e}", prop_path.display())
|
||||
format!(
|
||||
"failed to write cgroup property {}: {e}",
|
||||
prop_path.display()
|
||||
)
|
||||
})?;
|
||||
}
|
||||
|
||||
|
||||
5
envd-rs/src/cmd/mod.rs
Normal file
5
envd-rs/src/cmd/mod.rs
Normal file
@ -0,0 +1,5 @@
|
||||
//! Client subcommands for the `envd` binary. These run as short-lived
|
||||
//! invocations (e.g. `envd ports`) inside the guest, separate from the
|
||||
//! long-running daemon, and exit when done.
|
||||
|
||||
pub mod ports;
|
||||
164
envd-rs/src/cmd/ports.rs
Normal file
164
envd-rs/src/cmd/ports.rs
Normal file
@ -0,0 +1,164 @@
|
||||
//! `envd ports` — list the open ports inside the sandbox that are reachable
|
||||
//! from outside, alongside the URL each is served at.
|
||||
//!
|
||||
//! Runs as a one-shot client (not the daemon): it scans `/proc/net/tcp[6]`
|
||||
//! directly via the shared port helper and reads the sandbox identity that the
|
||||
//! daemon recorded under /run/wrenn at /init time. It refuses to run outside a
|
||||
//! wrenn sandbox.
|
||||
|
||||
use std::fs;
|
||||
use std::path::Path;
|
||||
|
||||
use crate::config::{DEFAULT_PORT, DEFAULT_PROXY_DOMAIN, WRENN_RUN_DIR};
|
||||
use crate::port::conn::reachable_listening_ports;
|
||||
|
||||
/// Arguments for the `envd ports` subcommand.
|
||||
#[derive(clap::Args)]
|
||||
pub struct PortsArgs {
|
||||
/// Override the proxy domain used to build URLs (default: the domain
|
||||
/// injected by the host, falling back to the built-in default).
|
||||
#[arg(long)]
|
||||
domain: Option<String>,
|
||||
|
||||
/// Emit JSON instead of a table.
|
||||
#[arg(long)]
|
||||
json: bool,
|
||||
}
|
||||
|
||||
#[derive(serde::Serialize)]
|
||||
struct PortEntry {
|
||||
port: u32,
|
||||
url: String,
|
||||
}
|
||||
|
||||
/// Runs the subcommand and returns the desired process exit code.
|
||||
pub fn run(args: &PortsArgs) -> i32 {
|
||||
if !inside_sandbox() {
|
||||
eprintln!("envd ports: not running inside a wrenn sandbox");
|
||||
return 1;
|
||||
}
|
||||
|
||||
let sandbox_id = read_identity("WRENN_SANDBOX_ID", ".WRENN_SANDBOX_ID");
|
||||
let domain = args
|
||||
.domain
|
||||
.clone()
|
||||
.filter(|d| !d.is_empty())
|
||||
.or_else(|| read_identity("WRENN_PROXY_DOMAIN", ".WRENN_PROXY_DOMAIN"))
|
||||
.unwrap_or_else(|| DEFAULT_PROXY_DOMAIN.to_string());
|
||||
|
||||
let entries: Vec<PortEntry> = reachable_listening_ports(DEFAULT_PORT as u32)
|
||||
.into_iter()
|
||||
.map(|port| PortEntry {
|
||||
url: build_url(port, sandbox_id.as_deref(), &domain),
|
||||
port,
|
||||
})
|
||||
.collect();
|
||||
|
||||
if args.json {
|
||||
match serde_json::to_string_pretty(&entries) {
|
||||
Ok(s) => println!("{s}"),
|
||||
Err(e) => {
|
||||
eprintln!("envd ports: failed to encode JSON: {e}");
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
if entries.is_empty() {
|
||||
println!("No open ports.");
|
||||
return 0;
|
||||
}
|
||||
|
||||
println!("{:<6} {}", "PORT", "URL");
|
||||
for e in &entries {
|
||||
println!("{:<6} {}", e.port, e.url);
|
||||
}
|
||||
0
|
||||
}
|
||||
|
||||
/// A wrenn sandbox is identified by the marker the daemon writes at startup
|
||||
/// (`/run/wrenn/.WRENN_SANDBOX`) and the `WRENN_SANDBOX` env var it exports
|
||||
/// into spawned processes. Running `envd ports` on a normal host finds neither
|
||||
/// and is refused.
|
||||
fn inside_sandbox() -> bool {
|
||||
if std::env::var("WRENN_SANDBOX").as_deref() == Ok("true") {
|
||||
return true;
|
||||
}
|
||||
Path::new(WRENN_RUN_DIR).join(".WRENN_SANDBOX").exists()
|
||||
}
|
||||
|
||||
/// Reads an identity value from the environment, falling back to the matching
|
||||
/// /run/wrenn file. Returns None when neither is set or both are blank.
|
||||
fn read_identity(env_key: &str, file_name: &str) -> Option<String> {
|
||||
if let Ok(v) = std::env::var(env_key) {
|
||||
let v = v.trim().to_string();
|
||||
if !v.is_empty() {
|
||||
return Some(v);
|
||||
}
|
||||
}
|
||||
match fs::read_to_string(Path::new(WRENN_RUN_DIR).join(file_name)) {
|
||||
Ok(v) => {
|
||||
let v = v.trim().to_string();
|
||||
if v.is_empty() { None } else { Some(v) }
|
||||
}
|
||||
Err(_) => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Formats the public URL for `port` as `https://{port}-{sandbox_id}.{domain}`.
///
/// When `sandbox_id` is `None`, the literal placeholder `<sandbox-id>` is
/// substituted so the output still shows the URL shape.
fn build_url(port: u32, sandbox_id: Option<&str>, domain: &str) -> String {
    match sandbox_id {
        Some(id) => format!("https://{port}-{id}.{domain}"),
        None => format!("https://{port}-<sandbox-id>.{domain}"),
    }
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn url_with_sandbox_id() {
|
||||
assert_eq!(
|
||||
build_url(8000, Some("cl-abcd1234"), "wrenn.dev"),
|
||||
"https://8000-cl-abcd1234.wrenn.dev"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn url_without_sandbox_id_uses_placeholder() {
|
||||
assert_eq!(
|
||||
build_url(5173, None, "wrenn.dev"),
|
||||
"https://5173-<sandbox-id>.wrenn.dev"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn url_honors_custom_domain() {
|
||||
assert_eq!(
|
||||
build_url(3000, Some("cl-deadbeef"), "sandbox.example.com"),
|
||||
"https://3000-cl-deadbeef.sandbox.example.com"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn read_identity_prefers_env() {
|
||||
// SAFETY: test-local env var, single-threaded test body.
|
||||
unsafe { std::env::set_var("ENVD_PORTS_TEST_ID", " cl-fromenv ") };
|
||||
assert_eq!(
|
||||
read_identity("ENVD_PORTS_TEST_ID", ".nonexistent-file"),
|
||||
Some("cl-fromenv".to_string())
|
||||
);
|
||||
unsafe { std::env::remove_var("ENVD_PORTS_TEST_ID") };
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn read_identity_none_when_unset() {
|
||||
assert_eq!(
|
||||
read_identity("ENVD_PORTS_TEST_UNSET", ".nonexistent-file"),
|
||||
None
|
||||
);
|
||||
}
|
||||
}
|
||||
@ -7,5 +7,10 @@ pub const PORT_SCANNER_INTERVAL: Duration = Duration::from_millis(1000);
|
||||
pub const DEFAULT_USER: &str = "root";
|
||||
pub const WRENN_RUN_DIR: &str = "/run/wrenn";
|
||||
|
||||
/// Fallback proxy domain used by `envd ports` to build URLs when the host has
|
||||
/// not injected one via /init. Matches the host agent's WRENN_PROXY_DOMAIN
|
||||
/// default.
|
||||
pub const DEFAULT_PROXY_DOMAIN: &str = "wrenn.dev";
|
||||
|
||||
pub const KILOBYTE: u64 = 1024;
|
||||
pub const MEGABYTE: u64 = 1024 * KILOBYTE;
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
pub mod hmac_sha256;
|
||||
pub mod sha256;
|
||||
pub mod sha512;
|
||||
pub mod hmac_sha256;
|
||||
|
||||
@ -20,14 +20,22 @@ mod tests {
|
||||
const VECTORS: &[(&[u8], &str)] = &[
|
||||
(b"", "47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU"),
|
||||
(b"abc", "ungWv48Bz+pBQUDeXa4iI7ADYaOWF3qctBD/YfIAFa0"),
|
||||
(b"abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq", "JI1qYdIGOLjlwCaTDD5gOaM85Flk/yFn9uzt1BnbBsE"),
|
||||
(
|
||||
b"abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq",
|
||||
"JI1qYdIGOLjlwCaTDD5gOaM85Flk/yFn9uzt1BnbBsE",
|
||||
),
|
||||
];
|
||||
|
||||
#[test]
|
||||
fn known_answer_with_prefix() {
|
||||
for (input, expected_b64) in VECTORS {
|
||||
let result = hash(input);
|
||||
assert_eq!(result, format!("$sha256${expected_b64}"), "input: {:?}", String::from_utf8_lossy(input));
|
||||
assert_eq!(
|
||||
result,
|
||||
format!("$sha256${expected_b64}"),
|
||||
"input: {:?}",
|
||||
String::from_utf8_lossy(input)
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@ -35,7 +43,12 @@ mod tests {
|
||||
fn known_answer_without_prefix() {
|
||||
for (input, expected_b64) in VECTORS {
|
||||
let result = hash_without_prefix(input);
|
||||
assert_eq!(result, *expected_b64, "input: {:?}", String::from_utf8_lossy(input));
|
||||
assert_eq!(
|
||||
result,
|
||||
*expected_b64,
|
||||
"input: {:?}",
|
||||
String::from_utf8_lossy(input)
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -15,9 +15,18 @@ mod tests {
|
||||
use super::*;
|
||||
|
||||
const VECTORS: &[(&str, &str)] = &[
|
||||
("", "cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e"),
|
||||
("abc", "ddaf35a193617abacc417349ae20413112e6fa4e89a97ea20a9eeee64b55d39a2192992a274fc1a836ba3c23a3feebbd454d4423643ce80e2a9ac94fa54ca49f"),
|
||||
("abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq", "204a8fc6dda82f0a0ced7beb8e08a41657c16ef468b228a8279be331a703c33596fd15c13b1b07f9aa1d3bea57789ca031ad85c7a71dd70354ec631238ca3445"),
|
||||
(
|
||||
"",
|
||||
"cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e",
|
||||
),
|
||||
(
|
||||
"abc",
|
||||
"ddaf35a193617abacc417349ae20413112e6fa4e89a97ea20a9eeee64b55d39a2192992a274fc1a836ba3c23a3feebbd454d4423643ce80e2a9ac94fa54ca49f",
|
||||
),
|
||||
(
|
||||
"abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq",
|
||||
"204a8fc6dda82f0a0ced7beb8e08a41657c16ef468b228a8279be331a703c33596fd15c13b1b07f9aa1d3bea57789ca031ad85c7a71dd70354ec631238ca3445",
|
||||
),
|
||||
];
|
||||
|
||||
#[test]
|
||||
@ -30,7 +39,10 @@ mod tests {
|
||||
#[test]
|
||||
fn str_and_bytes_agree() {
|
||||
for (input, _) in VECTORS {
|
||||
assert_eq!(hash_access_token(input), hash_access_token_bytes(input.as_bytes()));
|
||||
assert_eq!(
|
||||
hash_access_token(input),
|
||||
hash_access_token_bytes(input.as_bytes())
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@ -38,6 +50,9 @@ mod tests {
|
||||
fn output_is_lowercase_hex_128_chars() {
|
||||
let h = hash_access_token("anything");
|
||||
assert_eq!(h.len(), 128);
|
||||
assert!(h.chars().all(|c| c.is_ascii_hexdigit() && !c.is_ascii_uppercase()));
|
||||
assert!(
|
||||
h.chars()
|
||||
.all(|c| c.is_ascii_hexdigit() && !c.is_ascii_uppercase())
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@ -62,7 +62,10 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn workdir_explicit_overrides_default() {
|
||||
assert_eq!(resolve_default_workdir("/explicit", Some("/default")), "/explicit");
|
||||
assert_eq!(
|
||||
resolve_default_workdir("/explicit", Some("/default")),
|
||||
"/explicit"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@ -82,7 +85,10 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn username_explicit_returns_explicit() {
|
||||
assert_eq!(resolve_default_username(Some("root"), "wrenn").unwrap(), "root");
|
||||
assert_eq!(
|
||||
resolve_default_username(Some("root"), "wrenn").unwrap(),
|
||||
"root"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
37
envd-rs/src/http/activity.rs
Normal file
37
envd-rs/src/http/activity.rs
Normal file
@ -0,0 +1,37 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use axum::Json;
|
||||
use axum::extract::State;
|
||||
use axum::http::header;
|
||||
use axum::response::IntoResponse;
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::state::AppState;
|
||||
|
||||
/// Liveness snapshot the host activity sampler polls to decide whether a
|
||||
/// sandbox is doing real work. All fields are served straight from atomics
|
||||
/// updated by the 1s sampler thread — no syscalls per request, so the host
|
||||
/// can poll cheaply at a few-second cadence.
|
||||
#[derive(Serialize)]
|
||||
pub struct Activity {
|
||||
cpu_count: u32,
|
||||
cpu_used_pct: f32,
|
||||
net_bps: u64,
|
||||
disk_bps: u64,
|
||||
}
|
||||
|
||||
pub async fn get_activity(State(state): State<Arc<AppState>>) -> impl IntoResponse {
|
||||
tracing::trace!("get activity");
|
||||
|
||||
let body = Activity {
|
||||
cpu_count: state.cpu_count(),
|
||||
cpu_used_pct: state.cpu_used_pct(),
|
||||
net_bps: state.net_bps(),
|
||||
disk_bps: state.disk_bps(),
|
||||
};
|
||||
|
||||
(
|
||||
[(header::CACHE_CONTROL, "no-store")],
|
||||
Json(body),
|
||||
)
|
||||
}
|
||||
@ -20,7 +20,10 @@ fn parse_encoding_with_quality(value: &str) -> EncodingWithQuality {
|
||||
let enc = value[..idx].trim();
|
||||
for param in params.split(';') {
|
||||
let param = param.trim();
|
||||
if let Some(stripped) = param.strip_prefix("q=").or_else(|| param.strip_prefix("Q=")) {
|
||||
if let Some(stripped) = param
|
||||
.strip_prefix("q=")
|
||||
.or_else(|| param.strip_prefix("Q="))
|
||||
{
|
||||
if let Ok(q) = stripped.parse::<f64>() {
|
||||
quality = q;
|
||||
}
|
||||
@ -43,8 +46,10 @@ fn parse_accept_encoding_header(header: &str) -> (Vec<EncodingWithQuality>, bool
|
||||
return (Vec::new(), false);
|
||||
}
|
||||
|
||||
let encodings: Vec<EncodingWithQuality> =
|
||||
header.split(',').map(|v| parse_encoding_with_quality(v)).collect();
|
||||
let encodings: Vec<EncodingWithQuality> = header
|
||||
.split(',')
|
||||
.map(|v| parse_encoding_with_quality(v))
|
||||
.collect();
|
||||
|
||||
let mut identity_rejected = false;
|
||||
let mut identity_explicitly_accepted = false;
|
||||
@ -97,7 +102,11 @@ pub fn parse_accept_encoding<B>(r: &Request<B>) -> Result<&'static str, String>
|
||||
}
|
||||
|
||||
let (mut encodings, identity_rejected) = parse_accept_encoding_header(header);
|
||||
encodings.sort_by(|a, b| b.quality.partial_cmp(&a.quality).unwrap_or(std::cmp::Ordering::Equal));
|
||||
encodings.sort_by(|a, b| {
|
||||
b.quality
|
||||
.partial_cmp(&a.quality)
|
||||
.unwrap_or(std::cmp::Ordering::Equal)
|
||||
});
|
||||
|
||||
for eq in &encodings {
|
||||
if eq.quality == 0.0 {
|
||||
@ -121,7 +130,9 @@ pub fn parse_accept_encoding<B>(r: &Request<B>) -> Result<&'static str, String>
|
||||
return Ok(ENCODING_IDENTITY);
|
||||
}
|
||||
|
||||
Err(format!("no acceptable encoding found, supported: {SUPPORTED_ENCODINGS:?}"))
|
||||
Err(format!(
|
||||
"no acceptable encoding found, supported: {SUPPORTED_ENCODINGS:?}"
|
||||
))
|
||||
}
|
||||
|
||||
pub fn parse_content_encoding<B>(r: &Request<B>) -> Result<&'static str, String> {
|
||||
@ -143,7 +154,9 @@ pub fn parse_content_encoding<B>(r: &Request<B>) -> Result<&'static str, String>
|
||||
return Ok(ENCODING_GZIP);
|
||||
}
|
||||
|
||||
Err(format!("unsupported Content-Encoding: {header}, supported: {SUPPORTED_ENCODINGS:?}"))
|
||||
Err(format!(
|
||||
"unsupported Content-Encoding: {header}, supported: {SUPPORTED_ENCODINGS:?}"
|
||||
))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
@ -236,17 +249,26 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn accept_encoding_no_header_returns_identity() {
|
||||
assert_eq!(parse_accept_encoding(&req_no_headers()).unwrap(), "identity");
|
||||
assert_eq!(
|
||||
parse_accept_encoding(&req_no_headers()).unwrap(),
|
||||
"identity"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn accept_encoding_gzip() {
|
||||
assert_eq!(parse_accept_encoding(&req_with_accept("gzip")).unwrap(), "gzip");
|
||||
assert_eq!(
|
||||
parse_accept_encoding(&req_with_accept("gzip")).unwrap(),
|
||||
"gzip"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn accept_encoding_identity_explicit() {
|
||||
assert_eq!(parse_accept_encoding(&req_with_accept("identity")).unwrap(), "identity");
|
||||
assert_eq!(
|
||||
parse_accept_encoding(&req_with_accept("identity")).unwrap(),
|
||||
"identity"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@ -259,7 +281,10 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn accept_encoding_wildcard_returns_identity() {
|
||||
assert_eq!(parse_accept_encoding(&req_with_accept("*")).unwrap(), "identity");
|
||||
assert_eq!(
|
||||
parse_accept_encoding(&req_with_accept("*")).unwrap(),
|
||||
"identity"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@ -277,7 +302,10 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn accept_encoding_unsupported_only_falls_to_identity() {
|
||||
assert_eq!(parse_accept_encoding(&req_with_accept("br")).unwrap(), "identity");
|
||||
assert_eq!(
|
||||
parse_accept_encoding(&req_with_accept("br")).unwrap(),
|
||||
"identity"
|
||||
);
|
||||
}
|
||||
|
||||
// is_identity_acceptable
|
||||
@ -311,17 +339,26 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn content_encoding_empty_returns_identity() {
|
||||
assert_eq!(parse_content_encoding(&req_no_headers()).unwrap(), "identity");
|
||||
assert_eq!(
|
||||
parse_content_encoding(&req_no_headers()).unwrap(),
|
||||
"identity"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn content_encoding_gzip() {
|
||||
assert_eq!(parse_content_encoding(&req_with_content("gzip")).unwrap(), "gzip");
|
||||
assert_eq!(
|
||||
parse_content_encoding(&req_with_content("gzip")).unwrap(),
|
||||
"gzip"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn content_encoding_identity_explicit() {
|
||||
assert_eq!(parse_content_encoding(&req_with_content("identity")).unwrap(), "identity");
|
||||
assert_eq!(
|
||||
parse_content_encoding(&req_with_content("identity")).unwrap(),
|
||||
"identity"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@ -331,6 +368,9 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn content_encoding_case_insensitive() {
|
||||
assert_eq!(parse_content_encoding(&req_with_content("GZIP")).unwrap(), "gzip");
|
||||
assert_eq!(
|
||||
parse_content_encoding(&req_with_content("GZIP")).unwrap(),
|
||||
"gzip"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@ -18,8 +18,5 @@ pub async fn get_envs(State(state): State<Arc<AppState>>) -> impl IntoResponse {
|
||||
.map(|entry| (entry.key().clone(), entry.value().clone()))
|
||||
.collect();
|
||||
|
||||
(
|
||||
[(header::CACHE_CONTROL, "no-store")],
|
||||
Json(envs),
|
||||
)
|
||||
([(header::CACHE_CONTROL, "no-store")], Json(envs))
|
||||
}
|
||||
|
||||
@ -72,13 +72,11 @@ pub async fn get_files(
|
||||
let header_token = extract_header_token(&req);
|
||||
|
||||
let default_user = state.defaults.user();
|
||||
let username = match execcontext::resolve_default_username(
|
||||
params.username.as_deref(),
|
||||
&default_user,
|
||||
) {
|
||||
Ok(u) => u.to_string(),
|
||||
Err(e) => return json_error(StatusCode::BAD_REQUEST, e),
|
||||
};
|
||||
let username =
|
||||
match execcontext::resolve_default_username(params.username.as_deref(), &default_user) {
|
||||
Ok(u) => u.to_string(),
|
||||
Err(e) => return json_error(StatusCode::BAD_REQUEST, e),
|
||||
};
|
||||
|
||||
if let Err(e) = validate_file_signing(
|
||||
&state,
|
||||
@ -98,8 +96,7 @@ pub async fn get_files(
|
||||
|
||||
let home_dir = user.dir.to_string_lossy().to_string();
|
||||
let default_workdir = state.defaults.workdir();
|
||||
let resolved = match expand_and_resolve(path_str, &home_dir, default_workdir.as_deref())
|
||||
{
|
||||
let resolved = match expand_and_resolve(path_str, &home_dir, default_workdir.as_deref()) {
|
||||
Ok(p) => p,
|
||||
Err(e) => return json_error(StatusCode::BAD_REQUEST, &e),
|
||||
};
|
||||
@ -177,8 +174,7 @@ pub async fn get_files(
|
||||
.unwrap_or("application/octet-stream");
|
||||
|
||||
if use_encoding == "gzip" {
|
||||
let mut encoder =
|
||||
flate2::write::GzEncoder::new(Vec::new(), flate2::Compression::default());
|
||||
let mut encoder = flate2::write::GzEncoder::new(Vec::new(), flate2::Compression::default());
|
||||
if let Err(e) = encoder.write_all(&file_data) {
|
||||
return json_error(
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
@ -225,13 +221,11 @@ pub async fn post_files(
|
||||
let header_token = extract_header_token(&req);
|
||||
|
||||
let default_user = state.defaults.user();
|
||||
let username = match execcontext::resolve_default_username(
|
||||
params.username.as_deref(),
|
||||
&default_user,
|
||||
) {
|
||||
Ok(u) => u.to_string(),
|
||||
Err(e) => return json_error(StatusCode::BAD_REQUEST, e),
|
||||
};
|
||||
let username =
|
||||
match execcontext::resolve_default_username(params.username.as_deref(), &default_user) {
|
||||
Ok(u) => u.to_string(),
|
||||
Err(e) => return json_error(StatusCode::BAD_REQUEST, e),
|
||||
};
|
||||
|
||||
if let Err(e) = validate_file_signing(
|
||||
&state,
|
||||
@ -283,10 +277,7 @@ pub async fn post_files(
|
||||
Err(e) => return json_error(StatusCode::BAD_REQUEST, &e),
|
||||
}
|
||||
} else {
|
||||
let fname = field
|
||||
.file_name()
|
||||
.unwrap_or("upload")
|
||||
.to_string();
|
||||
let fname = field.file_name().unwrap_or("upload").to_string();
|
||||
match expand_and_resolve(&fname, &home_dir, default_workdir.as_deref()) {
|
||||
Ok(p) => p,
|
||||
Err(e) => return json_error(StatusCode::BAD_REQUEST, &e),
|
||||
@ -382,7 +373,7 @@ fn process_file(
|
||||
return Err((
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
format!("error getting file info: {e}"),
|
||||
))
|
||||
));
|
||||
}
|
||||
};
|
||||
|
||||
@ -395,7 +386,7 @@ fn process_file(
|
||||
return Err((
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
format!("error changing ownership: {e}"),
|
||||
))
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -26,6 +26,9 @@ pub struct InitRequest {
|
||||
pub volume_mounts: Option<Vec<VolumeMount>>,
|
||||
pub sandbox_id: Option<String>,
|
||||
pub template_id: Option<String>,
|
||||
/// Public proxy domain (e.g. "wrenn.dev"). Used by `envd ports` to build
|
||||
/// the {port}-{sandbox_id}.{domain} URLs.
|
||||
pub proxy_domain: Option<String>,
|
||||
/// New lifecycle identifier for this resume. When it changes between
|
||||
/// /init calls, envd treats the call as a post-resume hook: port
|
||||
/// forwarder is restarted and NFS mounts are refreshed.
|
||||
@ -183,14 +186,32 @@ pub async fn post_init(
|
||||
// SAFETY: envd is single-threaded at init time; no concurrent env reads.
|
||||
unsafe { std::env::set_var("WRENN_SANDBOX_ID", id) };
|
||||
write_run_file(".WRENN_SANDBOX_ID", id);
|
||||
state.defaults.env_vars.insert("WRENN_SANDBOX_ID".into(), id.clone());
|
||||
state
|
||||
.defaults
|
||||
.env_vars
|
||||
.insert("WRENN_SANDBOX_ID".into(), id.clone());
|
||||
}
|
||||
if let Some(ref id) = init_req.template_id {
|
||||
tracing::debug!(template_id = %id, "setting template ID from init request");
|
||||
// SAFETY: envd is single-threaded at init time; no concurrent env reads.
|
||||
unsafe { std::env::set_var("WRENN_TEMPLATE_ID", id) };
|
||||
write_run_file(".WRENN_TEMPLATE_ID", id);
|
||||
state.defaults.env_vars.insert("WRENN_TEMPLATE_ID".into(), id.clone());
|
||||
state
|
||||
.defaults
|
||||
.env_vars
|
||||
.insert("WRENN_TEMPLATE_ID".into(), id.clone());
|
||||
}
|
||||
if let Some(ref domain) = init_req.proxy_domain {
|
||||
if !domain.is_empty() {
|
||||
tracing::debug!(proxy_domain = %domain, "setting proxy domain from init request");
|
||||
// SAFETY: envd is single-threaded at init time; no concurrent env reads.
|
||||
unsafe { std::env::set_var("WRENN_PROXY_DOMAIN", domain) };
|
||||
write_run_file(".WRENN_PROXY_DOMAIN", domain);
|
||||
state
|
||||
.defaults
|
||||
.env_vars
|
||||
.insert("WRENN_PROXY_DOMAIN".into(), domain.clone());
|
||||
}
|
||||
}
|
||||
|
||||
(
|
||||
@ -202,7 +223,10 @@ pub async fn post_init(
|
||||
|
||||
async fn validate_init_access_token(state: &AppState, request_token: &str) -> Result<(), String> {
|
||||
// Fast path: matches existing token
|
||||
if state.access_token.is_set() && !request_token.is_empty() && state.access_token.equals(request_token) {
|
||||
if state.access_token.is_set()
|
||||
&& !request_token.is_empty()
|
||||
&& state.access_token.equals(request_token)
|
||||
{
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
@ -241,10 +265,7 @@ async fn setup_hyperloop(address: &str, env_vars: &dashmap::DashMap<String, Stri
|
||||
}
|
||||
}
|
||||
|
||||
env_vars.insert(
|
||||
"WRENN_EVENTS_ADDRESS".into(),
|
||||
format!("http://{address}"),
|
||||
);
|
||||
env_vars.insert("WRENN_EVENTS_ADDRESS".into(), format!("http://{address}"));
|
||||
}
|
||||
|
||||
async fn setup_nfs(nfs_target: &str, path: &str) {
|
||||
@ -287,7 +308,7 @@ async fn setup_nfs(nfs_target: &str, path: &str) {
|
||||
}
|
||||
|
||||
fn write_run_file(name: &str, value: &str) {
|
||||
let dir = std::path::Path::new("/run/wrenn");
|
||||
let dir = std::path::Path::new(crate::config::WRENN_RUN_DIR);
|
||||
if let Err(e) = std::fs::create_dir_all(dir) {
|
||||
tracing::warn!(error = %e, "failed to create /run/wrenn");
|
||||
return;
|
||||
@ -309,4 +330,3 @@ fn parse_timestamp_to_nanos(ts: &str) -> Result<i64, ()> {
|
||||
}
|
||||
Err(())
|
||||
}
|
||||
|
||||
|
||||
@ -1,3 +1,4 @@
|
||||
pub mod activity;
|
||||
pub mod encoding;
|
||||
pub mod envs;
|
||||
pub mod error;
|
||||
@ -13,8 +14,8 @@ use std::time::Duration;
|
||||
|
||||
use axum::Router;
|
||||
use axum::routing::{get, post};
|
||||
use http::header::{CACHE_CONTROL, HeaderName};
|
||||
use http::Method;
|
||||
use http::header::{CACHE_CONTROL, HeaderName};
|
||||
use tower_http::cors::{AllowHeaders, AllowMethods, AllowOrigin, CorsLayer};
|
||||
|
||||
use crate::config::CORS_MAX_AGE;
|
||||
@ -47,6 +48,7 @@ pub fn router(state: Arc<AppState>) -> Router {
|
||||
|
||||
Router::new()
|
||||
.route("/health", get(health::get_health))
|
||||
.route("/activity", get(activity::get_activity))
|
||||
.route("/metrics", get(metrics::get_metrics))
|
||||
.route("/envs", get(envs::get_envs))
|
||||
.route("/init", post(init::post_init))
|
||||
|
||||
@ -2,6 +2,7 @@
|
||||
|
||||
mod auth;
|
||||
mod cgroups;
|
||||
mod cmd;
|
||||
mod config;
|
||||
mod conntracker;
|
||||
mod crypto;
|
||||
@ -39,6 +40,10 @@ const COMMIT: &str = {
|
||||
#[derive(Parser)]
|
||||
#[command(name = "envd", about = "Wrenn guest agent daemon")]
|
||||
struct Cli {
|
||||
/// Client subcommand. When omitted, envd runs as the guest daemon.
|
||||
#[command(subcommand)]
|
||||
command: Option<Commands>,
|
||||
|
||||
#[arg(long, default_value_t = DEFAULT_PORT)]
|
||||
port: u16,
|
||||
|
||||
@ -55,6 +60,12 @@ struct Cli {
|
||||
cgroup_root: String,
|
||||
}
|
||||
|
||||
#[derive(clap::Subcommand)]
|
||||
enum Commands {
|
||||
/// List externally-reachable open ports and the URL each is served at.
|
||||
Ports(cmd::ports::PortsArgs),
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() {
|
||||
let cli = Cli::parse();
|
||||
@ -68,6 +79,11 @@ async fn main() {
|
||||
return;
|
||||
}
|
||||
|
||||
// Client subcommands are short-lived: run and exit before any daemon setup.
|
||||
if let Some(Commands::Ports(args)) = &cli.command {
|
||||
std::process::exit(cmd::ports::run(args));
|
||||
}
|
||||
|
||||
logging::init(true);
|
||||
|
||||
if let Err(e) = fs::create_dir_all(WRENN_RUN_DIR) {
|
||||
@ -85,36 +101,35 @@ async fn main() {
|
||||
}
|
||||
|
||||
// Cgroup manager
|
||||
let cgroup_manager: Arc<dyn cgroups::CgroupManager> =
|
||||
match cgroups::Cgroup2Manager::new(
|
||||
&cli.cgroup_root,
|
||||
&[
|
||||
(
|
||||
cgroups::ProcessType::Pty,
|
||||
"wrenn/pty",
|
||||
&[] as &[(&str, &str)],
|
||||
),
|
||||
(
|
||||
cgroups::ProcessType::User,
|
||||
"wrenn/user",
|
||||
&[] as &[(&str, &str)],
|
||||
),
|
||||
(
|
||||
cgroups::ProcessType::Socat,
|
||||
"wrenn/socat",
|
||||
&[] as &[(&str, &str)],
|
||||
),
|
||||
],
|
||||
) {
|
||||
Ok(m) => {
|
||||
tracing::info!("cgroup2 manager initialized");
|
||||
Arc::new(m)
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::warn!(error = %e, "cgroup2 init failed, using noop");
|
||||
Arc::new(cgroups::NoopCgroupManager)
|
||||
}
|
||||
};
|
||||
let cgroup_manager: Arc<dyn cgroups::CgroupManager> = match cgroups::Cgroup2Manager::new(
|
||||
&cli.cgroup_root,
|
||||
&[
|
||||
(
|
||||
cgroups::ProcessType::Pty,
|
||||
"wrenn/pty",
|
||||
&[] as &[(&str, &str)],
|
||||
),
|
||||
(
|
||||
cgroups::ProcessType::User,
|
||||
"wrenn/user",
|
||||
&[] as &[(&str, &str)],
|
||||
),
|
||||
(
|
||||
cgroups::ProcessType::Socat,
|
||||
"wrenn/socat",
|
||||
&[] as &[(&str, &str)],
|
||||
),
|
||||
],
|
||||
) {
|
||||
Ok(m) => {
|
||||
tracing::info!("cgroup2 manager initialized");
|
||||
Arc::new(m)
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::warn!(error = %e, "cgroup2 init failed, using noop");
|
||||
Arc::new(cgroups::NoopCgroupManager)
|
||||
}
|
||||
};
|
||||
|
||||
// Port subsystem
|
||||
let port_subsystem = Arc::new(PortSubsystem::new(Arc::clone(&cgroup_manager)));
|
||||
@ -138,8 +153,7 @@ async fn main() {
|
||||
// RPC services (Connect protocol — serves Connect + gRPC + gRPC-Web on same port)
|
||||
let connect_router = rpc::rpc_router(Arc::clone(&state));
|
||||
|
||||
let app = http::router(Arc::clone(&state))
|
||||
.fallback_service(connect_router.into_axum_service());
|
||||
let app = http::router(Arc::clone(&state)).fallback_service(connect_router.into_axum_service());
|
||||
|
||||
// --cmd: spawn initial process if specified
|
||||
if !cli.start_cmd.is_empty() {
|
||||
@ -151,7 +165,12 @@ async fn main() {
|
||||
}
|
||||
|
||||
let addr = SocketAddr::from(([0, 0, 0, 0], cli.port));
|
||||
tracing::info!(port = cli.port, version = VERSION, commit = COMMIT, "envd starting");
|
||||
tracing::info!(
|
||||
port = cli.port,
|
||||
version = VERSION,
|
||||
commit = COMMIT,
|
||||
"envd starting"
|
||||
);
|
||||
|
||||
let listener = TcpListener::bind(addr).await.expect("failed to bind");
|
||||
|
||||
@ -186,9 +205,7 @@ fn spawn_initial_command(cmd: &str, state: &AppState) {
|
||||
|
||||
let home = user.dir.to_string_lossy().to_string();
|
||||
let default_workdir = state.defaults.workdir();
|
||||
let cwd = default_workdir
|
||||
.as_deref()
|
||||
.unwrap_or(&home);
|
||||
let cwd = default_workdir.as_deref().unwrap_or(&home);
|
||||
|
||||
match process_handler::spawn_process(
|
||||
cmd,
|
||||
@ -235,8 +252,7 @@ fn memory_reclaimer(_state: Arc<AppState>) {
|
||||
} else {
|
||||
let mut sys2 = sysinfo::System::new();
|
||||
sys2.refresh_memory();
|
||||
let freed_mb =
|
||||
sys2.available_memory().saturating_sub(available) / (1024 * 1024);
|
||||
let freed_mb = sys2.available_memory().saturating_sub(available) / (1024 * 1024);
|
||||
tracing::info!(used_pct, freed_mb, "page cache dropped");
|
||||
}
|
||||
}
|
||||
|
||||
@ -1,2 +1,2 @@
|
||||
pub mod user;
|
||||
pub mod path;
|
||||
pub mod user;
|
||||
|
||||
@ -94,7 +94,10 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn tilde_slash_path() {
|
||||
assert_eq!(expand_tilde("~/docs", "/home/user").unwrap(), "/home/user/docs");
|
||||
assert_eq!(
|
||||
expand_tilde("~/docs", "/home/user").unwrap(),
|
||||
"/home/user/docs"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@ -109,12 +112,18 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn tilde_relative_no_tilde() {
|
||||
assert_eq!(expand_tilde("relative/path", "/home/u").unwrap(), "relative/path");
|
||||
assert_eq!(
|
||||
expand_tilde("relative/path", "/home/u").unwrap(),
|
||||
"relative/path"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn tilde_cmd_like() {
|
||||
assert_eq!(expand_tilde("~/bin/myapp", "/home/user").unwrap(), "/home/user/bin/myapp");
|
||||
assert_eq!(
|
||||
expand_tilde("~/bin/myapp", "/home/user").unwrap(),
|
||||
"/home/user/bin/myapp"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@ -144,12 +153,18 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn resolve_absolute_passthrough() {
|
||||
assert_eq!(expand_and_resolve("/abs/path", "/home", None).unwrap(), "/abs/path");
|
||||
assert_eq!(
|
||||
expand_and_resolve("/abs/path", "/home", None).unwrap(),
|
||||
"/abs/path"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn resolve_empty_uses_default() {
|
||||
assert_eq!(expand_and_resolve("", "/home", Some("/default")).unwrap(), "/default");
|
||||
assert_eq!(
|
||||
expand_and_resolve("", "/home", Some("/default")).unwrap(),
|
||||
"/default"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@ -161,7 +176,10 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn resolve_tilde_expands() {
|
||||
assert_eq!(expand_and_resolve("~/dir", "/home/u", None).unwrap(), "/home/u/dir");
|
||||
assert_eq!(
|
||||
expand_and_resolve("~/dir", "/home/u", None).unwrap(),
|
||||
"/home/u/dir"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
@ -37,6 +37,36 @@ pub fn read_tcp_connections() -> Vec<ConnStat> {
|
||||
conns
|
||||
}
|
||||
|
||||
/// Returns the TCP ports in LISTEN state that are reachable from outside the
|
||||
/// guest through the host proxy. A port qualifies when it is bound to a
|
||||
/// wildcard address (`0.0.0.0`/`::`, directly reachable on the TAP interface)
|
||||
/// or to loopback (`127.0.0.1`/`::1`, bridged to the TAP IP by the socat
|
||||
/// forwarder). Ports bound to any other specific address are not routable from
|
||||
/// the host and are excluded, as is `exclude_port` (envd's own control port).
|
||||
/// The result is deduplicated and sorted ascending.
|
||||
pub fn reachable_listening_ports(exclude_port: u32) -> Vec<u32> {
|
||||
filter_reachable_ports(&read_tcp_connections(), exclude_port)
|
||||
}
|
||||
|
||||
fn filter_reachable_ports(conns: &[ConnStat], exclude_port: u32) -> Vec<u32> {
|
||||
let mut ports: Vec<u32> = conns
|
||||
.iter()
|
||||
.filter(|c| c.status == "LISTEN")
|
||||
.filter(|c| is_reachable_bind(&c.local_ip))
|
||||
.map(|c| c.local_port)
|
||||
.filter(|p| *p != exclude_port)
|
||||
.collect();
|
||||
ports.sort_unstable();
|
||||
ports.dedup();
|
||||
ports
|
||||
}
|
||||
|
||||
/// Reports whether a socket bound to `ip` can be reached from the host: true
/// for the wildcard addresses (routed directly via the TAP interface) and for
/// the loopback addresses (socat-forwarded to the TAP IP), false otherwise.
fn is_reachable_bind(ip: &str) -> bool {
    // Exact textual forms accepted: IPv4/IPv6 wildcard, then IPv4/IPv6 loopback.
    const REACHABLE_BINDS: [&str; 4] = ["0.0.0.0", "::", "127.0.0.1", "::1"];
    REACHABLE_BINDS.contains(&ip)
}
|
||||
|
||||
fn parse_proc_net_tcp(path: &str, family: u32) -> io::Result<Vec<ConnStat>> {
|
||||
let file = std::fs::File::open(path)?;
|
||||
let reader = io::BufReader::new(file);
|
||||
@ -92,7 +122,10 @@ fn parse_hex_addr(s: &str, family: u32) -> Option<(String, u32)> {
|
||||
if ip_bytes.len() != 4 {
|
||||
return None;
|
||||
}
|
||||
format!("{}.{}.{}.{}", ip_bytes[3], ip_bytes[2], ip_bytes[1], ip_bytes[0])
|
||||
format!(
|
||||
"{}.{}.{}.{}",
|
||||
ip_bytes[3], ip_bytes[2], ip_bytes[1], ip_bytes[0]
|
||||
)
|
||||
} else {
|
||||
if ip_bytes.len() != 16 {
|
||||
return None;
|
||||
@ -257,4 +290,76 @@ mod tests {
|
||||
fn parse_nonexistent_file_errors() {
|
||||
assert!(parse_proc_net_tcp("/nonexistent/path", libc::AF_INET as u32).is_err());
|
||||
}
|
||||
|
||||
// reachable port filtering
|
||||
|
||||
fn conn(ip: &str, port: u32, status: &str) -> ConnStat {
|
||||
ConnStat {
|
||||
local_ip: ip.to_string(),
|
||||
local_port: port,
|
||||
status: status.to_string(),
|
||||
family: libc::AF_INET as u32,
|
||||
inode: 0,
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
fn reachable_bind_accepts_wildcard_and_loopback() {
    // Wildcard and loopback binds, in both IPv4 and IPv6 form.
    for ip in ["0.0.0.0", "::", "127.0.0.1", "::1"] {
        assert!(is_reachable_bind(ip));
    }
}
|
||||
|
||||
#[test]
fn reachable_bind_rejects_specific_address() {
    // Binds to a specific, non-loopback address must not count as reachable.
    for ip in ["192.168.1.5", "169.254.0.21", "10.0.0.1"] {
        assert!(!is_reachable_bind(ip));
    }
}
|
||||
|
||||
#[test]
fn filter_keeps_only_listen_state() {
    // The ESTABLISHED socket on 9000 must be ignored.
    let table = [
        conn("0.0.0.0", 8000, "LISTEN"),
        conn("0.0.0.0", 9000, "ESTABLISHED"),
    ];
    let ports = filter_reachable_ports(&table, 49983);
    assert_eq!(ports, vec![8000]);
}
|
||||
|
||||
#[test]
fn filter_excludes_unreachable_binds() {
    // Only the loopback listener survives; the two specific-address binds drop.
    let table = [
        conn("127.0.0.1", 8000, "LISTEN"),
        conn("169.254.0.21", 8001, "LISTEN"), // socat's own listener
        conn("192.168.1.5", 8002, "LISTEN"),
    ];
    let ports = filter_reachable_ports(&table, 49983);
    assert_eq!(ports, vec![8000]);
}
|
||||
|
||||
#[test]
fn filter_excludes_envd_control_port() {
    // The port passed as `exclude_port` is removed even though it is listening.
    let table = [
        conn("0.0.0.0", 49983, "LISTEN"),
        conn("0.0.0.0", 8000, "LISTEN"),
    ];
    let ports = filter_reachable_ports(&table, 49983);
    assert_eq!(ports, vec![8000]);
}
|
||||
|
||||
#[test]
fn filter_dedups_and_sorts() {
    // Port 8000 appears on both the IPv6 loopback and the IPv4 wildcard; it
    // must be reported once, and the output must be in ascending order.
    let table = [
        conn("::1", 8000, "LISTEN"),
        conn("0.0.0.0", 8000, "LISTEN"),
        conn("0.0.0.0", 3000, "LISTEN"),
    ];
    let ports = filter_reachable_ports(&table, 49983);
    assert_eq!(ports, vec![3000, 8000]);
}
|
||||
|
||||
#[test]
fn filter_empty_when_no_listeners() {
    // A table with no LISTEN sockets yields no ports at all.
    let table = [conn("0.0.0.0", 8000, "ESTABLISHED")];
    let ports = filter_reachable_ports(&table, 49983);
    assert!(ports.is_empty());
}
|
||||
}
|
||||
|
||||
@ -53,9 +53,7 @@ pub fn build_entry_info(path: &str) -> Result<EntryInfo, ConnectError> {
|
||||
Err(_) => FileType::FILE_TYPE_UNSPECIFIED,
|
||||
};
|
||||
|
||||
let target_mode = std::fs::metadata(p)
|
||||
.map(|m| m.mode() & 0o7777)
|
||||
.unwrap_or(0);
|
||||
let target_mode = std::fs::metadata(p).map(|m| m.mode() & 0o7777).unwrap_or(0);
|
||||
|
||||
(target_type, target_mode, Some(target))
|
||||
} else {
|
||||
|
||||
@ -98,8 +98,7 @@ impl Filesystem for FilesystemServiceImpl {
|
||||
}
|
||||
|
||||
let username = extract_username(&ctx).unwrap_or_else(|| self.state.defaults.user());
|
||||
let user =
|
||||
lookup_user(&username).map_err(|e| ConnectError::new(ErrorCode::Internal, e))?;
|
||||
let user = lookup_user(&username).map_err(|e| ConnectError::new(ErrorCode::Internal, e))?;
|
||||
|
||||
ensure_dirs(&path, user.uid, user.gid)
|
||||
.map_err(|e| ConnectError::new(ErrorCode::Internal, e))?;
|
||||
@ -123,8 +122,7 @@ impl Filesystem for FilesystemServiceImpl {
|
||||
let destination = self.resolve_path(request.destination, &ctx)?;
|
||||
|
||||
let username = extract_username(&ctx).unwrap_or_else(|| self.state.defaults.user());
|
||||
let user =
|
||||
lookup_user(&username).map_err(|e| ConnectError::new(ErrorCode::Internal, e))?;
|
||||
let user = lookup_user(&username).map_err(|e| ConnectError::new(ErrorCode::Internal, e))?;
|
||||
|
||||
if let Some(parent) = Path::new(&destination).parent() {
|
||||
ensure_dirs(&parent.to_string_lossy(), user.uid, user.gid)
|
||||
@ -206,7 +204,12 @@ impl Filesystem for FilesystemServiceImpl {
|
||||
}
|
||||
}
|
||||
|
||||
Ok((RemoveResponse { ..Default::default() }, ctx))
|
||||
Ok((
|
||||
RemoveResponse {
|
||||
..Default::default()
|
||||
},
|
||||
ctx,
|
||||
))
|
||||
}
|
||||
|
||||
async fn watch_dir(
|
||||
@ -247,8 +250,8 @@ impl Filesystem for FilesystemServiceImpl {
|
||||
let events: Arc<Mutex<Vec<FilesystemEvent>>> = Arc::new(Mutex::new(Vec::new()));
|
||||
let events_cb = Arc::clone(&events);
|
||||
|
||||
let mut watcher = notify::recommended_watcher(
|
||||
move |res: Result<notify::Event, notify::Error>| {
|
||||
let mut watcher =
|
||||
notify::recommended_watcher(move |res: Result<notify::Event, notify::Error>| {
|
||||
if let Ok(event) = res {
|
||||
let event_type = match event.kind {
|
||||
notify::EventKind::Create(_) => EventType::EVENT_TYPE_CREATE,
|
||||
@ -275,11 +278,13 @@ impl Filesystem for FilesystemServiceImpl {
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
)
|
||||
.map_err(|e| {
|
||||
ConnectError::new(ErrorCode::Internal, format!("failed to create watcher: {e}"))
|
||||
})?;
|
||||
})
|
||||
.map_err(|e| {
|
||||
ConnectError::new(
|
||||
ErrorCode::Internal,
|
||||
format!("failed to create watcher: {e}"),
|
||||
)
|
||||
})?;
|
||||
|
||||
let mode = if recursive {
|
||||
RecursiveMode::Recursive
|
||||
@ -342,7 +347,12 @@ impl Filesystem for FilesystemServiceImpl {
|
||||
) -> Result<(RemoveWatcherResponse, Context), ConnectError> {
|
||||
let watcher_id: &str = request.watcher_id;
|
||||
self.watchers.remove(watcher_id);
|
||||
Ok((RemoveWatcherResponse { ..Default::default() }, ctx))
|
||||
Ok((
|
||||
RemoveWatcherResponse {
|
||||
..Default::default()
|
||||
},
|
||||
ctx,
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -1,17 +1,17 @@
|
||||
pub mod pb;
|
||||
pub mod entry;
|
||||
pub mod filesystem_service;
|
||||
pub mod pb;
|
||||
pub mod process_handler;
|
||||
pub mod process_service;
|
||||
pub mod filesystem_service;
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use crate::rpc::process_service::ProcessServiceImpl;
|
||||
use crate::rpc::filesystem_service::FilesystemServiceImpl;
|
||||
use crate::rpc::process_service::ProcessServiceImpl;
|
||||
use crate::state::AppState;
|
||||
|
||||
use pb::process::ProcessExt;
|
||||
use pb::filesystem::FilesystemExt;
|
||||
use pb::process::ProcessExt;
|
||||
|
||||
/// Build the connect-rust Router with both RPC services registered.
|
||||
pub fn rpc_router(state: Arc<AppState>) -> connectrpc::Router {
|
||||
|
||||
@ -1,4 +1,9 @@
|
||||
#![allow(dead_code, non_camel_case_types, unused_imports, clippy::derivable_impls)]
|
||||
#![allow(
|
||||
dead_code,
|
||||
non_camel_case_types,
|
||||
unused_imports,
|
||||
clippy::derivable_impls
|
||||
)]
|
||||
|
||||
use ::buffa;
|
||||
use ::buffa_types;
|
||||
|
||||
@ -1,10 +1,11 @@
|
||||
use std::collections::VecDeque;
|
||||
use std::io::Read;
|
||||
use std::os::unix::process::CommandExt;
|
||||
use std::process::Stdio;
|
||||
use std::sync::{Arc, Mutex};
|
||||
|
||||
use connectrpc::{ConnectError, ErrorCode};
|
||||
use nix::pty::{openpty, Winsize};
|
||||
use nix::pty::{Winsize, openpty};
|
||||
use nix::sys::signal::{self, Signal};
|
||||
use nix::unistd::Pid;
|
||||
use tokio::sync::broadcast;
|
||||
@ -15,6 +16,11 @@ const STD_CHUNK_SIZE: usize = 32768;
|
||||
const PTY_CHUNK_SIZE: usize = 16384;
|
||||
const BROADCAST_CAPACITY: usize = 4096;
|
||||
|
||||
// Upper bound on the per-process output kept for replay. A late Connect gets
|
||||
// the most recent OUTPUT_LOG_CAPACITY bytes (older output is evicted) so the
|
||||
// buffer can never grow without bound for a chatty long-running process.
|
||||
const OUTPUT_LOG_CAPACITY: usize = 256 * 1024;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub enum DataEvent {
|
||||
Stdout(Vec<u8>),
|
||||
@ -30,6 +36,37 @@ pub struct EndEvent {
|
||||
pub error: Option<String>,
|
||||
}
|
||||
|
||||
/// Bounded ring of recent output, kept so a late Connect can replay what it
|
||||
/// missed. Evicts oldest events once the retained bytes exceed the cap.
|
||||
#[derive(Default)]
|
||||
struct OutputLog {
|
||||
events: VecDeque<DataEvent>,
|
||||
bytes: usize,
|
||||
}
|
||||
|
||||
impl OutputLog {
|
||||
fn push(&mut self, ev: &DataEvent) {
|
||||
self.bytes += ev_len(ev);
|
||||
self.events.push_back(ev.clone());
|
||||
while self.bytes > OUTPUT_LOG_CAPACITY {
|
||||
match self.events.pop_front() {
|
||||
Some(old) => self.bytes -= ev_len(&old),
|
||||
None => break,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn snapshot(&self) -> Vec<DataEvent> {
|
||||
self.events.iter().cloned().collect()
|
||||
}
|
||||
}
|
||||
|
||||
fn ev_len(ev: &DataEvent) -> usize {
|
||||
match ev {
|
||||
DataEvent::Stdout(d) | DataEvent::Stderr(d) | DataEvent::Pty(d) => d.len(),
|
||||
}
|
||||
}
|
||||
|
||||
pub struct ProcessHandle {
|
||||
pub config: ProcessConfig,
|
||||
pub tag: Option<String>,
|
||||
@ -38,6 +75,7 @@ pub struct ProcessHandle {
|
||||
data_tx: broadcast::Sender<DataEvent>,
|
||||
end_tx: broadcast::Sender<EndEvent>,
|
||||
ended: Mutex<Option<EndEvent>>,
|
||||
output_log: Mutex<OutputLog>,
|
||||
|
||||
stdin: Mutex<Option<std::process::ChildStdin>>,
|
||||
pty_master: Mutex<Option<std::fs::File>>,
|
||||
@ -48,6 +86,26 @@ impl ProcessHandle {
|
||||
self.data_tx.subscribe()
|
||||
}
|
||||
|
||||
/// Append a chunk to the replay buffer and broadcast it live, under one
|
||||
/// lock. The shared lock is what makes [`subscribe_data_replay`] race-free:
|
||||
/// a concurrent attach sees this chunk either in its snapshot or on its live
|
||||
/// receiver — never both, never neither.
|
||||
pub fn publish_data(&self, ev: DataEvent) {
|
||||
let mut log = self.output_log.lock().unwrap();
|
||||
log.push(&ev);
|
||||
let _ = self.data_tx.send(ev);
|
||||
}
|
||||
|
||||
/// Snapshot the buffered output and subscribe to live output atomically, so
|
||||
/// a late Connect replays what it missed and then continues live with no gap
|
||||
/// or duplicate across the handoff.
|
||||
pub fn subscribe_data_replay(&self) -> (Vec<DataEvent>, broadcast::Receiver<DataEvent>) {
|
||||
let log = self.output_log.lock().unwrap();
|
||||
let snapshot = log.snapshot();
|
||||
let rx = self.data_tx.subscribe();
|
||||
(snapshot, rx)
|
||||
}
|
||||
|
||||
/// Returns a new receiver on the process end-event broadcast channel.
pub fn subscribe_end(&self) -> broadcast::Receiver<EndEvent> {
    let end_rx = self.end_tx.subscribe();
    end_rx
}
|
||||
@ -160,6 +218,9 @@ pub fn spawn_process(
|
||||
env.push(("HOME".into(), home));
|
||||
env.push(("USER".into(), user.name.clone()));
|
||||
env.push(("LOGNAME".into(), user.name.clone()));
|
||||
if !user.shell.as_os_str().is_empty() {
|
||||
env.push(("SHELL".into(), user.shell.to_string_lossy().to_string()));
|
||||
}
|
||||
|
||||
default_env_vars.iter().for_each(|entry| {
|
||||
env.push((entry.key().clone(), entry.value().clone()));
|
||||
@ -179,21 +240,40 @@ pub fn spawn_process(
|
||||
let nice_delta = 0 - current_nice();
|
||||
let profile_source = r#"test -f /etc/profile && . /etc/profile
|
||||
test -f "${HOME}/.bashrc" && . "${HOME}/.bashrc""#;
|
||||
let oom_script = if nice_delta > 0 {
|
||||
format!(
|
||||
r#"echo 100 > /proc/$$/oom_score_adj
|
||||
{}
|
||||
exec /usr/bin/nice -n {} "${{@}}""#,
|
||||
profile_source, nice_delta,
|
||||
)
|
||||
} else {
|
||||
format!(
|
||||
r#"echo 100 > /proc/$$/oom_score_adj
|
||||
{}
|
||||
exec "$@""#,
|
||||
profile_source
|
||||
)
|
||||
|
||||
// Resolve the user's login shell, falling back to /bin/sh. Commands without
|
||||
// explicit args are interpreted by this shell so pipes, quoting, escape
|
||||
// sequences, backslash line-continuations, and other shell syntax work
|
||||
// without the caller having to wrap them in `sh -c` themselves.
|
||||
let shell = {
|
||||
let s = user.shell.to_string_lossy();
|
||||
if s.is_empty() {
|
||||
"/bin/sh".to_string()
|
||||
} else {
|
||||
s.to_string()
|
||||
}
|
||||
};
|
||||
|
||||
// What the wrapper finally exec's, after the optional `nice` prefix.
|
||||
// - no args: run cmd_str as a shell command line via the login shell
|
||||
// ($1 is cmd_str; $0 of the inner shell is the shell path).
|
||||
// - with args: exec the program + args directly, no shell interpretation
|
||||
// (backward-compatible program/argv form).
|
||||
let target = if args.is_empty() {
|
||||
format!(r#""{shell}" -c "$1" "{shell}""#)
|
||||
} else {
|
||||
r#""$@""#.to_string()
|
||||
};
|
||||
let nice_prefix = if nice_delta > 0 {
|
||||
format!("/usr/bin/nice -n {nice_delta} ")
|
||||
} else {
|
||||
String::new()
|
||||
};
|
||||
let oom_script = format!(
|
||||
r#"echo 100 > /proc/$$/oom_score_adj
|
||||
{profile_source}
|
||||
exec {nice_prefix}{target}"#
|
||||
);
|
||||
let mut wrapper_args = vec![
|
||||
"-c".to_string(),
|
||||
oom_script,
|
||||
@ -264,7 +344,10 @@ exec "$@""#,
|
||||
command.stderr(Stdio::null());
|
||||
|
||||
let child = command.spawn().map_err(|e| {
|
||||
ConnectError::new(ErrorCode::Internal, format!("error starting pty process: {e}"))
|
||||
ConnectError::new(
|
||||
ErrorCode::Internal,
|
||||
format!("error starting pty process: {e}"),
|
||||
)
|
||||
})?;
|
||||
|
||||
drop(slave_fd);
|
||||
@ -280,6 +363,7 @@ exec "$@""#,
|
||||
data_tx: data_tx.clone(),
|
||||
end_tx: end_tx.clone(),
|
||||
ended: Mutex::new(None),
|
||||
output_log: Mutex::new(OutputLog::default()),
|
||||
stdin: Mutex::new(None),
|
||||
pty_master: Mutex::new(Some(master_file)),
|
||||
});
|
||||
@ -287,7 +371,7 @@ exec "$@""#,
|
||||
let data_rx = handle.subscribe_data();
|
||||
let end_rx = handle.subscribe_end();
|
||||
|
||||
let data_tx_clone = data_tx.clone();
|
||||
let handle_for_reader = Arc::clone(&handle);
|
||||
let pty_reader = std::thread::spawn(move || {
|
||||
let mut master = master_clone;
|
||||
let mut buf = vec![0u8; PTY_CHUNK_SIZE];
|
||||
@ -295,7 +379,7 @@ exec "$@""#,
|
||||
match master.read(&mut buf) {
|
||||
Ok(0) => break,
|
||||
Ok(n) => {
|
||||
let _ = data_tx_clone.send(DataEvent::Pty(buf[..n].to_vec()));
|
||||
handle_for_reader.publish_data(DataEvent::Pty(buf[..n].to_vec()));
|
||||
}
|
||||
Err(_) => break,
|
||||
}
|
||||
@ -329,7 +413,11 @@ exec "$@""#,
|
||||
});
|
||||
|
||||
tracing::info!(pid, cmd = cmd_str, "process started (pty)");
|
||||
Ok(SpawnedProcess { handle, data_rx, end_rx })
|
||||
Ok(SpawnedProcess {
|
||||
handle,
|
||||
data_rx,
|
||||
end_rx,
|
||||
})
|
||||
} else {
|
||||
let mut command = std::process::Command::new("/bin/bash");
|
||||
command
|
||||
@ -375,6 +463,7 @@ exec "$@""#,
|
||||
data_tx: data_tx.clone(),
|
||||
end_tx: end_tx.clone(),
|
||||
ended: Mutex::new(None),
|
||||
output_log: Mutex::new(OutputLog::default()),
|
||||
stdin: Mutex::new(stdin),
|
||||
pty_master: Mutex::new(None),
|
||||
});
|
||||
@ -385,14 +474,14 @@ exec "$@""#,
|
||||
let mut output_readers: Vec<std::thread::JoinHandle<()>> = Vec::new();
|
||||
|
||||
if let Some(mut out) = stdout {
|
||||
let tx = data_tx.clone();
|
||||
let handle_for_reader = Arc::clone(&handle);
|
||||
output_readers.push(std::thread::spawn(move || {
|
||||
let mut buf = vec![0u8; STD_CHUNK_SIZE];
|
||||
loop {
|
||||
match out.read(&mut buf) {
|
||||
Ok(0) => break,
|
||||
Ok(n) => {
|
||||
let _ = tx.send(DataEvent::Stdout(buf[..n].to_vec()));
|
||||
handle_for_reader.publish_data(DataEvent::Stdout(buf[..n].to_vec()));
|
||||
}
|
||||
Err(_) => break,
|
||||
}
|
||||
@ -401,14 +490,14 @@ exec "$@""#,
|
||||
}
|
||||
|
||||
if let Some(mut err_pipe) = stderr {
|
||||
let tx = data_tx.clone();
|
||||
let handle_for_reader = Arc::clone(&handle);
|
||||
output_readers.push(std::thread::spawn(move || {
|
||||
let mut buf = vec![0u8; STD_CHUNK_SIZE];
|
||||
loop {
|
||||
match err_pipe.read(&mut buf) {
|
||||
Ok(0) => break,
|
||||
Ok(n) => {
|
||||
let _ = tx.send(DataEvent::Stderr(buf[..n].to_vec()));
|
||||
handle_for_reader.publish_data(DataEvent::Stderr(buf[..n].to_vec()));
|
||||
}
|
||||
Err(_) => break,
|
||||
}
|
||||
@ -444,7 +533,11 @@ exec "$@""#,
|
||||
});
|
||||
|
||||
tracing::info!(pid, cmd = cmd_str, "process started (pipe)");
|
||||
Ok(SpawnedProcess { handle, data_rx, end_rx })
|
||||
Ok(SpawnedProcess {
|
||||
handle,
|
||||
data_rx,
|
||||
end_rx,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -4,7 +4,8 @@ use std::sync::Arc;
|
||||
|
||||
use connectrpc::{ConnectError, Context, ErrorCode};
|
||||
use dashmap::DashMap;
|
||||
use futures::Stream;
|
||||
use futures::{Stream, StreamExt};
|
||||
use tokio::sync::broadcast;
|
||||
|
||||
use crate::permissions::path::{expand_and_resolve, expand_tilde};
|
||||
use crate::permissions::user::lookup_user;
|
||||
@ -72,8 +73,7 @@ impl ProcessServiceImpl {
|
||||
})?;
|
||||
|
||||
let username = self.state.defaults.user();
|
||||
let user =
|
||||
lookup_user(&username).map_err(|e| ConnectError::new(ErrorCode::Internal, e))?;
|
||||
let user = lookup_user(&username).map_err(|e| ConnectError::new(ErrorCode::Internal, e))?;
|
||||
|
||||
let cmd_raw: &str = proc_config.cmd;
|
||||
let args_raw: Vec<String> = proc_config.args.iter().map(|s| s.to_string()).collect();
|
||||
@ -87,7 +87,8 @@ impl ProcessServiceImpl {
|
||||
|
||||
let cmd = expand_tilde(cmd_raw, &home_dir)
|
||||
.map_err(|e| ConnectError::new(ErrorCode::InvalidArgument, e))?;
|
||||
let args: Vec<String> = args_raw.into_iter()
|
||||
let args: Vec<String> = args_raw
|
||||
.into_iter()
|
||||
.map(|a| expand_tilde(&a, &home_dir).unwrap_or(a))
|
||||
.collect();
|
||||
|
||||
@ -136,7 +137,8 @@ impl ProcessServiceImpl {
|
||||
&self.state.defaults.env_vars,
|
||||
)?;
|
||||
|
||||
self.processes.insert(spawned.handle.pid, Arc::clone(&spawned.handle));
|
||||
self.processes
|
||||
.insert(spawned.handle.pid, Arc::clone(&spawned.handle));
|
||||
|
||||
let processes = Arc::clone(&self.processes);
|
||||
let pid = spawned.handle.pid;
|
||||
@ -203,50 +205,10 @@ impl Process for ProcessServiceImpl {
|
||||
let spawned = self.spawn_from_request(&request)?;
|
||||
let pid = spawned.handle.pid;
|
||||
|
||||
let mut data_rx = spawned.data_rx;
|
||||
let mut end_rx = spawned.end_rx;
|
||||
|
||||
let stream = async_stream::stream! {
|
||||
yield Ok(make_start_response(pid));
|
||||
|
||||
loop {
|
||||
tokio::select! {
|
||||
biased;
|
||||
data = data_rx.recv() => {
|
||||
match data {
|
||||
Ok(ev) => yield Ok(make_data_start_response(ev)),
|
||||
Err(tokio::sync::broadcast::error::RecvError::Lagged(_)) => continue,
|
||||
Err(tokio::sync::broadcast::error::RecvError::Closed) => {
|
||||
// Data channel closed: the process ended and its
|
||||
// handle was dropped. The end event is published
|
||||
// before the handle drop, so it is still buffered
|
||||
// — emit it rather than losing the exit code.
|
||||
if let Ok(end) = end_rx.try_recv() {
|
||||
yield Ok(make_end_start_response(end));
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
end = end_rx.recv() => {
|
||||
// Process ended. The waiter joins the output readers
|
||||
// before sending this event, so every byte is already
|
||||
// in the data channel — drain it fully before the end.
|
||||
loop {
|
||||
match data_rx.try_recv() {
|
||||
Ok(ev) => yield Ok(make_data_start_response(ev)),
|
||||
Err(tokio::sync::broadcast::error::TryRecvError::Lagged(_)) => continue,
|
||||
Err(_) => break,
|
||||
}
|
||||
}
|
||||
if let Ok(end) = end {
|
||||
yield Ok(make_end_start_response(end));
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
// Start subscribes before any output is produced, so there is nothing to
|
||||
// replay and the process cannot have ended yet.
|
||||
let stream = process_event_stream(pid, Vec::new(), spawned.data_rx, spawned.end_rx, None)
|
||||
.map(|r| r.map(wrap_start_response));
|
||||
|
||||
Ok((Box::pin(stream), ctx))
|
||||
}
|
||||
@ -268,81 +230,17 @@ impl Process for ProcessServiceImpl {
|
||||
let handle = self.get_process_by_selector(selector)?;
|
||||
let pid = handle.pid;
|
||||
|
||||
let mut data_rx = handle.subscribe_data();
|
||||
let mut end_rx = handle.subscribe_end();
|
||||
// Snapshot buffered output + subscribe live atomically, then read the
|
||||
// exit state. Ordering matters: end_rx must be subscribed before
|
||||
// cached_end is read so a process that exits in the window is still
|
||||
// observed (via the channel if subscribed in time, via cached_end
|
||||
// otherwise).
|
||||
let (replay, data_rx) = handle.subscribe_data_replay();
|
||||
let end_rx = handle.subscribe_end();
|
||||
let cached_end = handle.cached_end();
|
||||
|
||||
let stream = async_stream::stream! {
|
||||
yield Ok(ConnectResponse {
|
||||
event: buffa::MessageField::some(ProcessEvent {
|
||||
event: Some(process_event::Event::Start(Box::new(
|
||||
process_event::StartEvent { pid, ..Default::default() },
|
||||
))),
|
||||
..Default::default()
|
||||
}),
|
||||
..Default::default()
|
||||
});
|
||||
|
||||
if let Some(end) = cached_end {
|
||||
yield Ok(ConnectResponse {
|
||||
event: buffa::MessageField::some(make_end_event(end)),
|
||||
..Default::default()
|
||||
});
|
||||
} else {
|
||||
loop {
|
||||
tokio::select! {
|
||||
biased;
|
||||
data = data_rx.recv() => {
|
||||
match data {
|
||||
Ok(ev) => {
|
||||
yield Ok(ConnectResponse {
|
||||
event: buffa::MessageField::some(make_data_event(ev)),
|
||||
..Default::default()
|
||||
});
|
||||
}
|
||||
Err(tokio::sync::broadcast::error::RecvError::Lagged(_)) => continue,
|
||||
Err(tokio::sync::broadcast::error::RecvError::Closed) => {
|
||||
// Data channel closed: the process ended and
|
||||
// its handle was dropped. The end event is
|
||||
// published before the handle drop, so it is
|
||||
// still buffered — emit it rather than losing
|
||||
// the exit code.
|
||||
if let Ok(end) = end_rx.try_recv() {
|
||||
yield Ok(ConnectResponse {
|
||||
event: buffa::MessageField::some(make_end_event(end)),
|
||||
..Default::default()
|
||||
});
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
end = end_rx.recv() => {
|
||||
// Process ended. The waiter joins the output readers
|
||||
// before sending this event, so every byte is already
|
||||
// in the data channel — drain it fully before the end.
|
||||
loop {
|
||||
match data_rx.try_recv() {
|
||||
Ok(ev) => yield Ok(ConnectResponse {
|
||||
event: buffa::MessageField::some(make_data_event(ev)),
|
||||
..Default::default()
|
||||
}),
|
||||
Err(tokio::sync::broadcast::error::TryRecvError::Lagged(_)) => continue,
|
||||
Err(_) => break,
|
||||
}
|
||||
}
|
||||
if let Ok(end) = end {
|
||||
yield Ok(ConnectResponse {
|
||||
event: buffa::MessageField::some(make_end_event(end)),
|
||||
..Default::default()
|
||||
});
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
let stream = process_event_stream(pid, replay, data_rx, end_rx, cached_end)
|
||||
.map(|r| r.map(wrap_connect_response));
|
||||
|
||||
Ok((Box::pin(stream), ctx))
|
||||
}
|
||||
@ -363,7 +261,12 @@ impl Process for ProcessServiceImpl {
|
||||
}
|
||||
}
|
||||
|
||||
Ok((UpdateResponse { ..Default::default() }, ctx))
|
||||
Ok((
|
||||
UpdateResponse {
|
||||
..Default::default()
|
||||
},
|
||||
ctx,
|
||||
))
|
||||
}
|
||||
|
||||
async fn stream_input(
|
||||
@ -372,11 +275,11 @@ impl Process for ProcessServiceImpl {
|
||||
mut requests: Pin<
|
||||
Box<
|
||||
dyn Stream<
|
||||
Item = Result<
|
||||
buffa::view::OwnedView<StreamInputRequestView<'static>>,
|
||||
ConnectError,
|
||||
>,
|
||||
> + Send,
|
||||
Item = Result<
|
||||
buffa::view::OwnedView<StreamInputRequestView<'static>>,
|
||||
ConnectError,
|
||||
>,
|
||||
> + Send,
|
||||
>,
|
||||
>,
|
||||
) -> Result<(StreamInputResponse, Context), ConnectError> {
|
||||
@ -405,7 +308,12 @@ impl Process for ProcessServiceImpl {
|
||||
}
|
||||
}
|
||||
|
||||
Ok((StreamInputResponse { ..Default::default() }, ctx))
|
||||
Ok((
|
||||
StreamInputResponse {
|
||||
..Default::default()
|
||||
},
|
||||
ctx,
|
||||
))
|
||||
}
|
||||
|
||||
async fn send_input(
|
||||
@ -422,7 +330,12 @@ impl Process for ProcessServiceImpl {
|
||||
write_input(&handle, input)?;
|
||||
}
|
||||
|
||||
Ok((SendInputResponse { ..Default::default() }, ctx))
|
||||
Ok((
|
||||
SendInputResponse {
|
||||
..Default::default()
|
||||
},
|
||||
ctx,
|
||||
))
|
||||
}
|
||||
|
||||
async fn send_signal(
|
||||
@ -442,12 +355,17 @@ impl Process for ProcessServiceImpl {
|
||||
return Err(ConnectError::new(
|
||||
ErrorCode::InvalidArgument,
|
||||
"invalid or unspecified signal",
|
||||
))
|
||||
));
|
||||
}
|
||||
};
|
||||
|
||||
handle.send_signal(sig)?;
|
||||
Ok((SendSignalResponse { ..Default::default() }, ctx))
|
||||
Ok((
|
||||
SendSignalResponse {
|
||||
..Default::default()
|
||||
},
|
||||
ctx,
|
||||
))
|
||||
}
|
||||
|
||||
async fn close_stdin(
|
||||
@ -460,7 +378,12 @@ impl Process for ProcessServiceImpl {
|
||||
})?;
|
||||
let handle = self.get_process_by_selector(selector)?;
|
||||
handle.close_stdin()?;
|
||||
Ok((CloseStdinResponse { ..Default::default() }, ctx))
|
||||
Ok((
|
||||
CloseStdinResponse {
|
||||
..Default::default()
|
||||
},
|
||||
ctx,
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
@ -472,17 +395,106 @@ fn write_input(handle: &ProcessHandle, input: &ProcessInputView) -> Result<(), C
|
||||
}
|
||||
}
|
||||
|
||||
fn make_start_response(pid: u32) -> StartResponse {
|
||||
/// Shared event pump for `Start` and `Connect`. Yields a leading start event,
|
||||
/// replays any buffered output (empty for `Start`), then forwards live output
|
||||
/// and the final exit event. The caller wraps each `ProcessEvent` into its own
|
||||
/// response envelope, so the streaming logic lives in exactly one place.
///
/// Parameters:
/// - `pid`: placed in the leading start event.
/// - `replay`: output events yielded, in order, before anything is read from
///   the live receivers.
/// - `data_rx` / `end_rx`: broadcast receivers for output and for the exit
///   event.
/// - `cached_end`: when `Some`, the stream drains `data_rx` without waiting,
///   yields this exit event and finishes; `end_rx` is never awaited.
///
/// Every item yielded by this function is `Ok`; no `Err` item is produced here.
|
||||
fn process_event_stream(
|
||||
pid: u32,
|
||||
replay: Vec<DataEvent>,
|
||||
mut data_rx: broadcast::Receiver<DataEvent>,
|
||||
mut end_rx: broadcast::Receiver<process_handler::EndEvent>,
|
||||
cached_end: Option<process_handler::EndEvent>,
|
||||
) -> impl Stream<Item = Result<ProcessEvent, ConnectError>> {
|
||||
use broadcast::error::{RecvError, TryRecvError};
|
||||
|
||||
async_stream::stream! {
|
||||
yield Ok(make_start_event(pid));
|
||||
|
||||
for ev in replay {
|
||||
yield Ok(make_data_event(ev));
|
||||
}
|
||||
|
||||
// Process already exited before we attached. The snapshot above covers
|
||||
// output up to the attach point; drain anything the live receiver
|
||||
// buffered after the snapshot, then emit the cached exit. end_rx may
|
||||
// never deliver here — a broadcast receiver only sees events sent after
|
||||
// it subscribed, and the exit can predate that — so cached_end is the
|
||||
// source of truth.
|
||||
if let Some(end) = cached_end {
|
||||
loop {
|
||||
match data_rx.try_recv() {
|
||||
Ok(ev) => yield Ok(make_data_event(ev)),
|
||||
// The receiver fell behind; the skipped output is dropped and
// draining resumes with the next message.
Err(TryRecvError::Lagged(_)) => continue,
|
||||
// Empty or closed: nothing more can be drained without waiting.
Err(_) => break,
|
||||
}
|
||||
}
|
||||
yield Ok(make_end_event(end));
|
||||
return;
|
||||
}
|
||||
|
||||
loop {
|
||||
tokio::select! {
|
||||
// `biased` polls the branches in declaration order, so pending
// output is always taken before the end event.
biased;
|
||||
data = data_rx.recv() => {
|
||||
match data {
|
||||
Ok(ev) => yield Ok(make_data_event(ev)),
|
||||
// Missed messages are not reported to the client; keep going.
Err(RecvError::Lagged(_)) => continue,
|
||||
Err(RecvError::Closed) => {
|
||||
// Data channel closed: the process ended and its
|
||||
// handle was dropped. The end event is published
|
||||
// before the handle drop, so it is still buffered —
|
||||
// emit it rather than losing the exit code.
|
||||
if let Ok(end) = end_rx.try_recv() {
|
||||
yield Ok(make_end_event(end));
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
end = end_rx.recv() => {
|
||||
// Process ended. The waiter joins the output readers before
|
||||
// sending this event, so every byte is already in the data
|
||||
// channel — drain it fully before the end.
|
||||
loop {
|
||||
match data_rx.try_recv() {
|
||||
Ok(ev) => yield Ok(make_data_event(ev)),
|
||||
Err(TryRecvError::Lagged(_)) => continue,
|
||||
Err(_) => break,
|
||||
}
|
||||
}
|
||||
// If receiving the end event itself failed, the stream finishes
// without emitting an end event.
if let Ok(end) = end {
|
||||
yield Ok(make_end_event(end));
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn wrap_start_response(event: ProcessEvent) -> StartResponse {
|
||||
StartResponse {
|
||||
event: buffa::MessageField::some(ProcessEvent {
|
||||
event: Some(process_event::Event::Start(Box::new(
|
||||
process_event::StartEvent {
|
||||
pid,
|
||||
..Default::default()
|
||||
},
|
||||
))),
|
||||
..Default::default()
|
||||
}),
|
||||
event: buffa::MessageField::some(event),
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
|
||||
fn wrap_connect_response(event: ProcessEvent) -> ConnectResponse {
|
||||
ConnectResponse {
|
||||
event: buffa::MessageField::some(event),
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
|
||||
fn make_start_event(pid: u32) -> ProcessEvent {
|
||||
ProcessEvent {
|
||||
event: Some(process_event::Event::Start(Box::new(
|
||||
process_event::StartEvent {
|
||||
pid,
|
||||
..Default::default()
|
||||
},
|
||||
))),
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
@ -504,13 +516,6 @@ fn make_data_event(ev: DataEvent) -> ProcessEvent {
|
||||
}
|
||||
}
|
||||
|
||||
fn make_data_start_response(ev: DataEvent) -> StartResponse {
|
||||
StartResponse {
|
||||
event: buffa::MessageField::some(make_data_event(ev)),
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
|
||||
fn make_end_event(end: process_handler::EndEvent) -> ProcessEvent {
|
||||
ProcessEvent {
|
||||
event: Some(process_event::Event::End(Box::new(
|
||||
@ -526,13 +531,6 @@ fn make_end_event(end: process_handler::EndEvent) -> ProcessEvent {
|
||||
}
|
||||
}
|
||||
|
||||
fn make_end_start_response(end: process_handler::EndEvent) -> StartResponse {
|
||||
StartResponse {
|
||||
event: buffa::MessageField::some(make_end_event(end)),
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
@ -589,7 +587,8 @@ mod tests {
|
||||
fn args_other_user_left_literal() {
|
||||
let home_dir = "/home/testuser";
|
||||
let args_raw = vec!["~other".to_string(), "~other/path".to_string()];
|
||||
let args: Vec<String> = args_raw.into_iter()
|
||||
let args: Vec<String> = args_raw
|
||||
.into_iter()
|
||||
.map(|a| expand_tilde(&a, home_dir).unwrap_or(a))
|
||||
.collect();
|
||||
assert_eq!(args, vec!["~other", "~other/path"]);
|
||||
@ -618,17 +617,22 @@ mod tests {
|
||||
"/tmp/out".to_string(),
|
||||
"~other".to_string(),
|
||||
];
|
||||
let args: Vec<String> = args_raw.into_iter()
|
||||
let args: Vec<String> = args_raw
|
||||
.into_iter()
|
||||
.map(|a| expand_tilde(&a, home_dir).unwrap_or(a))
|
||||
.collect();
|
||||
assert_eq!(args, vec!["-p", "/home/testuser/data", "/tmp/out", "~other"]);
|
||||
assert_eq!(
|
||||
args,
|
||||
vec!["-p", "/home/testuser/data", "/tmp/out", "~other"]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn args_empty_passthrough() {
|
||||
let home_dir = "/home/testuser";
|
||||
let args_raw: Vec<String> = vec![];
|
||||
let args: Vec<String> = args_raw.into_iter()
|
||||
let args: Vec<String> = args_raw
|
||||
.into_iter()
|
||||
.map(|a| expand_tilde(&a, home_dir).unwrap_or(a))
|
||||
.collect();
|
||||
assert!(args.is_empty());
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
use std::sync::atomic::{AtomicBool, AtomicU32, AtomicU64, AtomicU8, Ordering};
|
||||
use std::sync::atomic::{AtomicBool, AtomicU8, AtomicU32, AtomicU64, Ordering};
|
||||
use std::sync::{Arc, Mutex};
|
||||
|
||||
use crate::auth::token::SecureToken;
|
||||
@ -17,6 +17,11 @@ pub struct AppState {
|
||||
pub port_subsystem: Option<Arc<PortSubsystem>>,
|
||||
pub cpu_used_pct: AtomicU32,
|
||||
pub cpu_count: AtomicU32,
|
||||
/// Whole-VM IO throughput, bytes/sec, sampled over the last 1s tick. Used
|
||||
/// by the host activity sampler to keep IO-bound-but-CPU-idle workloads
|
||||
/// (e.g. a long download) from being mistaken for inactive.
|
||||
pub net_bps: AtomicU64,
|
||||
pub disk_bps: AtomicU64,
|
||||
|
||||
/// Memory preload coordination. The host agent POSTs /memory/preload after
|
||||
/// a snapshot restore to materialise every physical page (so the next
|
||||
@ -56,6 +61,8 @@ impl AppState {
|
||||
port_subsystem,
|
||||
cpu_used_pct: AtomicU32::new(0),
|
||||
cpu_count: AtomicU32::new(0),
|
||||
net_bps: AtomicU64::new(0),
|
||||
disk_bps: AtomicU64::new(0),
|
||||
mem_preload_started: AtomicBool::new(false),
|
||||
mem_preload_done: AtomicBool::new(false),
|
||||
mem_preload_cancel: AtomicBool::new(false),
|
||||
@ -70,7 +77,7 @@ impl AppState {
|
||||
|
||||
let state_clone = Arc::clone(&state);
|
||||
std::thread::spawn(move || {
|
||||
cpu_sampler(state_clone);
|
||||
activity_sampler(state_clone);
|
||||
});
|
||||
|
||||
state
|
||||
@ -84,6 +91,14 @@ impl AppState {
|
||||
self.cpu_count.load(Ordering::Relaxed)
|
||||
}
|
||||
|
||||
pub fn net_bps(&self) -> u64 {
|
||||
self.net_bps.load(Ordering::Relaxed)
|
||||
}
|
||||
|
||||
pub fn disk_bps(&self) -> u64 {
|
||||
self.disk_bps.load(Ordering::Relaxed)
|
||||
}
|
||||
|
||||
/// Records a new lifecycle ID, returning true if it changed (i.e. this
|
||||
/// is the first /init since a resume). First-ever call returns false:
|
||||
/// boot-time /init doesn't need port-subsystem restart since the
|
||||
@ -99,12 +114,16 @@ impl AppState {
|
||||
}
|
||||
}
|
||||
|
||||
fn cpu_sampler(state: Arc<AppState>) {
|
||||
fn activity_sampler(state: Arc<AppState>) {
|
||||
use sysinfo::System;
|
||||
|
||||
let mut sys = System::new();
|
||||
sys.refresh_cpu_all();
|
||||
|
||||
// Cumulative IO counters from the previous tick. None until the first read.
|
||||
let mut prev_net: Option<u64> = read_net_bytes();
|
||||
let mut prev_disk: Option<u64> = read_disk_bytes();
|
||||
|
||||
loop {
|
||||
std::thread::sleep(std::time::Duration::from_secs(1));
|
||||
|
||||
@ -123,5 +142,73 @@ fn cpu_sampler(state: Arc<AppState>) {
|
||||
state
|
||||
.cpu_count
|
||||
.store(sys.cpus().len() as u32, Ordering::Relaxed);
|
||||
|
||||
// Throughput = cumulative-counter delta over the ~1s tick. Counters can
|
||||
// reset across a snapshot restore; a wrapped/negative delta reads as 0.
|
||||
let cur_net = read_net_bytes();
|
||||
let net_bps = match (prev_net, cur_net) {
|
||||
(Some(p), Some(c)) => c.saturating_sub(p),
|
||||
_ => 0,
|
||||
};
|
||||
prev_net = cur_net;
|
||||
|
||||
let cur_disk = read_disk_bytes();
|
||||
let disk_bps = match (prev_disk, cur_disk) {
|
||||
(Some(p), Some(c)) => c.saturating_sub(p),
|
||||
_ => 0,
|
||||
};
|
||||
prev_disk = cur_disk;
|
||||
|
||||
state.net_bps.store(net_bps, Ordering::Relaxed);
|
||||
state.disk_bps.store(disk_bps, Ordering::Relaxed);
|
||||
}
|
||||
}
|
||||
|
||||
/// Cumulative rx + tx byte count over every interface except `lo`, parsed
/// from /proc/net/dev. Rows without a `:` separator are ignored and counters
/// that are missing or fail to parse contribute nothing. The sum saturates
/// instead of overflowing.
///
/// Returns None if /proc/net/dev can't be read.
fn read_net_bytes() -> Option<u64> {
    let dev_table = std::fs::read_to_string("/proc/net/dev").ok()?;

    let sum = dev_table
        .lines()
        // The table starts with two header rows.
        .skip(2)
        .filter_map(|row| row.split_once(':'))
        .filter(|(name, _)| name.trim() != "lo")
        .fold(0u64, |acc, (_, counters)| {
            let mut cols = counters.split_whitespace();
            // Column 0 holds rx bytes.
            let rx = cols
                .next()
                .and_then(|c| c.parse::<u64>().ok())
                .unwrap_or(0);
            // Column 0 is consumed, so nth(7) skips columns 1..=7 and yields
            // column 8, the tx bytes.
            let tx = cols
                .nth(7)
                .and_then(|c| c.parse::<u64>().ok())
                .unwrap_or(0);
            acc.saturating_add(rx).saturating_add(tx)
        });

    Some(sum)
}
|
||||
|
||||
/// Total bytes read + written across whole block devices, computed from the
/// cumulative sector counters in /proc/diskstats (sectors × 512).
///
/// Rows for `loop*` and `ram*` devices are ignored. Partition rows are ignored
/// too: partition I/O is also accounted on the parent disk, so summing both
/// would double the reported throughput. A row counts as a whole device when
/// it has an entry under /sys/block. If /sys/block is not available the
/// partition filter is disabled and every remaining row is counted, which is
/// the previous behaviour.
///
/// Counters that fail to parse are treated as 0 and the sum saturates rather
/// than overflowing.
///
/// Returns None if /proc/diskstats can't be read.
fn read_disk_bytes() -> Option<u64> {
    let content = std::fs::read_to_string("/proc/diskstats").ok()?;

    // Only whole devices are listed under /sys/block; partitions live inside
    // their parent's directory. Probe once so a missing sysfs does not zero
    // out every sample.
    let sysfs_block = std::path::Path::new("/sys/block");
    let can_filter_partitions = sysfs_block.is_dir();

    let mut sectors: u64 = 0;
    for line in content.lines() {
        let fields: Vec<&str> = line.split_whitespace().collect();
        // 0=major 1=minor 2=name ... 5=sectors read ... 9=sectors written.
        if fields.len() < 10 {
            continue;
        }
        let name = fields[2];
        if name.starts_with("loop") || name.starts_with("ram") {
            continue;
        }
        // sysfs spells a '/' in a device name (e.g. cciss/c0d0) as '!'.
        if can_filter_partitions && !sysfs_block.join(name.replace('/', "!")).exists() {
            continue;
        }
        let read = fields[5].parse::<u64>().unwrap_or(0);
        let written = fields[9].parse::<u64>().unwrap_or(0);
        sectors = sectors.saturating_add(read).saturating_add(written);
    }
    // Linux reports diskstats sectors in fixed 512-byte units.
    Some(sectors.saturating_mul(512))
}
|
||||
|
||||
@ -23,12 +23,10 @@ impl AtomicMax {
|
||||
if new <= current {
|
||||
return false;
|
||||
}
|
||||
match self.val.compare_exchange_weak(
|
||||
current,
|
||||
new,
|
||||
Ordering::Release,
|
||||
Ordering::Relaxed,
|
||||
) {
|
||||
match self
|
||||
.val
|
||||
.compare_exchange_weak(current, new, Ordering::Release, Ordering::Relaxed)
|
||||
{
|
||||
Ok(_) => return true,
|
||||
Err(_) => continue,
|
||||
}
|
||||
|
||||
@ -53,14 +53,15 @@
|
||||
let byocPageCount = $derived(Math.max(1, Math.ceil(flatByocHosts.length / PAGE_SIZE)));
|
||||
let byocPageHosts = $derived(flatByocHosts.slice(byocPage * PAGE_SIZE, (byocPage + 1) * PAGE_SIZE));
|
||||
|
||||
// Stats across all hosts
|
||||
let onlineCount = $derived(allHosts.filter((h) => h.status === 'online').length);
|
||||
let pendingCount = $derived(allHosts.filter((h) => h.status === 'pending').length);
|
||||
let totalCount = $derived(allHosts.length);
|
||||
let totalCpuCores = $derived(allHosts.reduce((sum, h) => sum + (h.cpu_cores ?? 0), 0));
|
||||
let totalMemoryMb = $derived(allHosts.reduce((sum, h) => sum + (h.memory_mb ?? 0), 0));
|
||||
let totalRunningVcpus = $derived(allHosts.reduce((sum, h) => sum + h.running_vcpus, 0));
|
||||
let totalRunningMemoryMb = $derived(allHosts.reduce((sum, h) => sum + h.running_memory_mb, 0));
|
||||
// Aggregated stats — platform hosts only (admin needs a heads-up on
|
||||
// platform capacity; BYOC capacity belongs to individual teams).
|
||||
let onlineCount = $derived(platformHosts.filter((h) => h.status === 'online').length);
|
||||
let pendingCount = $derived(platformHosts.filter((h) => h.status === 'pending').length);
|
||||
let totalCount = $derived(platformHosts.length);
|
||||
let totalCpuCores = $derived(platformHosts.reduce((sum, h) => sum + (h.cpu_cores ?? 0), 0));
|
||||
let totalMemoryMb = $derived(platformHosts.reduce((sum, h) => sum + (h.memory_mb ?? 0), 0));
|
||||
let totalRunningVcpus = $derived(platformHosts.reduce((sum, h) => sum + h.running_vcpus, 0));
|
||||
let totalRunningMemoryMb = $derived(platformHosts.reduce((sum, h) => sum + h.running_memory_mb, 0));
|
||||
|
||||
function formatMem(mb: number): string {
|
||||
return mb >= 1024 ? `${(mb / 1024).toFixed(0)} GB` : `${mb} MB`;
|
||||
|
||||
@ -6,10 +6,6 @@
|
||||
let { children } = $props();
|
||||
</script>
|
||||
|
||||
<svelte:head>
|
||||
<title>Wrenn — Capsules</title>
|
||||
</svelte:head>
|
||||
|
||||
<main class="flex flex-1 flex-col overflow-y-auto bg-[var(--color-bg-0)]">
|
||||
<!-- Header area -->
|
||||
{#if $page.params.id}
|
||||
|
||||
@ -256,6 +256,10 @@
|
||||
});
|
||||
</script>
|
||||
|
||||
<svelte:head>
|
||||
<title>Wrenn — Capsules</title>
|
||||
</svelte:head>
|
||||
|
||||
<style>
|
||||
@keyframes capsule-born {
|
||||
0%, 25% { background-color: rgba(94, 140, 88, 0.1); }
|
||||
|
||||
@ -130,22 +130,8 @@ func (h *execStreamHandler) runExecStream(ctx context.Context, conn *websocket.C
|
||||
|
||||
// Forward stream events to WebSocket.
|
||||
for stream.Receive() {
|
||||
resp := stream.Msg()
|
||||
switch ev := resp.Event.(type) {
|
||||
case *pb.ExecStreamResponse_Start:
|
||||
writeWSJSON(conn, wsOutMsg{Type: "start", PID: ev.Start.Pid})
|
||||
|
||||
case *pb.ExecStreamResponse_Data:
|
||||
switch o := ev.Data.Output.(type) {
|
||||
case *pb.ExecStreamData_Stdout:
|
||||
writeWSJSON(conn, wsOutMsg{Type: "stdout", Data: string(o.Stdout)})
|
||||
case *pb.ExecStreamData_Stderr:
|
||||
writeWSJSON(conn, wsOutMsg{Type: "stderr", Data: string(o.Stderr)})
|
||||
}
|
||||
|
||||
case *pb.ExecStreamResponse_End:
|
||||
exitCode := ev.End.ExitCode
|
||||
writeWSJSON(conn, wsOutMsg{Type: "exit", ExitCode: &exitCode})
|
||||
if m, ok := procRespToWSMsg(stream.Msg()); ok {
|
||||
writeWSJSON(conn, m)
|
||||
}
|
||||
}
|
||||
|
||||
@ -159,6 +145,38 @@ func (h *execStreamHandler) runExecStream(ctx context.Context, conn *websocket.C
|
||||
updateLastActive(h.db, sandboxID, sandboxIDStr)
|
||||
}
|
||||
|
||||
// procStreamResp is satisfied by both *pb.ExecStreamResponse and
|
||||
// *pb.ConnectProcessResponse: their oneof events carry the same inner messages,
|
||||
// so the wire-to-WS mapping below is shared between the exec-stream and
|
||||
// connect-process handlers.
|
||||
type procStreamResp interface {
|
||||
GetStart() *pb.ExecStreamStart
|
||||
GetData() *pb.ExecStreamData
|
||||
GetEnd() *pb.ExecStreamEnd
|
||||
}
|
||||
|
||||
// procRespToWSMsg maps one process stream response to the WS message to send.
|
||||
// The bool is false when the response carries nothing to forward.
|
||||
func procRespToWSMsg(resp procStreamResp) (wsOutMsg, bool) {
|
||||
if s := resp.GetStart(); s != nil {
|
||||
return wsOutMsg{Type: "start", PID: s.Pid}, true
|
||||
}
|
||||
if d := resp.GetData(); d != nil {
|
||||
switch o := d.Output.(type) {
|
||||
case *pb.ExecStreamData_Stdout:
|
||||
return wsOutMsg{Type: "stdout", Data: string(o.Stdout)}, true
|
||||
case *pb.ExecStreamData_Stderr:
|
||||
return wsOutMsg{Type: "stderr", Data: string(o.Stderr)}, true
|
||||
}
|
||||
return wsOutMsg{}, false
|
||||
}
|
||||
if e := resp.GetEnd(); e != nil {
|
||||
exitCode := e.ExitCode
|
||||
return wsOutMsg{Type: "exit", ExitCode: &exitCode}, true
|
||||
}
|
||||
return wsOutMsg{}, false
|
||||
}
|
||||
|
||||
func sendWSError(conn *websocket.Conn, msg string) {
|
||||
writeWSJSON(conn, wsOutMsg{Type: "error", Data: msg})
|
||||
}
|
||||
|
||||
@ -192,22 +192,8 @@ func (h *processHandler) runConnectProcess(ctx context.Context, conn *websocket.
|
||||
|
||||
// Forward stream events to WebSocket.
|
||||
for stream.Receive() {
|
||||
resp := stream.Msg()
|
||||
switch ev := resp.Event.(type) {
|
||||
case *pb.ConnectProcessResponse_Start:
|
||||
writeWSJSON(conn, wsOutMsg{Type: "start", PID: ev.Start.Pid})
|
||||
|
||||
case *pb.ConnectProcessResponse_Data:
|
||||
switch o := ev.Data.Output.(type) {
|
||||
case *pb.ExecStreamData_Stdout:
|
||||
writeWSJSON(conn, wsOutMsg{Type: "stdout", Data: string(o.Stdout)})
|
||||
case *pb.ExecStreamData_Stderr:
|
||||
writeWSJSON(conn, wsOutMsg{Type: "stderr", Data: string(o.Stderr)})
|
||||
}
|
||||
|
||||
case *pb.ConnectProcessResponse_End:
|
||||
exitCode := ev.End.ExitCode
|
||||
writeWSJSON(conn, wsOutMsg{Type: "exit", ExitCode: &exitCode})
|
||||
if m, ok := procRespToWSMsg(stream.Msg()); ok {
|
||||
writeWSJSON(conn, m)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -60,6 +60,10 @@ func agentErrToHTTP(err error) (int, string, string) {
|
||||
return http.StatusServiceUnavailable, "no_hosts_available", "no servers available — try again later"
|
||||
case connect.CodeUnimplemented:
|
||||
return http.StatusNotImplemented, "agent_error", err.Error()
|
||||
case connect.CodeDeadlineExceeded:
|
||||
return http.StatusGatewayTimeout, "timeout", "command timed out"
|
||||
case connect.CodeInternal:
|
||||
return http.StatusInternalServerError, "agent_error", err.Error()
|
||||
default:
|
||||
return http.StatusBadGateway, "agent_error", err.Error()
|
||||
}
|
||||
|
||||
@ -144,7 +144,7 @@ func (c *SandboxEventConsumer) handleMessage(ctx context.Context, msg redis.XMes
|
||||
}
|
||||
case events.CapsulePause:
|
||||
if event.Outcome == events.OutcomeSuccess {
|
||||
c.handleAutoPaused(ctx, sandboxID)
|
||||
c.handleAutoPaused(ctx, sandboxID, event)
|
||||
}
|
||||
case events.CapsuleDestroy:
|
||||
if event.Outcome == events.OutcomeSuccess {
|
||||
@ -226,12 +226,35 @@ func (c *SandboxEventConsumer) handleStarted(ctx context.Context, sandboxID pgty
|
||||
}
|
||||
}
|
||||
|
||||
func (c *SandboxEventConsumer) handleAutoPaused(ctx context.Context, sandboxID pgtype.UUID) {
|
||||
// handleAutoPaused reflects an autonomous (TTL reaper / shutdown) pause in the
|
||||
// DB and writes the audit row for it. The audit write happens only when the
|
||||
// status flip actually applied, so a stream redelivery does not double-count,
|
||||
// and so the HostMonitor host_state_sync fallback (which audits the
|
||||
// callback-lost case) stays mutually exclusive with this path.
|
||||
//
|
||||
// Uses audit.Log (row only) — NOT LogSandboxAutoPause, which republishes a
|
||||
// CapsulePause/system event that would loop straight back into this consumer.
|
||||
func (c *SandboxEventConsumer) handleAutoPaused(ctx context.Context, sandboxID pgtype.UUID, event events.Event) {
|
||||
for _, fromStatus := range []string{"running", "pausing"} {
|
||||
if _, err := c.db.UpdateSandboxStatusIf(ctx, db.UpdateSandboxStatusIfParams{
|
||||
ID: sandboxID, Status: fromStatus, Status_2: "paused",
|
||||
}); err == nil {
|
||||
slog.Debug("sandbox event consumer: auto-paused fallback applied", "sandbox_id", id.FormatSandboxID(sandboxID), "from", fromStatus)
|
||||
slog.Debug("sandbox event consumer: auto-paused applied", "sandbox_id", id.FormatSandboxID(sandboxID), "from", fromStatus)
|
||||
reason := event.Metadata["reason"]
|
||||
if reason == "" {
|
||||
reason = "ttl_expired"
|
||||
}
|
||||
teamID, _ := id.ParseTeamID(event.TeamID)
|
||||
c.audit.Log(ctx, audit.Entry{
|
||||
TeamID: teamID,
|
||||
ActorType: "system",
|
||||
ResourceType: "sandbox",
|
||||
ResourceID: id.FormatSandboxID(sandboxID),
|
||||
Action: "pause",
|
||||
Scope: "team",
|
||||
Status: "info",
|
||||
Metadata: map[string]any{"reason": reason},
|
||||
})
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
@ -104,6 +104,14 @@ func (r *SSERelay) handleMessage(ctx context.Context, msg *redis.Message) {
|
||||
if err != nil {
|
||||
slog.Debug("sse relay: sandbox hydration failed (may be deleted)", "sandbox_id", event.Resource.ID, "error", err)
|
||||
} else {
|
||||
// Override the hydrated status with the status implied by the event
|
||||
// verb. Autonomous transitions (e.g. TTL auto-pause) flip the DB row
|
||||
// in a separate stream consumer that races this Pub/Sub read, so the
|
||||
// hydrated row may still carry the pre-transition status. The event
|
||||
// itself is authoritative for the resulting state.
|
||||
if status, ok := impliedSandboxStatus(event); ok {
|
||||
sb.Status = status
|
||||
}
|
||||
payload.Sandbox = sb
|
||||
}
|
||||
}
|
||||
@ -138,6 +146,25 @@ func (r *SSERelay) hydrateSandbox(ctx context.Context, sandboxIDStr string) (*sa
|
||||
return &resp, nil
|
||||
}
|
||||
|
||||
// impliedSandboxStatus maps a successful capsule lifecycle event to the
|
||||
// sandbox status it results in. Used to override a hydrated DB row that may
|
||||
// still carry the pre-transition status because the reconciliation consumer
|
||||
// that flips it races this Pub/Sub read. Returns false for events with no
|
||||
// single deterministic resulting status (failures, destroy, state_changed).
|
||||
func impliedSandboxStatus(event events.Event) (string, bool) {
|
||||
if event.Outcome != events.OutcomeSuccess {
|
||||
return "", false
|
||||
}
|
||||
switch event.Event {
|
||||
case events.CapsulePause:
|
||||
return "paused", true
|
||||
case events.CapsuleResume, events.CapsuleCreate:
|
||||
return "running", true
|
||||
default:
|
||||
return "", false
|
||||
}
|
||||
}
|
||||
|
||||
func isCapsuleEvent(eventType string) bool {
|
||||
switch eventType {
|
||||
case events.CapsuleCreate, events.CapsulePause, events.CapsuleResume, events.CapsuleDestroy, events.CapsuleStateChanged:
|
||||
|
||||
@ -25,6 +25,7 @@ type Client struct {
|
||||
hostIP string
|
||||
base string
|
||||
healthURL string
|
||||
activityURL string
|
||||
httpClient *http.Client
|
||||
streamingClient *http.Client
|
||||
|
||||
@ -42,6 +43,7 @@ func New(hostIP string) *Client {
|
||||
hostIP: hostIP,
|
||||
base: base,
|
||||
healthURL: base + "/health",
|
||||
activityURL: base + "/activity",
|
||||
httpClient: httpClient,
|
||||
streamingClient: streamingClient,
|
||||
process: genconnect.NewProcessClient(streamingClient, base),
|
||||
@ -117,36 +119,17 @@ func (c *Client) Exec(ctx context.Context, cmd string, args []string, opts *Exec
|
||||
result := &ExecResult{}
|
||||
|
||||
for stream.Receive() {
|
||||
msg := stream.Msg()
|
||||
if msg.Event == nil {
|
||||
ev, ok := procEventToStreamEvent(stream.Msg().GetEvent())
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
|
||||
event := msg.Event.GetEvent()
|
||||
switch e := event.(type) {
|
||||
case *envdpb.ProcessEvent_Start:
|
||||
slog.Debug("process started", "pid", e.Start.GetPid())
|
||||
|
||||
case *envdpb.ProcessEvent_Data:
|
||||
output := e.Data.GetOutput()
|
||||
switch o := output.(type) {
|
||||
case *envdpb.ProcessEvent_DataEvent_Stdout:
|
||||
result.Stdout = append(result.Stdout, o.Stdout...)
|
||||
case *envdpb.ProcessEvent_DataEvent_Stderr:
|
||||
result.Stderr = append(result.Stderr, o.Stderr...)
|
||||
}
|
||||
|
||||
case *envdpb.ProcessEvent_End:
|
||||
result.ExitCode = e.End.GetExitCode()
|
||||
if e.End.Error != nil {
|
||||
slog.Debug("process ended with error",
|
||||
"exit_code", e.End.GetExitCode(),
|
||||
"error", e.End.GetError(),
|
||||
)
|
||||
}
|
||||
|
||||
case *envdpb.ProcessEvent_Keepalive:
|
||||
// Ignore keepalives.
|
||||
switch ev.Type {
|
||||
case "stdout":
|
||||
result.Stdout = append(result.Stdout, ev.Data...)
|
||||
case "stderr":
|
||||
result.Stderr = append(result.Stderr, ev.Data...)
|
||||
case "end":
|
||||
result.ExitCode = ev.ExitCode
|
||||
}
|
||||
}
|
||||
|
||||
@ -166,6 +149,76 @@ type ExecStreamEvent struct {
|
||||
Error string
|
||||
}
|
||||
|
||||
// procEventToStreamEvent converts a raw envd ProcessEvent into an
|
||||
// ExecStreamEvent. The second return is false for events with no payload to
|
||||
// forward (nil event, keepalive, unknown data variant) so callers can skip
|
||||
// them. This is the single decoder shared by Exec, ExecStream and
|
||||
// ConnectProcess.
|
||||
func procEventToStreamEvent(pe *envdpb.ProcessEvent) (ExecStreamEvent, bool) {
|
||||
if pe == nil {
|
||||
return ExecStreamEvent{}, false
|
||||
}
|
||||
switch e := pe.GetEvent().(type) {
|
||||
case *envdpb.ProcessEvent_Start:
|
||||
return ExecStreamEvent{Type: "start", PID: e.Start.GetPid()}, true
|
||||
case *envdpb.ProcessEvent_Data:
|
||||
switch o := e.Data.GetOutput().(type) {
|
||||
case *envdpb.ProcessEvent_DataEvent_Stdout:
|
||||
return ExecStreamEvent{Type: "stdout", Data: o.Stdout}, true
|
||||
case *envdpb.ProcessEvent_DataEvent_Stderr:
|
||||
return ExecStreamEvent{Type: "stderr", Data: o.Stderr}, true
|
||||
}
|
||||
return ExecStreamEvent{}, false
|
||||
case *envdpb.ProcessEvent_End:
|
||||
ev := ExecStreamEvent{Type: "end", ExitCode: e.End.GetExitCode()}
|
||||
if e.End.Error != nil {
|
||||
ev.Error = e.End.GetError()
|
||||
}
|
||||
return ev, true
|
||||
}
|
||||
return ExecStreamEvent{}, false
|
||||
}
|
||||
|
||||
// procEventStream captures the handful of server-stream methods that
// pumpProcessEvents relies on, parameterised over the message type T. Per the
// original note, Connect's *connect.ServerStreamForClient for both
// StartResponse and ConnectResponse satisfies it.
type procEventStream[T any] interface {
	// Receive advances to the next message; false means the stream is done.
	Receive() bool
	// Msg returns the message made available by the last Receive.
	Msg() *T
	// Err reports the error, if any, that ended the stream.
	Err() error
	// Close releases the stream.
	Close() error
}
|
||||
|
||||
// pumpProcessEvents drains a process server-stream into ch until the stream ends
|
||||
// or ctx is cancelled, closing ch on exit. getEvent extracts the ProcessEvent
|
||||
// from each message so the same loop works for both the Start and Connect RPCs.
|
||||
func pumpProcessEvents[T any](
|
||||
ctx context.Context,
|
||||
stream procEventStream[T],
|
||||
getEvent func(*T) *envdpb.ProcessEvent,
|
||||
ch chan<- ExecStreamEvent,
|
||||
logLabel string,
|
||||
) {
|
||||
defer close(ch)
|
||||
defer stream.Close()
|
||||
|
||||
for stream.Receive() {
|
||||
ev, ok := procEventToStreamEvent(getEvent(stream.Msg()))
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
select {
|
||||
case ch <- ev:
|
||||
case <-ctx.Done():
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
if err := stream.Err(); err != nil && err != io.EOF {
|
||||
slog.Debug(logLabel, "error", err)
|
||||
}
|
||||
}
|
||||
|
||||
// ExecStream runs a command inside the sandbox and returns a channel of output events.
|
||||
// The channel is closed when the process ends or the context is cancelled.
|
||||
func (c *Client) ExecStream(ctx context.Context, cmd string, args ...string) (<-chan ExecStreamEvent, error) {
|
||||
@ -184,52 +237,7 @@ func (c *Client) ExecStream(ctx context.Context, cmd string, args ...string) (<-
|
||||
}
|
||||
|
||||
ch := make(chan ExecStreamEvent, 256)
|
||||
go func() {
|
||||
defer close(ch)
|
||||
defer stream.Close()
|
||||
|
||||
for stream.Receive() {
|
||||
msg := stream.Msg()
|
||||
if msg.Event == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
var ev ExecStreamEvent
|
||||
event := msg.Event.GetEvent()
|
||||
switch e := event.(type) {
|
||||
case *envdpb.ProcessEvent_Start:
|
||||
ev = ExecStreamEvent{Type: "start", PID: e.Start.GetPid()}
|
||||
|
||||
case *envdpb.ProcessEvent_Data:
|
||||
output := e.Data.GetOutput()
|
||||
switch o := output.(type) {
|
||||
case *envdpb.ProcessEvent_DataEvent_Stdout:
|
||||
ev = ExecStreamEvent{Type: "stdout", Data: o.Stdout}
|
||||
case *envdpb.ProcessEvent_DataEvent_Stderr:
|
||||
ev = ExecStreamEvent{Type: "stderr", Data: o.Stderr}
|
||||
}
|
||||
|
||||
case *envdpb.ProcessEvent_End:
|
||||
ev = ExecStreamEvent{Type: "end", ExitCode: e.End.GetExitCode()}
|
||||
if e.End.Error != nil {
|
||||
ev.Error = e.End.GetError()
|
||||
}
|
||||
|
||||
case *envdpb.ProcessEvent_Keepalive:
|
||||
continue
|
||||
}
|
||||
|
||||
select {
|
||||
case ch <- ev:
|
||||
case <-ctx.Done():
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
if err := stream.Err(); err != nil && err != io.EOF {
|
||||
slog.Debug("exec stream error", "error", err)
|
||||
}
|
||||
}()
|
||||
go pumpProcessEvents(ctx, stream, (*envdpb.StartResponse).GetEvent, ch, "exec stream error")
|
||||
|
||||
return ch, nil
|
||||
}
|
||||
@ -434,7 +442,7 @@ func (c *Client) CancelMemoryPreload(ctx context.Context) error {
|
||||
// post-restore initialization. sandbox_id and template_id are passed
|
||||
// so envd can set WRENN_SANDBOX_ID and WRENN_TEMPLATE_ID env vars.
|
||||
func (c *Client) PostInit(ctx context.Context) error {
|
||||
return c.PostInitWithDefaults(ctx, "", nil, "", "")
|
||||
return c.PostInitWithDefaults(ctx, "", nil, "", "", "")
|
||||
}
|
||||
|
||||
// PostInitWithDefaults calls envd's POST /init endpoint with optional default
|
||||
@ -444,7 +452,7 @@ func (c *Client) PostInit(ctx context.Context) error {
|
||||
// timestamp and lifecycle_id are always populated: envd uses them to snap
|
||||
// the guest clock to the host's wall time and to detect post-resume calls
|
||||
// (which trigger port-forwarder restart + NFS remount).
|
||||
func (c *Client) PostInitWithDefaults(ctx context.Context, defaultUser string, envVars map[string]string, sandboxID, templateID string) error {
|
||||
func (c *Client) PostInitWithDefaults(ctx context.Context, defaultUser string, envVars map[string]string, sandboxID, templateID, proxyDomain string) error {
|
||||
payload := map[string]any{
|
||||
"timestamp": time.Now().UTC().Format(time.RFC3339Nano),
|
||||
"lifecycle_id": uuid.NewString(),
|
||||
@ -461,6 +469,9 @@ func (c *Client) PostInitWithDefaults(ctx context.Context, defaultUser string, e
|
||||
if templateID != "" {
|
||||
payload["template_id"] = templateID
|
||||
}
|
||||
if proxyDomain != "" {
|
||||
payload["proxy_domain"] = proxyDomain
|
||||
}
|
||||
|
||||
var body io.Reader
|
||||
if len(payload) > 0 {
|
||||
|
||||
@ -81,6 +81,42 @@ func (c *Client) WaitUntilRPCReady(ctx context.Context) error {
|
||||
}
|
||||
}
|
||||
|
||||
// Activity is the liveness snapshot reported by envd: VM-wide CPU utilisation
// plus IO throughput, sampled inside the guest. The host-side activity sampler
// reads it to judge whether a sandbox is doing real work and therefore
// deserves a refreshed TTL.
type Activity struct {
	// CPUCount is decoded from the "cpu_count" JSON key.
	CPUCount uint32 `json:"cpu_count"`
	// CPUUsedPct is decoded from "cpu_used_pct" and compared against the
	// sampler's CPU busy threshold.
	CPUUsedPct float32 `json:"cpu_used_pct"`
	// NetBps is decoded from "net_bps" and compared against the network floor.
	NetBps uint64 `json:"net_bps"`
	// DiskBps is decoded from "disk_bps" and compared against the disk floor.
	DiskBps uint64 `json:"disk_bps"`
}
|
||||
|
||||
// FetchActivity polls envd's /activity endpoint. The endpoint serves straight
|
||||
// from in-guest atomics (no syscalls), so it is cheap to call frequently.
|
||||
func (c *Client) FetchActivity(ctx context.Context) (*Activity, error) {
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, c.activityURL, nil)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("build activity request: %w", err)
|
||||
}
|
||||
|
||||
resp, err := c.httpClient.Do(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("fetch envd activity: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return nil, fmt.Errorf("activity check returned %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
var data Activity
|
||||
if err := json.NewDecoder(resp.Body).Decode(&data); err != nil {
|
||||
return nil, fmt.Errorf("decode activity response: %w", err)
|
||||
}
|
||||
|
||||
return &data, nil
|
||||
}
|
||||
|
||||
// healthCheck sends a single GET /health request to envd.
|
||||
func (c *Client) healthCheck(ctx context.Context) error {
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, c.healthURL, nil)
|
||||
|
||||
@ -4,7 +4,6 @@ import (
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"log/slog"
|
||||
|
||||
"connectrpc.com/connect"
|
||||
|
||||
@ -87,52 +86,7 @@ func (c *Client) ConnectProcess(ctx context.Context, pid uint32, tag string) (<-
|
||||
}
|
||||
|
||||
ch := make(chan ExecStreamEvent, 16)
|
||||
go func() {
|
||||
defer close(ch)
|
||||
defer stream.Close()
|
||||
|
||||
for stream.Receive() {
|
||||
msg := stream.Msg()
|
||||
if msg.Event == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
var ev ExecStreamEvent
|
||||
switch e := msg.Event.GetEvent().(type) {
|
||||
case *envdpb.ProcessEvent_Start:
|
||||
ev = ExecStreamEvent{Type: "start", PID: e.Start.GetPid()}
|
||||
|
||||
case *envdpb.ProcessEvent_Data:
|
||||
switch o := e.Data.GetOutput().(type) {
|
||||
case *envdpb.ProcessEvent_DataEvent_Stdout:
|
||||
ev = ExecStreamEvent{Type: "stdout", Data: o.Stdout}
|
||||
case *envdpb.ProcessEvent_DataEvent_Stderr:
|
||||
ev = ExecStreamEvent{Type: "stderr", Data: o.Stderr}
|
||||
default:
|
||||
continue
|
||||
}
|
||||
|
||||
case *envdpb.ProcessEvent_End:
|
||||
ev = ExecStreamEvent{Type: "end", ExitCode: e.End.GetExitCode()}
|
||||
if e.End.Error != nil {
|
||||
ev.Error = e.End.GetError()
|
||||
}
|
||||
|
||||
case *envdpb.ProcessEvent_Keepalive:
|
||||
continue
|
||||
}
|
||||
|
||||
select {
|
||||
case ch <- ev:
|
||||
case <-ctx.Done():
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
if err := stream.Err(); err != nil && err != io.EOF {
|
||||
slog.Debug("connect process stream error", "error", err)
|
||||
}
|
||||
}()
|
||||
go pumpProcessEvents(ctx, stream, (*envdpb.ConnectResponse).GetEvent, ch, "connect process stream error")
|
||||
|
||||
return ch, nil
|
||||
}
|
||||
|
||||
@ -253,7 +253,7 @@ func (s *Server) Exec(
|
||||
|
||||
result, err := s.mgr.Exec(execCtx, msg.SandboxId, msg.Cmd, msg.Args, opts)
|
||||
if err != nil {
|
||||
return nil, connect.NewError(connect.CodeInternal, fmt.Errorf("exec: %w", err))
|
||||
return nil, envdErr("exec", err)
|
||||
}
|
||||
|
||||
return connect.NewResponse(&pb.ExecResponse{
|
||||
@ -395,31 +395,15 @@ func (s *Server) ExecStream(
|
||||
}
|
||||
|
||||
for ev := range events {
|
||||
start, data, end := execEventParts(ev)
|
||||
var resp pb.ExecStreamResponse
|
||||
switch ev.Type {
|
||||
case "start":
|
||||
resp.Event = &pb.ExecStreamResponse_Start{
|
||||
Start: &pb.ExecStreamStart{Pid: ev.PID},
|
||||
}
|
||||
case "stdout":
|
||||
resp.Event = &pb.ExecStreamResponse_Data{
|
||||
Data: &pb.ExecStreamData{
|
||||
Output: &pb.ExecStreamData_Stdout{Stdout: ev.Data},
|
||||
},
|
||||
}
|
||||
case "stderr":
|
||||
resp.Event = &pb.ExecStreamResponse_Data{
|
||||
Data: &pb.ExecStreamData{
|
||||
Output: &pb.ExecStreamData_Stderr{Stderr: ev.Data},
|
||||
},
|
||||
}
|
||||
case "end":
|
||||
resp.Event = &pb.ExecStreamResponse_End{
|
||||
End: &pb.ExecStreamEnd{
|
||||
ExitCode: ev.ExitCode,
|
||||
Error: ev.Error,
|
||||
},
|
||||
}
|
||||
switch {
|
||||
case start != nil:
|
||||
resp.Event = &pb.ExecStreamResponse_Start{Start: start}
|
||||
case data != nil:
|
||||
resp.Event = &pb.ExecStreamResponse_Data{Data: data}
|
||||
case end != nil:
|
||||
resp.Event = &pb.ExecStreamResponse_End{End: end}
|
||||
default:
|
||||
continue
|
||||
}
|
||||
@ -431,6 +415,24 @@ func (s *Server) ExecStream(
|
||||
return nil
|
||||
}
|
||||
|
||||
// execEventParts maps a streaming exec event to its proto inner message.
|
||||
// Exactly one return value is non-nil; all-nil means the event carries nothing
|
||||
// to forward. Shared by ExecStream and ConnectProcess, which differ only in the
|
||||
// response envelope wrapping these inner messages.
|
||||
func execEventParts(ev envdclient.ExecStreamEvent) (*pb.ExecStreamStart, *pb.ExecStreamData, *pb.ExecStreamEnd) {
|
||||
switch ev.Type {
|
||||
case "start":
|
||||
return &pb.ExecStreamStart{Pid: ev.PID}, nil, nil
|
||||
case "stdout":
|
||||
return nil, &pb.ExecStreamData{Output: &pb.ExecStreamData_Stdout{Stdout: ev.Data}}, nil
|
||||
case "stderr":
|
||||
return nil, &pb.ExecStreamData{Output: &pb.ExecStreamData_Stderr{Stderr: ev.Data}}, nil
|
||||
case "end":
|
||||
return nil, nil, &pb.ExecStreamEnd{ExitCode: ev.ExitCode, Error: ev.Error}
|
||||
}
|
||||
return nil, nil, nil
|
||||
}
|
||||
|
||||
func (s *Server) WriteFileStream(
|
||||
ctx context.Context,
|
||||
stream *connect.ClientStream[pb.WriteFileStreamRequest],
|
||||
@ -912,31 +914,15 @@ func (s *Server) ConnectProcess(
|
||||
}
|
||||
|
||||
for ev := range events {
|
||||
start, data, end := execEventParts(ev)
|
||||
var resp pb.ConnectProcessResponse
|
||||
switch ev.Type {
|
||||
case "start":
|
||||
resp.Event = &pb.ConnectProcessResponse_Start{
|
||||
Start: &pb.ExecStreamStart{Pid: ev.PID},
|
||||
}
|
||||
case "stdout":
|
||||
resp.Event = &pb.ConnectProcessResponse_Data{
|
||||
Data: &pb.ExecStreamData{
|
||||
Output: &pb.ExecStreamData_Stdout{Stdout: ev.Data},
|
||||
},
|
||||
}
|
||||
case "stderr":
|
||||
resp.Event = &pb.ConnectProcessResponse_Data{
|
||||
Data: &pb.ExecStreamData{
|
||||
Output: &pb.ExecStreamData_Stderr{Stderr: ev.Data},
|
||||
},
|
||||
}
|
||||
case "end":
|
||||
resp.Event = &pb.ConnectProcessResponse_End{
|
||||
End: &pb.ExecStreamEnd{
|
||||
ExitCode: ev.ExitCode,
|
||||
Error: ev.Error,
|
||||
},
|
||||
}
|
||||
switch {
|
||||
case start != nil:
|
||||
resp.Event = &pb.ConnectProcessResponse_Start{Start: start}
|
||||
case data != nil:
|
||||
resp.Event = &pb.ConnectProcessResponse_Data{Data: data}
|
||||
case end != nil:
|
||||
resp.Event = &pb.ConnectProcessResponse_End{End: end}
|
||||
default:
|
||||
continue
|
||||
}
|
||||
|
||||
111
internal/sandbox/activity_test.go
Normal file
111
internal/sandbox/activity_test.go
Normal file
@ -0,0 +1,111 @@
|
||||
package sandbox
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"git.omukk.dev/wrenn/wrenn/internal/envdclient"
|
||||
)
|
||||
|
||||
func TestIsBusy(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
cfg Config
|
||||
act envdclient.Activity
|
||||
want bool
|
||||
}{
|
||||
// Default thresholds (zero cfg → defaults: cpu 5%, net 16K, disk 32K).
|
||||
{"idle", Config{}, envdclient.Activity{CPUUsedPct: 0.5, NetBps: 100, DiskBps: 200}, false},
|
||||
{"cpu just below", Config{}, envdclient.Activity{CPUUsedPct: 4.99}, false},
|
||||
{"cpu at threshold", Config{}, envdclient.Activity{CPUUsedPct: 5.0}, true},
|
||||
{"cpu above", Config{}, envdclient.Activity{CPUUsedPct: 80.0}, true},
|
||||
{"net just below", Config{}, envdclient.Activity{NetBps: 16*1024 - 1}, false},
|
||||
{"net at floor", Config{}, envdclient.Activity{NetBps: 16 * 1024}, true},
|
||||
{"disk just below", Config{}, envdclient.Activity{DiskBps: 32*1024 - 1}, false},
|
||||
{"disk at floor", Config{}, envdclient.Activity{DiskBps: 32 * 1024}, true},
|
||||
{"download: low cpu, high net", Config{}, envdclient.Activity{CPUUsedPct: 1.0, NetBps: 5 * 1024 * 1024}, true},
|
||||
|
||||
// Explicit overrides take precedence over defaults.
|
||||
{
|
||||
"custom cpu threshold met",
|
||||
Config{CPUBusyPct: 20.0},
|
||||
envdclient.Activity{CPUUsedPct: 25.0},
|
||||
true,
|
||||
},
|
||||
{
|
||||
"custom cpu threshold not met",
|
||||
Config{CPUBusyPct: 20.0},
|
||||
envdclient.Activity{CPUUsedPct: 10.0},
|
||||
false,
|
||||
},
|
||||
{
|
||||
"custom net floor not met",
|
||||
Config{NetFloorBps: 1024 * 1024},
|
||||
envdclient.Activity{NetBps: 16 * 1024},
|
||||
false,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
m := &Manager{cfg: tt.cfg}
|
||||
if got := m.isBusy(&tt.act); got != tt.want {
|
||||
t.Errorf("isBusy(%+v) = %v, want %v", tt.act, got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestApplyBusySample(t *testing.T) {
|
||||
// Debounce requires busyDebounceSamples consecutive busy samples before the
|
||||
// first bump. Verify the streak math and bump timing.
|
||||
if busyDebounceSamples != 2 {
|
||||
t.Skip("test written for busyDebounceSamples=2")
|
||||
}
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
startStreak int
|
||||
busy bool
|
||||
wantStreak int
|
||||
wantBump bool
|
||||
}{
|
||||
{"first busy, no bump yet", 0, true, 1, false},
|
||||
{"second consecutive busy, bump", 1, true, 2, true},
|
||||
{"sustained busy keeps bumping, streak held", 2, true, 2, true},
|
||||
{"single noise spike from idle, no bump", 0, false, 0, false},
|
||||
{"idle resets a building streak", 1, false, 0, false},
|
||||
{"idle resets a saturated streak", 2, false, 0, false},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
gotStreak, gotBump := applyBusySample(tt.startStreak, tt.busy)
|
||||
if gotStreak != tt.wantStreak || gotBump != tt.wantBump {
|
||||
t.Errorf("applyBusySample(%d, %v) = (%d, %v), want (%d, %v)",
|
||||
tt.startStreak, tt.busy, gotStreak, gotBump, tt.wantStreak, tt.wantBump)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestApplyBusySample_NoiseScenario walks a realistic sample sequence: brief
|
||||
// noise never crosses the debounce, but sustained work does and then a return
|
||||
// to idle resets — proving an isolated spike cannot keep a sandbox alive.
|
||||
func TestApplyBusySample_NoiseScenario(t *testing.T) {
|
||||
if busyDebounceSamples != 2 {
|
||||
t.Skip("test written for busyDebounceSamples=2")
|
||||
}
|
||||
|
||||
samples := []bool{true, false, false, true, true, true, false}
|
||||
wantBumps := []bool{false, false, false, false, true, true, false}
|
||||
|
||||
streak := 0
|
||||
for i, busy := range samples {
|
||||
var bump bool
|
||||
streak, bump = applyBusySample(streak, busy)
|
||||
if bump != wantBumps[i] {
|
||||
t.Errorf("sample %d (busy=%v): bump = %v, want %v (streak=%d)",
|
||||
i, busy, bump, wantBumps[i], streak)
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -88,14 +88,47 @@ type Config struct {
|
||||
EnvdTimeout time.Duration
|
||||
DefaultRootfsSizeMB int // target size for template rootfs images; 0 → DefaultDiskSizeMB
|
||||
|
||||
// ProxyDomain is the public domain sandboxes are served under (e.g.
|
||||
// "wrenn.dev"). Injected into envd at /init so `envd ports` can build
|
||||
// {port}-{sandbox_id}.{domain} URLs.
|
||||
ProxyDomain string
|
||||
|
||||
// Resolved at startup by the host agent.
|
||||
KernelPath string // path to the latest vmlinux-x.y.z
|
||||
KernelVersion string // semver extracted from filename
|
||||
VMMBin string // path to the cloud-hypervisor binary
|
||||
VMMVersion string // semver from cloud-hypervisor --version
|
||||
AgentVersion string // host agent version (injected via ldflags)
|
||||
|
||||
// Activity sampler thresholds. The sampler polls each running sandbox's
|
||||
// guest liveness and refreshes its TTL when it is doing real work, so a
|
||||
// long-running but non-interactive job is not mistaken for inactive. A
|
||||
// sandbox counts as busy when guest CPU ≥ CPUBusyPct, or net/disk
|
||||
// throughput ≥ the respective floor (bytes/sec). Zero values fall back to
|
||||
// the package defaults at sampler start.
|
||||
ActivitySampleInterval time.Duration
|
||||
CPUBusyPct float32
|
||||
NetFloorBps uint64
|
||||
DiskFloorBps uint64
|
||||
}
|
||||
|
||||
// Defaults and fixed tunables for the activity sampler. The busy thresholds
// are meant to sit above the background noise of an idle VM (envd's own
// sampler thread, guest timers) so that a parked sandbox still times out,
// while the debounce protects against one noisy sample being mistaken for
// work. The original note states that all of these are env-overridable on the
// host agent.
const (
	// defaultActivitySampleInterval is the polling period used when the
	// configured interval is not positive.
	defaultActivitySampleInterval = 5 * time.Second

	// defaultCPUBusyPct is the CPU busy threshold, as a percentage of total
	// vCPU capacity.
	defaultCPUBusyPct = 5.0

	// defaultNetFloorBps is the network throughput floor: 16 KB/s.
	defaultNetFloorBps = 16 << 10

	// defaultDiskFloorBps is the disk throughput floor: 32 KB/s.
	defaultDiskFloorBps = 32 << 10

	// activityPollTimeout bounds a single activity poll of one sandbox.
	activityPollTimeout = 3 * time.Second

	// activitySampleConcurrency caps how many sandboxes are polled at once.
	activitySampleConcurrency = 16

	// busyDebounceSamples is the number of consecutive busy samples needed
	// before a sandbox's TTL is refreshed. At a 5s interval real work shows
	// up within roughly 10s, while isolated noise spikes are ignored.
	busyDebounceSamples = 2
)
|
||||
|
||||
// LifecycleEvent describes an autonomous state change initiated by the agent.
|
||||
type LifecycleEvent struct {
|
||||
Event string
|
||||
@ -189,6 +222,12 @@ type sandboxState struct {
|
||||
ring *metricsRing // tiered ring buffers for CPU/mem/disk metrics
|
||||
samplerCancel context.CancelFunc // cancels the per-sandbox sampling goroutine
|
||||
samplerDone chan struct{} // closed when the sampling goroutine exits
|
||||
|
||||
// activityBusyStreak counts consecutive busy activity samples. A single
|
||||
// noisy sample (idle background CPU, a stray packet) must not refresh the
|
||||
// TTL, so LastActiveAt is only bumped once the streak reaches
|
||||
// busyDebounceSamples. Reset to 0 by any non-busy sample. Guarded by m.mu.
|
||||
activityBusyStreak int
|
||||
}
|
||||
|
||||
// buildMetadata constructs the metadata map with version information.
|
||||
@ -419,14 +458,14 @@ func (m *Manager) Create(
|
||||
// Fetch envd version (best-effort).
|
||||
envdVersion, _ := client.FetchVersion(ctx)
|
||||
|
||||
// Apply template defaults via envd /init (no-op when both empty).
|
||||
if defaultUser != "" || len(defaultEnv) > 0 {
|
||||
initCtx, initCancel := context.WithTimeout(ctx, m.cfg.EnvdTimeout)
|
||||
if err := client.PostInitWithDefaults(initCtx, defaultUser, defaultEnv, sandboxID, id.UUIDString(templateID)); err != nil {
|
||||
slog.Warn("post-create PostInit failed", "id", sandboxID, "error", err)
|
||||
}
|
||||
initCancel()
|
||||
// Apply template defaults + sandbox identity via envd /init. Always called
|
||||
// on create so envd records its sandbox ID and proxy domain (used by
|
||||
// `envd ports`), even when the template specifies no user/env defaults.
|
||||
initCtx, initCancel := context.WithTimeout(ctx, m.cfg.EnvdTimeout)
|
||||
if err := client.PostInitWithDefaults(initCtx, defaultUser, defaultEnv, sandboxID, id.UUIDString(templateID), m.cfg.ProxyDomain); err != nil {
|
||||
slog.Warn("post-create PostInit failed", "id", sandboxID, "error", err)
|
||||
}
|
||||
initCancel()
|
||||
|
||||
now := time.Now()
|
||||
sb := &sandboxState{
|
||||
@ -667,7 +706,7 @@ func (m *Manager) SetDefaults(ctx context.Context, sandboxID, defaultUser string
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return c.PostInitWithDefaults(ctx, defaultUser, defaultEnv, "", "")
|
||||
return c.PostInitWithDefaults(ctx, defaultUser, defaultEnv, "", "", "")
|
||||
}
|
||||
|
||||
// PtyAttach starts a new PTY process or reconnects to an existing one.
|
||||
@ -762,6 +801,11 @@ func (m *Manager) AcquireProxyConn(sandboxID string) (net.IP, *ConnTracker, bool
|
||||
if !sb.connTracker.Acquire() {
|
||||
return nil, nil, false
|
||||
}
|
||||
// Inbound proxy traffic counts as activity: a web server that is otherwise
// quiet inside the guest should not be auto-paused while it is still
// serving requests through the proxy.
|
||||
m.mu.Lock()
|
||||
sb.LastActiveAt = time.Now()
|
||||
m.mu.Unlock()
|
||||
return sb.HostIP, sb.connTracker, true
|
||||
}
|
||||
|
||||
@ -872,6 +916,146 @@ func (m *Manager) reapExpired(_ context.Context) {
|
||||
}
|
||||
}
|
||||
|
||||
// StartActivitySampler starts a background goroutine that polls each running
|
||||
// sandbox's guest liveness (CPU + net/disk IO) and refreshes LastActiveAt when
|
||||
// the sandbox is doing real work. This is what keeps a long-running but
|
||||
// non-interactive job (a build, a download) from being auto-paused by the TTL
|
||||
// reaper, while an idle workload (sleep, a parked shell) still times out.
|
||||
func (m *Manager) StartActivitySampler(ctx context.Context) {
|
||||
interval := m.cfg.ActivitySampleInterval
|
||||
if interval <= 0 {
|
||||
interval = defaultActivitySampleInterval
|
||||
}
|
||||
|
||||
go func() {
|
||||
ticker := time.NewTicker(interval)
|
||||
defer ticker.Stop()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case <-m.stopCh:
|
||||
return
|
||||
case <-ticker.C:
|
||||
m.sampleActivity(ctx)
|
||||
}
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
// activityTarget pairs a sandbox ID with the envd client to poll.
|
||||
type activityTarget struct {
|
||||
id string
|
||||
client *envdclient.Client
|
||||
}
|
||||
|
||||
func (m *Manager) sampleActivity(ctx context.Context) {
|
||||
// Snapshot the running sandboxes and their clients under the lock, then
|
||||
// poll over the network without holding it.
|
||||
m.mu.RLock()
|
||||
targets := make([]activityTarget, 0, len(m.boxes))
|
||||
for id, sb := range m.boxes {
|
||||
if sb.Status != models.StatusRunning {
|
||||
continue
|
||||
}
|
||||
// Skip sandboxes still loading memory after a resume — they are not
|
||||
// settled and their IO/CPU is preload noise, not user work.
|
||||
if sb.memLoadDone != nil {
|
||||
select {
|
||||
case <-sb.memLoadDone:
|
||||
default:
|
||||
continue
|
||||
}
|
||||
}
|
||||
c := sb.client.Load()
|
||||
if c == nil {
|
||||
continue
|
||||
}
|
||||
targets = append(targets, activityTarget{id: id, client: c})
|
||||
}
|
||||
m.mu.RUnlock()
|
||||
|
||||
if len(targets) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
sem := make(chan struct{}, activitySampleConcurrency)
|
||||
var wg sync.WaitGroup
|
||||
for _, t := range targets {
|
||||
wg.Add(1)
|
||||
sem <- struct{}{}
|
||||
go func(t activityTarget) {
|
||||
defer wg.Done()
|
||||
defer func() { <-sem }()
|
||||
m.pollAndBump(ctx, t)
|
||||
}(t)
|
||||
}
|
||||
wg.Wait()
|
||||
}
|
||||
|
||||
// pollAndBump fetches one sandbox's activity and refreshes its TTL once it has
|
||||
// been busy for busyDebounceSamples consecutive samples. Poll failures are
|
||||
// treated as a non-busy sample: an unreachable envd is handled by the reaper /
|
||||
// heartbeat paths, and resetting the streak is the safe default.
|
||||
func (m *Manager) pollAndBump(ctx context.Context, t activityTarget) {
|
||||
pollCtx, cancel := context.WithTimeout(ctx, activityPollTimeout)
|
||||
defer cancel()
|
||||
|
||||
act, err := t.client.FetchActivity(pollCtx)
|
||||
busy := err == nil && m.isBusy(act)
|
||||
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
|
||||
sb, ok := m.boxes[t.id]
|
||||
if !ok || sb.Status != models.StatusRunning {
|
||||
return
|
||||
}
|
||||
|
||||
streak, bump := applyBusySample(sb.activityBusyStreak, busy)
|
||||
sb.activityBusyStreak = streak
|
||||
if bump {
|
||||
sb.LastActiveAt = time.Now()
|
||||
}
|
||||
}
|
||||
|
||||
// applyBusySample advances a debounce streak with the latest sample and
|
||||
// reports whether the TTL should be refreshed this tick. A non-busy sample
|
||||
// resets the streak; the bump fires once the streak reaches the debounce
|
||||
// threshold and on every busy tick thereafter (the streak is held at the
|
||||
// threshold rather than growing unbounded).
|
||||
func applyBusySample(streak int, busy bool) (newStreak int, bump bool) {
|
||||
if !busy {
|
||||
return 0, false
|
||||
}
|
||||
streak++
|
||||
if streak >= busyDebounceSamples {
|
||||
return busyDebounceSamples, true
|
||||
}
|
||||
return streak, false
|
||||
}
|
||||
|
||||
// isBusy reports whether a guest liveness snapshot represents real work.
|
||||
func (m *Manager) isBusy(act *envdclient.Activity) bool {
|
||||
cpuThreshold := m.cfg.CPUBusyPct
|
||||
if cpuThreshold <= 0 {
|
||||
cpuThreshold = defaultCPUBusyPct
|
||||
}
|
||||
netFloor := m.cfg.NetFloorBps
|
||||
if netFloor == 0 {
|
||||
netFloor = defaultNetFloorBps
|
||||
}
|
||||
diskFloor := m.cfg.DiskFloorBps
|
||||
if diskFloor == 0 {
|
||||
diskFloor = defaultDiskFloorBps
|
||||
}
|
||||
|
||||
return act.CPUUsedPct >= cpuThreshold ||
|
||||
act.NetBps >= netFloor ||
|
||||
act.DiskBps >= diskFloor
|
||||
}
|
||||
|
||||
// Shutdown gracefully drains the manager. Running sandboxes are paused so
|
||||
// their state survives across agent restarts; any sandboxes still holding
|
||||
// runtime resources after PauseAll (e.g. paused failed, or status was
|
||||
|
||||
@ -110,7 +110,7 @@ func (m *Manager) initAndStartMemoryLoader(ctx context.Context, sb *sandboxState
|
||||
slog.Warn("post-restore PostInit skipped: envd client cleared", "id", sb.ID)
|
||||
return
|
||||
}
|
||||
if err := c.PostInitWithDefaults(initCtx, defaultUser, envVars, sb.ID, templateIDStr); err != nil {
|
||||
if err := c.PostInitWithDefaults(initCtx, defaultUser, envVars, sb.ID, templateIDStr, m.cfg.ProxyDomain); err != nil {
|
||||
slog.Warn("post-restore PostInit failed", "id", sb.ID, "error", err)
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user