1
0
forked from wrenn/wrenn

feat(envd): update guest agent for Cloud Hypervisor

Remove Firecracker-specific MMDS metadata fetching and metrics host
module. CH communicates with the guest purely over TAP networking,
so MMDS (Firecracker's metadata service via MMDS address) is no longer
needed.

- Remove src/host/ module (mmds.rs, metrics.rs)
- Remove reqwest dependency (was only used for MMDS HTTP calls)
- Remove --isnotfc CLI flag (no longer dual-mode)
- Simplify health endpoint and init handler
- Update state management for CH snapshot lifecycle
- Bump version to 0.3.0
This commit is contained in:
2026-05-17 01:33:25 +06:00
parent eaa6b8576d
commit dd8a940431
13 changed files with 124 additions and 755 deletions

426
envd-rs/Cargo.lock generated
View File

@ -241,12 +241,6 @@ dependencies = [
"serde",
]
[[package]]
name = "bumpalo"
version = "3.20.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb"
[[package]]
name = "bytes"
version = "1.11.1"
@ -486,17 +480,6 @@ dependencies = [
"subtle",
]
[[package]]
name = "displaydoc"
version = "0.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "either"
version = "1.15.0"
@ -514,7 +497,7 @@ dependencies = [
[[package]]
name = "envd"
version = "0.2.1"
version = "0.3.0"
dependencies = [
"async-stream",
"axum",
@ -537,7 +520,6 @@ dependencies = [
"mime_guess",
"nix",
"notify",
"reqwest",
"serde",
"serde_json",
"sha2",
@ -889,7 +871,6 @@ dependencies = [
"pin-project-lite",
"smallvec",
"tokio",
"want",
]
[[package]]
@ -898,103 +879,13 @@ version = "0.1.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "96547c2556ec9d12fb1578c4eaf448b04993e7fb79cbaad930a656880a6bdfa0"
dependencies = [
"base64",
"bytes",
"futures-channel",
"futures-util",
"http",
"http-body",
"hyper",
"ipnet",
"libc",
"percent-encoding",
"pin-project-lite",
"socket2",
"tokio",
"tower-service",
"tracing",
]
[[package]]
name = "icu_collections"
version = "2.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2984d1cd16c883d7935b9e07e44071dca8d917fd52ecc02c04d5fa0b5a3f191c"
dependencies = [
"displaydoc",
"potential_utf",
"utf8_iter",
"yoke",
"zerofrom",
"zerovec",
]
[[package]]
name = "icu_locale_core"
version = "2.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "92219b62b3e2b4d88ac5119f8904c10f8f61bf7e95b640d25ba3075e6cac2c29"
dependencies = [
"displaydoc",
"litemap",
"tinystr",
"writeable",
"zerovec",
]
[[package]]
name = "icu_normalizer"
version = "2.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c56e5ee99d6e3d33bd91c5d85458b6005a22140021cc324cea84dd0e72cff3b4"
dependencies = [
"icu_collections",
"icu_normalizer_data",
"icu_properties",
"icu_provider",
"smallvec",
"zerovec",
]
[[package]]
name = "icu_normalizer_data"
version = "2.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "da3be0ae77ea334f4da67c12f149704f19f81d1adf7c51cf482943e84a2bad38"
[[package]]
name = "icu_properties"
version = "2.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bee3b67d0ea5c2cca5003417989af8996f8604e34fb9ddf96208a033901e70de"
dependencies = [
"icu_collections",
"icu_locale_core",
"icu_properties_data",
"icu_provider",
"zerotrie",
"zerovec",
]
[[package]]
name = "icu_properties_data"
version = "2.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e2bbb201e0c04f7b4b3e14382af113e17ba4f63e2c9d2ee626b720cbce54a14"
[[package]]
name = "icu_provider"
version = "2.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "139c4cf31c8b5f33d7e199446eff9c1e02decfc2f0eec2c8d71f65befa45b421"
dependencies = [
"displaydoc",
"icu_locale_core",
"writeable",
"yoke",
"zerofrom",
"zerotrie",
"zerovec",
]
[[package]]
@ -1003,27 +894,6 @@ version = "2.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954"
[[package]]
name = "idna"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3b0875f23caa03898994f6ddc501886a45c7d3d62d04d2d90788d47be1b1e4de"
dependencies = [
"idna_adapter",
"smallvec",
"utf8_iter",
]
[[package]]
name = "idna_adapter"
version = "1.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cb68373c0d6620ef8105e855e7745e18b0d00d3bdb07fb532e434244cdb9a714"
dependencies = [
"icu_normalizer",
"icu_properties",
]
[[package]]
name = "indexmap"
version = "2.14.0"
@ -1065,22 +935,6 @@ dependencies = [
"cfg-if",
]
[[package]]
name = "ipnet"
version = "2.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d98f6fed1fde3f8c21bc40a1abb88dd75e67924f9cffc3ef95607bad8017f8e2"
[[package]]
name = "iri-string"
version = "0.7.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "25e659a4bb38e810ebc252e53b5814ff908a8c58c2a9ce2fae1bbec24cbf4e20"
dependencies = [
"memchr",
"serde",
]
[[package]]
name = "is_terminal_polyfill"
version = "1.70.2"
@ -1103,18 +957,6 @@ dependencies = [
"libc",
]
[[package]]
name = "js-sys"
version = "0.3.97"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a1840c94c045fbcf8ba2812c95db44499f7c64910a912551aaaa541decebcacf"
dependencies = [
"cfg-if",
"futures-util",
"once_cell",
"wasm-bindgen",
]
[[package]]
name = "kqueue"
version = "1.1.1"
@ -1171,12 +1013,6 @@ version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53"
[[package]]
name = "litemap"
version = "0.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "92daf443525c4cce67b150400bc2316076100ce0b3686209eb8cf3c31612e6f0"
[[package]]
name = "lock_api"
version = "0.4.14"
@ -1405,15 +1241,6 @@ version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b4596b6d070b27117e987119b4dac604f3c58cfb0b191112e24771b2faeac1a6"
[[package]]
name = "potential_utf"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0103b1cef7ec0cf76490e969665504990193874ea05c85ff9bab8b911d0a0564"
dependencies = [
"zerovec",
]
[[package]]
name = "prettyplease"
version = "0.2.37"
@ -1509,38 +1336,6 @@ version = "0.8.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a"
[[package]]
name = "reqwest"
version = "0.12.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eddd3ca559203180a307f12d114c268abf583f59b03cb906fd0b3ff8646c1147"
dependencies = [
"base64",
"bytes",
"futures-core",
"http",
"http-body",
"http-body-util",
"hyper",
"hyper-util",
"js-sys",
"log",
"percent-encoding",
"pin-project-lite",
"serde",
"serde_json",
"serde_urlencoded",
"sync_wrapper",
"tokio",
"tower",
"tower-http",
"tower-service",
"url",
"wasm-bindgen",
"wasm-bindgen-futures",
"web-sys",
]
[[package]]
name = "rustix"
version = "1.1.4"
@ -1554,12 +1349,6 @@ dependencies = [
"windows-sys 0.61.2",
]
[[package]]
name = "rustversion"
version = "1.0.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d"
[[package]]
name = "ryu"
version = "1.0.23"
@ -1723,12 +1512,6 @@ version = "0.9.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67"
[[package]]
name = "stable_deref_trait"
version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596"
[[package]]
name = "strsim"
version = "0.11.1"
@ -1757,20 +1540,6 @@ name = "sync_wrapper"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263"
dependencies = [
"futures-core",
]
[[package]]
name = "synstructure"
version = "0.13.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "sysinfo"
@ -1828,16 +1597,6 @@ dependencies = [
"cfg-if",
]
[[package]]
name = "tinystr"
version = "0.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c8323304221c2a851516f22236c5722a72eaa19749016521d6dff0824447d96d"
dependencies = [
"displaydoc",
"zerovec",
]
[[package]]
name = "tokio"
version = "1.52.1"
@ -1911,14 +1670,12 @@ dependencies = [
"http-body-util",
"http-range-header",
"httpdate",
"iri-string",
"mime",
"mime_guess",
"percent-encoding",
"pin-project-lite",
"tokio",
"tokio-util",
"tower",
"tower-layer",
"tower-service",
"tracing",
@ -2011,12 +1768,6 @@ dependencies = [
"tracing-serde",
]
[[package]]
name = "try-lock"
version = "0.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b"
[[package]]
name = "typenum"
version = "1.20.0"
@ -2041,24 +1792,6 @@ version = "0.2.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853"
[[package]]
name = "url"
version = "2.5.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ff67a8a4397373c3ef660812acab3268222035010ab8680ec4215f38ba3d0eed"
dependencies = [
"form_urlencoded",
"idna",
"percent-encoding",
"serde",
]
[[package]]
name = "utf8_iter"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be"
[[package]]
name = "utf8parse"
version = "0.2.2"
@ -2087,15 +1820,6 @@ dependencies = [
"winapi-util",
]
[[package]]
name = "want"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e"
dependencies = [
"try-lock",
]
[[package]]
name = "wasi"
version = "0.11.1+wasi-snapshot-preview1"
@ -2120,61 +1844,6 @@ dependencies = [
"wit-bindgen 0.51.0",
]
[[package]]
name = "wasm-bindgen"
version = "0.2.120"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "df52b6d9b87e0c74c9edfa1eb2d9bf85e5d63515474513aa50fa181b3c4f5db1"
dependencies = [
"cfg-if",
"once_cell",
"rustversion",
"wasm-bindgen-macro",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-futures"
version = "0.4.70"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "af934872acec734c2d80e6617bbb5ff4f12b052dd8e6332b0817bce889516084"
dependencies = [
"js-sys",
"wasm-bindgen",
]
[[package]]
name = "wasm-bindgen-macro"
version = "0.2.120"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "78b1041f495fb322e64aca85f5756b2172e35cd459376e67f2a6c9dffcedb103"
dependencies = [
"quote",
"wasm-bindgen-macro-support",
]
[[package]]
name = "wasm-bindgen-macro-support"
version = "0.2.120"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9dcd0ff20416988a18ac686d4d4d0f6aae9ebf08a389ff5d29012b05af2a1b41"
dependencies = [
"bumpalo",
"proc-macro2",
"quote",
"syn",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-shared"
version = "0.2.120"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49757b3c82ebf16c57d69365a142940b384176c24df52a087fb748e2085359ea"
dependencies = [
"unicode-ident",
]
[[package]]
name = "wasm-encoder"
version = "0.244.0"
@ -2209,16 +1878,6 @@ dependencies = [
"semver",
]
[[package]]
name = "web-sys"
version = "0.3.97"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2eadbac71025cd7b0834f20d1fe8472e8495821b4e9801eb0a60bd1f19827602"
dependencies = [
"js-sys",
"wasm-bindgen",
]
[[package]]
name = "winapi"
version = "0.3.9"
@ -2485,56 +2144,6 @@ dependencies = [
"wasmparser",
]
[[package]]
name = "writeable"
version = "0.6.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1ffae5123b2d3fc086436f8834ae3ab053a283cfac8fe0a0b8eaae044768a4c4"
[[package]]
name = "yoke"
version = "0.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "abe8c5fda708d9ca3df187cae8bfb9ceda00dd96231bed36e445a1a48e66f9ca"
dependencies = [
"stable_deref_trait",
"yoke-derive",
"zerofrom",
]
[[package]]
name = "yoke-derive"
version = "0.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "de844c262c8848816172cef550288e7dc6c7b7814b4ee56b3e1553f275f1858e"
dependencies = [
"proc-macro2",
"quote",
"syn",
"synstructure",
]
[[package]]
name = "zerofrom"
version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "69faa1f2a1ea75661980b013019ed6687ed0e83d069bc1114e2cc74c6c04c4df"
dependencies = [
"zerofrom-derive",
]
[[package]]
name = "zerofrom-derive"
version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "11532158c46691caf0f2593ea8358fed6bbf68a0315e80aae9bd41fbade684a1"
dependencies = [
"proc-macro2",
"quote",
"syn",
"synstructure",
]
[[package]]
name = "zeroize"
version = "1.8.2"
@ -2555,39 +2164,6 @@ dependencies = [
"syn",
]
[[package]]
name = "zerotrie"
version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0f9152d31db0792fa83f70fb2f83148effb5c1f5b8c7686c3459e361d9bc20bf"
dependencies = [
"displaydoc",
"yoke",
"zerofrom",
]
[[package]]
name = "zerovec"
version = "0.11.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "90f911cbc359ab6af17377d242225f4d75119aec87ea711a880987b18cd7b239"
dependencies = [
"yoke",
"zerofrom",
"zerovec-derive",
]
[[package]]
name = "zerovec-derive"
version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "625dc425cab0dca6dc3c3319506e6593dcb08a9f387ea3b284dbd52a92c40555"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "zmij"
version = "1.0.21"

View File

@ -1,6 +1,6 @@
[package]
name = "envd"
version = "0.2.1"
version = "0.3.0"
edition = "2024"
rust-version = "1.88"
@ -53,9 +53,6 @@ notify = "7"
# Compression
flate2 = "1"
# HTTP client (MMDS polling)
reqwest = { version = "0.12", default-features = false, features = ["json"] }
# Directory walking
walkdir = "2"

View File

@ -1,6 +1,6 @@
# envd (Rust)
Wrenn guest agent daemon — runs as PID 1 inside Firecracker microVMs. Provides process management, filesystem operations, file transfer, port forwarding, and VM lifecycle control over Connect RPC and HTTP.
Wrenn guest agent daemon — runs as PID 1 inside Cloud Hypervisor microVMs. Provides process management, filesystem operations, file transfer, port forwarding, and VM lifecycle control over Connect RPC and HTTP.
Rust rewrite of `envd/` (Go). Drop-in replacement — same wire protocol, same endpoints, same CLI flags.
@ -50,7 +50,7 @@ cargo build
Run locally (outside a VM):
```bash
./target/debug/envd --isnotfc --port 49983
./target/debug/envd --port 49983
```
### Via Makefile (from repo root)
@ -64,7 +64,6 @@ make build-envd-go # Go version (for comparison)
```
--port <PORT> Listen port [default: 49983]
--isnotfc Not running inside Firecracker (disables MMDS, cgroups)
--version Print version and exit
--commit Print git commit and exit
--cmd <CMD> Spawn a process at startup (e.g. --cmd "/bin/bash")
@ -81,7 +80,7 @@ make build-envd-go # Go version (for comparison)
| GET | `/metrics` | System metrics (CPU, memory, disk) |
| GET | `/envs` | Current environment variables |
| POST | `/init` | Host agent init (token, env, mounts) |
| POST | `/snapshot/prepare` | Quiesce before Firecracker snapshot |
| POST | `/snapshot/prepare` | Quiesce before Cloud Hypervisor snapshot |
| GET | `/files` | Download file (gzip, range support) |
| POST | `/files` | Upload file(s) via multipart |
@ -108,7 +107,7 @@ src/
├── util.rs # AtomicMax
├── auth/ # Token, signing, middleware
├── crypto/ # SHA-256, SHA-512, HMAC
├── host/ # MMDS polling, system metrics
├── host/ # System metrics
├── http/ # Axum handlers (health, init, snapshot, files, encoding)
├── permissions/ # Path resolution, user lookup, chown
├── rpc/ # Connect RPC services

View File

@ -9,8 +9,3 @@ pub const WRENN_RUN_DIR: &str = "/run/wrenn";
pub const KILOBYTE: u64 = 1024;
pub const MEGABYTE: u64 = 1024 * KILOBYTE;
pub const MMDS_ADDRESS: &str = "169.254.169.254";
pub const MMDS_POLL_INTERVAL: Duration = Duration::from_millis(50);
pub const MMDS_TOKEN_EXPIRATION_SECS: u64 = 60;
pub const MMDS_ACCESS_TOKEN_CLIENT_TIMEOUT: Duration = Duration::from_secs(10);

View File

@ -1,73 +0,0 @@
use std::ffi::CString;
use std::time::{SystemTime, UNIX_EPOCH};
use serde::Serialize;
#[derive(Serialize)]
pub struct Metrics {
pub ts: i64,
pub cpu_count: u32,
pub cpu_used_pct: f32,
pub mem_total_mib: u64,
pub mem_used_mib: u64,
pub mem_total: u64,
pub mem_used: u64,
pub disk_used: u64,
pub disk_total: u64,
}
pub fn get_metrics() -> Result<Metrics, String> {
use sysinfo::System;
let mut sys = System::new();
sys.refresh_memory();
sys.refresh_cpu_all();
std::thread::sleep(std::time::Duration::from_millis(100));
sys.refresh_cpu_all();
let cpu_count = sys.cpus().len() as u32;
let cpu_used_pct = sys.global_cpu_usage();
let cpu_used_pct_rounded = if cpu_used_pct > 0.0 {
(cpu_used_pct * 100.0).round() / 100.0
} else {
0.0
};
let mem_total = sys.total_memory();
let mem_used = sys.used_memory();
let (disk_total, disk_used) = disk_stats("/")?;
let ts = SystemTime::now()
.duration_since(UNIX_EPOCH)
.unwrap()
.as_secs() as i64;
Ok(Metrics {
ts,
cpu_count,
cpu_used_pct: cpu_used_pct_rounded,
mem_total_mib: mem_total / 1024 / 1024,
mem_used_mib: mem_used / 1024 / 1024,
mem_total,
mem_used,
disk_used,
disk_total,
})
}
fn disk_stats(path: &str) -> Result<(u64, u64), String> {
let c_path = CString::new(path).unwrap();
let mut stat: libc::statfs = unsafe { std::mem::zeroed() };
let ret = unsafe { libc::statfs(c_path.as_ptr(), &mut stat) };
if ret != 0 {
return Err(format!("statfs failed: {}", std::io::Error::last_os_error()));
}
let block = stat.f_bsize as u64;
let total = stat.f_blocks * block;
let available = stat.f_bavail * block;
Ok((total, total - available))
}

View File

@ -1,120 +0,0 @@
use std::sync::Arc;
use std::time::Duration;
use dashmap::DashMap;
use serde::Deserialize;
use tokio_util::sync::CancellationToken;
use crate::config::{MMDS_ADDRESS, MMDS_POLL_INTERVAL, MMDS_TOKEN_EXPIRATION_SECS, WRENN_RUN_DIR};
#[derive(Debug, Clone, Deserialize)]
pub struct MMDSOpts {
#[serde(rename = "instanceID")]
pub sandbox_id: String,
#[serde(rename = "envID")]
pub template_id: String,
#[serde(rename = "address", default)]
pub logs_collector_address: String,
#[serde(rename = "accessTokenHash", default)]
pub access_token_hash: String,
}
async fn get_mmds_token(client: &reqwest::Client) -> Result<String, String> {
let resp = client
.put(format!("http://{MMDS_ADDRESS}/latest/api/token"))
.header(
"X-metadata-token-ttl-seconds",
MMDS_TOKEN_EXPIRATION_SECS.to_string(),
)
.send()
.await
.map_err(|e| format!("mmds token request failed: {e}"))?;
let token = resp.text().await.map_err(|e| format!("mmds token read: {e}"))?;
if token.is_empty() {
return Err("mmds token is an empty string".into());
}
Ok(token)
}
async fn get_mmds_opts(client: &reqwest::Client, token: &str) -> Result<MMDSOpts, String> {
let resp = client
.get(format!("http://{MMDS_ADDRESS}"))
.header("X-metadata-token", token)
.header("Accept", "application/json")
.send()
.await
.map_err(|e| format!("mmds opts request failed: {e}"))?;
resp.json::<MMDSOpts>()
.await
.map_err(|e| format!("mmds opts parse: {e}"))
}
pub async fn get_access_token_hash() -> Result<String, String> {
let client = reqwest::Client::builder()
.timeout(Duration::from_secs(10))
.no_proxy()
.build()
.map_err(|e| format!("http client: {e}"))?;
let token = get_mmds_token(&client).await?;
let opts = get_mmds_opts(&client, &token).await?;
Ok(opts.access_token_hash)
}
/// Polls MMDS every 50ms until metadata is available.
/// Stores sandbox_id and template_id in env_vars and writes to /run/wrenn/ files.
pub async fn poll_for_opts(
env_vars: Arc<DashMap<String, String>>,
cancel: CancellationToken,
) -> Option<MMDSOpts> {
let client = reqwest::Client::builder()
.no_proxy()
.build()
.ok()?;
let mut interval = tokio::time::interval(MMDS_POLL_INTERVAL);
loop {
tokio::select! {
_ = cancel.cancelled() => {
tracing::warn!("context cancelled while waiting for mmds opts");
return None;
}
_ = interval.tick() => {
let token = match get_mmds_token(&client).await {
Ok(t) => t,
Err(e) => {
tracing::debug!(error = %e, "mmds token poll");
continue;
}
};
let opts = match get_mmds_opts(&client, &token).await {
Ok(o) => o,
Err(e) => {
tracing::debug!(error = %e, "mmds opts poll");
continue;
}
};
env_vars.insert("WRENN_SANDBOX_ID".into(), opts.sandbox_id.clone());
env_vars.insert("WRENN_TEMPLATE_ID".into(), opts.template_id.clone());
let run_dir = std::path::Path::new(WRENN_RUN_DIR);
if let Err(e) = std::fs::create_dir_all(run_dir) {
tracing::error!(error = %e, "mmds: failed to create run dir");
}
if let Err(e) = std::fs::write(run_dir.join(".WRENN_SANDBOX_ID"), &opts.sandbox_id) {
tracing::error!(error = %e, "mmds: failed to write .WRENN_SANDBOX_ID");
}
if let Err(e) = std::fs::write(run_dir.join(".WRENN_TEMPLATE_ID"), &opts.template_id) {
tracing::error!(error = %e, "mmds: failed to write .WRENN_TEMPLATE_ID");
}
return Some(opts);
}
}
}
}

View File

@ -1,2 +0,0 @@
pub mod metrics;
pub mod mmds;

View File

@ -1,5 +1,4 @@
use std::sync::Arc;
use std::sync::atomic::Ordering;
use axum::Json;
use axum::extract::State;
@ -10,13 +9,7 @@ use serde_json::json;
use crate::state::AppState;
pub async fn get_health(State(state): State<Arc<AppState>>) -> impl IntoResponse {
if state
.needs_restore
.compare_exchange(true, false, Ordering::AcqRel, Ordering::Relaxed)
.is_ok()
{
post_restore_recovery(&state);
}
state.try_restore_recovery();
tracing::trace!("health check");
@ -25,17 +18,3 @@ pub async fn get_health(State(state): State<Arc<AppState>>) -> impl IntoResponse
Json(json!({ "version": state.version })),
)
}
fn post_restore_recovery(state: &AppState) {
tracing::info!("restore: post-restore recovery (no GC needed in Rust)");
state.snapshot_in_progress.store(false, std::sync::atomic::Ordering::Release);
state.conn_tracker.restore_after_snapshot();
tracing::info!("restore: zombie connections closed");
if let Some(ref ps) = state.port_subsystem {
ps.restart();
tracing::info!("restore: port subsystem restarted");
}
}

View File

@ -1,6 +1,5 @@
use std::collections::HashMap;
use std::sync::Arc;
use std::sync::atomic::Ordering;
use axum::Json;
use axum::extract::State;
@ -8,20 +7,25 @@ use axum::http::{StatusCode, header};
use axum::response::IntoResponse;
use serde::Deserialize;
use crate::crypto;
use crate::host::mmds;
use crate::state::AppState;
#[derive(Deserialize, Default)]
#[serde(rename_all = "camelCase")]
pub struct InitRequest {
#[serde(rename = "access_token")]
pub access_token: Option<String>,
#[serde(rename = "defaultUser")]
pub default_user: Option<String>,
#[serde(rename = "defaultWorkdir")]
pub default_workdir: Option<String>,
#[serde(rename = "envVars")]
pub env_vars: Option<HashMap<String, String>>,
#[serde(rename = "hyperloop_ip")]
pub hyperloop_ip: Option<String>,
pub timestamp: Option<String>,
#[serde(rename = "volume_mounts")]
pub volume_mounts: Option<Vec<VolumeMount>>,
pub sandbox_id: Option<String>,
pub template_id: Option<String>,
}
#[derive(Deserialize)]
@ -110,37 +114,27 @@ pub async fn post_init(
}
}
// Re-poll MMDS in background
if state.is_fc {
let env_vars = Arc::clone(&state.defaults.env_vars);
let cancel = tokio_util::sync::CancellationToken::new();
let cancel_clone = cancel.clone();
tokio::spawn(async move {
tokio::time::timeout(std::time::Duration::from_secs(60), async {
mmds::poll_for_opts(env_vars, cancel_clone).await;
})
.await
.ok();
});
// Set sandbox/template metadata from request body.
if let Some(ref id) = init_req.sandbox_id {
tracing::debug!(sandbox_id = %id, "setting sandbox ID from init request");
// SAFETY: envd is single-threaded at init time; no concurrent env reads.
unsafe { std::env::set_var("WRENN_SANDBOX_ID", id) };
write_run_file(".WRENN_SANDBOX_ID", id);
state.defaults.env_vars.insert("WRENN_SANDBOX_ID".into(), id.clone());
}
if let Some(ref id) = init_req.template_id {
tracing::debug!(template_id = %id, "setting template ID from init request");
// SAFETY: envd is single-threaded at init time; no concurrent env reads.
unsafe { std::env::set_var("WRENN_TEMPLATE_ID", id) };
write_run_file(".WRENN_TEMPLATE_ID", id);
state.defaults.env_vars.insert("WRENN_TEMPLATE_ID".into(), id.clone());
}
trigger_restore_and_respond(&state).await
}
async fn trigger_restore_and_respond(state: &AppState) -> axum::response::Response {
// Safety net: if health check's postRestoreRecovery hasn't run yet
if state
.needs_restore
.compare_exchange(true, false, Ordering::AcqRel, Ordering::Relaxed)
.is_ok()
{
post_restore_recovery(state);
}
state.conn_tracker.restore_after_snapshot();
if let Some(ref ps) = state.port_subsystem {
ps.restart();
}
state.try_restore_recovery();
(
StatusCode::NO_CONTENT,
@ -149,46 +143,13 @@ async fn trigger_restore_and_respond(state: &AppState) -> axum::response::Respon
.into_response()
}
fn post_restore_recovery(state: &AppState) {
tracing::info!("restore: post-restore recovery (no GC needed in Rust)");
state.snapshot_in_progress.store(false, std::sync::atomic::Ordering::Release);
state.conn_tracker.restore_after_snapshot();
if let Some(ref ps) = state.port_subsystem {
ps.restart();
tracing::info!("restore: port subsystem restarted");
}
}
async fn validate_init_access_token(state: &AppState, request_token: &str) -> Result<(), String> {
// Fast path: matches existing token
if state.access_token.is_set() && !request_token.is_empty() && state.access_token.equals(request_token) {
return Ok(());
}
// Check MMDS hash
if state.is_fc {
if let Ok(mmds_hash) = mmds::get_access_token_hash().await {
if !mmds_hash.is_empty() {
if request_token.is_empty() {
let empty_hash = crypto::sha512::hash_access_token("");
if mmds_hash == empty_hash {
return Ok(());
}
} else {
let token_hash = crypto::sha512::hash_access_token(request_token);
if mmds_hash == token_hash {
return Ok(());
}
}
return Err("access token validation failed".into());
}
}
}
// First-time setup: no existing token and no MMDS
// First-time setup: no existing token
if !state.access_token.is_set() {
return Ok(());
}
@ -268,14 +229,21 @@ async fn setup_nfs(nfs_target: &str, path: &str) {
}
}
fn write_run_file(name: &str, value: &str) {
let dir = std::path::Path::new("/run/wrenn");
if let Err(e) = std::fs::create_dir_all(dir) {
tracing::warn!(error = %e, "failed to create /run/wrenn");
return;
}
if let Err(e) = std::fs::write(dir.join(name), value) {
tracing::warn!(error = %e, name, "failed to write run file");
}
}
fn chrono_parse_to_nanos(ts: &str) -> Result<i64, ()> {
// Parse RFC3339 timestamp to nanoseconds since epoch
// Simple approach: parse as seconds + fractional
let secs = ts.parse::<f64>().ok();
if let Some(s) = secs {
return Ok((s * 1_000_000_000.0) as i64);
}
// Try RFC3339 format
// For now, fall back to allowing the update
Err(())
}

View File

@ -7,7 +7,7 @@ use axum::response::IntoResponse;
use crate::state::AppState;
/// POST /snapshot/prepare — quiesce subsystems before Firecracker snapshot.
/// POST /snapshot/prepare — quiesce subsystems before VM snapshot.
///
/// In Rust there is no GC dance. We just:
/// 1. Drop page cache to shrink snapshot size

View File

@ -6,7 +6,6 @@ mod config;
mod conntracker;
mod crypto;
mod execcontext;
mod host;
mod http;
mod logging;
mod permissions;
@ -22,7 +21,6 @@ use std::sync::Arc;
use clap::Parser;
use tokio::net::TcpListener;
use tokio_util::sync::CancellationToken;
use config::{DEFAULT_PORT, DEFAULT_USER, WRENN_RUN_DIR};
use execcontext::Defaults;
@ -44,9 +42,6 @@ struct Cli {
#[arg(long, default_value_t = DEFAULT_PORT)]
port: u16,
#[arg(long = "isnotfc", default_value_t = false)]
is_not_fc: bool,
#[arg(long)]
version: bool,
@ -73,35 +68,22 @@ async fn main() {
return;
}
let use_json = !cli.is_not_fc;
logging::init(use_json);
logging::init(true);
if let Err(e) = fs::create_dir_all(WRENN_RUN_DIR) {
tracing::error!(error = %e, "failed to create wrenn run directory");
}
let defaults = Defaults::new(DEFAULT_USER);
let is_fc_str = if cli.is_not_fc { "false" } else { "true" };
defaults
.env_vars
.insert("WRENN_SANDBOX".into(), is_fc_str.into());
.insert("WRENN_SANDBOX".into(), "true".into());
let wrenn_sandbox_path = Path::new(WRENN_RUN_DIR).join(".WRENN_SANDBOX");
if let Err(e) = fs::write(&wrenn_sandbox_path, is_fc_str.as_bytes()) {
if let Err(e) = fs::write(&wrenn_sandbox_path, b"true") {
tracing::error!(error = %e, "failed to write sandbox file");
}
let cancel = CancellationToken::new();
// MMDS polling (only in FC mode)
if !cli.is_not_fc {
let env_vars = Arc::clone(&defaults.env_vars);
let cancel_clone = cancel.clone();
tokio::spawn(async move {
host::mmds::poll_for_opts(env_vars, cancel_clone).await;
});
}
// Cgroup manager
let cgroup_manager: Arc<dyn cgroups::CgroupManager> =
match cgroups::Cgroup2Manager::new(
@ -143,14 +125,13 @@ async fn main() {
defaults,
VERSION.to_string(),
COMMIT.to_string(),
!cli.is_not_fc,
Some(Arc::clone(&port_subsystem)),
);
// Memory reclaimer — drop page cache when available memory is low.
// Firecracker balloon device can only reclaim pages the guest kernel freed.
// The balloon device can only reclaim pages the guest kernel freed.
// Pauses during snapshot/prepare to avoid corrupting kernel page table state.
if !cli.is_not_fc {
{
let state_for_reclaimer = Arc::clone(&state);
std::thread::spawn(move || memory_reclaimer(state_for_reclaimer));
}
@ -188,7 +169,6 @@ async fn main() {
}
port_subsystem.stop();
cancel.cancel();
}
fn spawn_initial_command(cmd: &str, state: &AppState) {
@ -233,9 +213,11 @@ fn spawn_initial_command(cmd: &str, state: &AppState) {
fn memory_reclaimer(state: Arc<AppState>) {
use std::sync::atomic::Ordering;
use std::time::{Duration, SystemTime, UNIX_EPOCH};
const CHECK_INTERVAL: std::time::Duration = std::time::Duration::from_secs(10);
const CHECK_INTERVAL: Duration = Duration::from_secs(10);
const DROP_THRESHOLD_PCT: u64 = 80;
const RESTORE_GRACE_SECS: u64 = 30;
loop {
std::thread::sleep(CHECK_INTERVAL);
@ -244,6 +226,20 @@ fn memory_reclaimer(state: Arc<AppState>) {
continue;
}
// Skip during post-restore grace period. Balloon deflation causes
// transient high memory that resolves on its own — triggering
// drop_caches during UFFD page fault storms makes the guest unresponsive.
let restore_epoch = state.restore_epoch.load(Ordering::Acquire);
if restore_epoch > 0 {
let now = SystemTime::now()
.duration_since(UNIX_EPOCH)
.unwrap_or_default()
.as_secs();
if now.saturating_sub(restore_epoch) < RESTORE_GRACE_SECS {
continue;
}
}
let mut sys = sysinfo::System::new();
sys.refresh_memory();
let total = sys.total_memory();

View File

@ -57,7 +57,9 @@ impl Scanner {
pub async fn scan_and_broadcast(&self, cancel: CancellationToken) {
loop {
let conns = read_tcp_connections();
let conns = tokio::task::spawn_blocking(read_tcp_connections)
.await
.unwrap_or_default();
{
let subs = self.subs.read().unwrap();

View File

@ -1,5 +1,6 @@
use std::sync::atomic::{AtomicBool, AtomicU32, Ordering};
use std::sync::atomic::{AtomicBool, AtomicU32, AtomicU64, Ordering};
use std::sync::Arc;
use std::time::{SystemTime, UNIX_EPOCH};
use crate::auth::token::SecureToken;
use crate::conntracker::ConnTracker;
@ -11,7 +12,6 @@ pub struct AppState {
pub defaults: Defaults,
pub version: String,
pub commit: String,
pub is_fc: bool,
pub needs_restore: AtomicBool,
pub last_set_time: AtomicMax,
pub access_token: SecureToken,
@ -20,6 +20,8 @@ pub struct AppState {
pub cpu_used_pct: AtomicU32,
pub cpu_count: AtomicU32,
pub snapshot_in_progress: AtomicBool,
pub last_health_epoch: AtomicU64,
pub restore_epoch: AtomicU64,
}
impl AppState {
@ -27,14 +29,12 @@ impl AppState {
defaults: Defaults,
version: String,
commit: String,
is_fc: bool,
port_subsystem: Option<Arc<PortSubsystem>>,
) -> Arc<Self> {
let state = Arc::new(Self {
defaults,
version,
commit,
is_fc,
needs_restore: AtomicBool::new(false),
last_set_time: AtomicMax::new(),
access_token: SecureToken::new(),
@ -43,6 +43,8 @@ impl AppState {
cpu_used_pct: AtomicU32::new(0),
cpu_count: AtomicU32::new(0),
snapshot_in_progress: AtomicBool::new(false),
last_health_epoch: AtomicU64::new(0),
restore_epoch: AtomicU64::new(0),
});
let state_clone = Arc::clone(&state);
@ -60,6 +62,47 @@ impl AppState {
pub fn cpu_count(&self) -> u32 {
self.cpu_count.load(Ordering::Relaxed)
}
/// Runs post-restore recovery if `needs_restore` is set OR a wall-clock
/// gap is detected (catches restores where snapshot/prepare never ran).
pub fn try_restore_recovery(&self) {
let now_epoch = SystemTime::now()
.duration_since(UNIX_EPOCH)
.unwrap_or_default()
.as_secs();
let prev_epoch = self.last_health_epoch.swap(now_epoch, Ordering::AcqRel);
// Detect restore via wall-clock gap: if >3s passed since last health
// check, the VM was frozen and restored. Catches the case where
// snapshot/prepare timed out and needs_restore was never set.
let gap_detected = prev_epoch > 0 && now_epoch.saturating_sub(prev_epoch) > 3;
let flag_set = self
.needs_restore
.compare_exchange(true, false, Ordering::AcqRel, Ordering::Relaxed)
.is_ok();
if !flag_set && !gap_detected {
return;
}
if gap_detected && !flag_set {
tracing::info!(
gap_secs = now_epoch.saturating_sub(prev_epoch),
"restore: detected via wall-clock gap (needs_restore was not set)"
);
}
tracing::info!("restore: post-restore recovery");
self.snapshot_in_progress.store(false, Ordering::Release);
self.restore_epoch.store(now_epoch, Ordering::Release);
self.conn_tracker.restore_after_snapshot();
if let Some(ref ps) = self.port_subsystem {
ps.restart();
tracing::info!("restore: port subsystem restarted");
}
}
}
fn cpu_sampler(state: Arc<AppState>) {
@ -70,6 +113,15 @@ fn cpu_sampler(state: Arc<AppState>) {
loop {
std::thread::sleep(std::time::Duration::from_secs(1));
if state.needs_restore.load(Ordering::Acquire) {
// After snapshot restore, sysinfo's internal CPU counters are stale.
// Reinitialize to get a fresh baseline.
sys = System::new();
sys.refresh_cpu_all();
continue;
}
sys.refresh_cpu_all();
let pct = sys.global_cpu_usage();