From dd8a94043190125aeba6a0a045fab4b4ee68a863 Mon Sep 17 00:00:00 2001 From: pptx704 Date: Sun, 17 May 2026 01:33:25 +0600 Subject: [PATCH] feat(envd): update guest agent for Cloud Hypervisor Remove Firecracker-specific MMDS metadata fetching and metrics host module. CH communicates with the guest purely over TAP networking, so MMDS (Firecracker's metadata service via MMDS address) is no longer needed. - Remove src/host/ module (mmds.rs, metrics.rs) - Remove reqwest dependency (was only used for MMDS HTTP calls) - Remove --isnotfc CLI flag (no longer dual-mode) - Simplify health endpoint and init handler - Update state management for CH snapshot lifecycle - Bump version to 0.3.0 --- envd-rs/Cargo.lock | 426 +---------------------------------- envd-rs/Cargo.toml | 5 +- envd-rs/README.md | 9 +- envd-rs/src/config.rs | 5 - envd-rs/src/host/metrics.rs | 73 ------ envd-rs/src/host/mmds.rs | 120 ---------- envd-rs/src/host/mod.rs | 2 - envd-rs/src/http/health.rs | 23 +- envd-rs/src/http/init.rs | 102 +++------ envd-rs/src/http/snapshot.rs | 2 +- envd-rs/src/main.rs | 48 ++-- envd-rs/src/port/scanner.rs | 4 +- envd-rs/src/state.rs | 60 ++++- 13 files changed, 124 insertions(+), 755 deletions(-) delete mode 100644 envd-rs/src/host/metrics.rs delete mode 100644 envd-rs/src/host/mmds.rs delete mode 100644 envd-rs/src/host/mod.rs diff --git a/envd-rs/Cargo.lock b/envd-rs/Cargo.lock index 1120784..e40f5bd 100644 --- a/envd-rs/Cargo.lock +++ b/envd-rs/Cargo.lock @@ -241,12 +241,6 @@ dependencies = [ "serde", ] -[[package]] -name = "bumpalo" -version = "3.20.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb" - [[package]] name = "bytes" version = "1.11.1" @@ -486,17 +480,6 @@ dependencies = [ "subtle", ] -[[package]] -name = "displaydoc" -version = "0.2.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "either" version = "1.15.0" @@ -514,7 +497,7 @@ dependencies = [ [[package]] name = "envd" -version = "0.2.1" +version = "0.3.0" dependencies = [ "async-stream", "axum", @@ -537,7 +520,6 @@ dependencies = [ "mime_guess", "nix", "notify", - "reqwest", "serde", "serde_json", "sha2", @@ -889,7 +871,6 @@ dependencies = [ "pin-project-lite", "smallvec", "tokio", - "want", ] [[package]] @@ -898,103 +879,13 @@ version = "0.1.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "96547c2556ec9d12fb1578c4eaf448b04993e7fb79cbaad930a656880a6bdfa0" dependencies = [ - "base64", "bytes", - "futures-channel", - "futures-util", "http", "http-body", "hyper", - "ipnet", - "libc", - "percent-encoding", "pin-project-lite", - "socket2", "tokio", "tower-service", - "tracing", -] - -[[package]] -name = "icu_collections" -version = "2.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2984d1cd16c883d7935b9e07e44071dca8d917fd52ecc02c04d5fa0b5a3f191c" -dependencies = [ - "displaydoc", - "potential_utf", - "utf8_iter", - "yoke", - "zerofrom", - "zerovec", -] - -[[package]] -name = "icu_locale_core" -version = "2.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92219b62b3e2b4d88ac5119f8904c10f8f61bf7e95b640d25ba3075e6cac2c29" -dependencies = [ - "displaydoc", - "litemap", - "tinystr", - "writeable", - "zerovec", -] - -[[package]] -name = "icu_normalizer" -version = "2.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c56e5ee99d6e3d33bd91c5d85458b6005a22140021cc324cea84dd0e72cff3b4" -dependencies = [ - "icu_collections", - "icu_normalizer_data", - "icu_properties", - "icu_provider", - "smallvec", - "zerovec", -] - -[[package]] -name = "icu_normalizer_data" -version = "2.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da3be0ae77ea334f4da67c12f149704f19f81d1adf7c51cf482943e84a2bad38" - -[[package]] -name = "icu_properties" -version = "2.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bee3b67d0ea5c2cca5003417989af8996f8604e34fb9ddf96208a033901e70de" -dependencies = [ - "icu_collections", - "icu_locale_core", - "icu_properties_data", - "icu_provider", - "zerotrie", - "zerovec", -] - -[[package]] -name = "icu_properties_data" -version = "2.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e2bbb201e0c04f7b4b3e14382af113e17ba4f63e2c9d2ee626b720cbce54a14" - -[[package]] -name = "icu_provider" -version = "2.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "139c4cf31c8b5f33d7e199446eff9c1e02decfc2f0eec2c8d71f65befa45b421" -dependencies = [ - "displaydoc", - "icu_locale_core", - "writeable", - "yoke", - "zerofrom", - "zerotrie", - "zerovec", ] [[package]] @@ -1003,27 +894,6 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" -[[package]] -name = "idna" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b0875f23caa03898994f6ddc501886a45c7d3d62d04d2d90788d47be1b1e4de" -dependencies = [ - "idna_adapter", - "smallvec", - "utf8_iter", -] - -[[package]] -name = "idna_adapter" -version = "1.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb68373c0d6620ef8105e855e7745e18b0d00d3bdb07fb532e434244cdb9a714" -dependencies = [ - "icu_normalizer", - "icu_properties", -] - [[package]] name = "indexmap" version = "2.14.0" @@ -1065,22 +935,6 @@ dependencies = [ "cfg-if", ] -[[package]] -name = "ipnet" -version = "2.12.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d98f6fed1fde3f8c21bc40a1abb88dd75e67924f9cffc3ef95607bad8017f8e2" - -[[package]] -name = "iri-string" -version = "0.7.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25e659a4bb38e810ebc252e53b5814ff908a8c58c2a9ce2fae1bbec24cbf4e20" -dependencies = [ - "memchr", - "serde", -] - [[package]] name = "is_terminal_polyfill" version = "1.70.2" @@ -1103,18 +957,6 @@ dependencies = [ "libc", ] -[[package]] -name = "js-sys" -version = "0.3.97" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1840c94c045fbcf8ba2812c95db44499f7c64910a912551aaaa541decebcacf" -dependencies = [ - "cfg-if", - "futures-util", - "once_cell", - "wasm-bindgen", -] - [[package]] name = "kqueue" version = "1.1.1" @@ -1171,12 +1013,6 @@ version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53" -[[package]] -name = "litemap" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92daf443525c4cce67b150400bc2316076100ce0b3686209eb8cf3c31612e6f0" - [[package]] name = "lock_api" version = "0.4.14" @@ -1405,15 +1241,6 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b4596b6d070b27117e987119b4dac604f3c58cfb0b191112e24771b2faeac1a6" -[[package]] -name = "potential_utf" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0103b1cef7ec0cf76490e969665504990193874ea05c85ff9bab8b911d0a0564" -dependencies = [ - "zerovec", -] - [[package]] name = "prettyplease" version = "0.2.37" @@ -1509,38 +1336,6 @@ version = "0.8.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" -[[package]] -name = "reqwest" -version = "0.12.28" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eddd3ca559203180a307f12d114c268abf583f59b03cb906fd0b3ff8646c1147" -dependencies = [ - "base64", - "bytes", - "futures-core", - "http", - "http-body", - "http-body-util", - "hyper", - "hyper-util", - "js-sys", - "log", - "percent-encoding", - "pin-project-lite", - "serde", - "serde_json", - "serde_urlencoded", - "sync_wrapper", - "tokio", - "tower", - "tower-http", - "tower-service", - "url", - "wasm-bindgen", - "wasm-bindgen-futures", - "web-sys", -] - [[package]] name = "rustix" version = "1.1.4" @@ -1554,12 +1349,6 @@ dependencies = [ "windows-sys 0.61.2", ] -[[package]] -name = "rustversion" -version = "1.0.22" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" - [[package]] name = "ryu" version = "1.0.23" @@ -1723,12 +1512,6 @@ version = "0.9.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" -[[package]] -name = "stable_deref_trait" -version = "1.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" - [[package]] name = "strsim" version = "0.11.1" @@ -1757,20 +1540,6 @@ name = "sync_wrapper" version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263" -dependencies = [ - "futures-core", -] - -[[package]] -name = "synstructure" -version = "0.13.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] [[package]] name = "sysinfo" @@ -1828,16 +1597,6 @@ dependencies = [ "cfg-if", ] -[[package]] -name = "tinystr" -version = "0.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8323304221c2a851516f22236c5722a72eaa19749016521d6dff0824447d96d" -dependencies = [ - "displaydoc", - "zerovec", -] - [[package]] name = "tokio" version = "1.52.1" @@ -1911,14 +1670,12 @@ dependencies = [ "http-body-util", "http-range-header", "httpdate", - "iri-string", "mime", "mime_guess", "percent-encoding", "pin-project-lite", "tokio", "tokio-util", - "tower", "tower-layer", "tower-service", "tracing", @@ -2011,12 +1768,6 @@ dependencies = [ "tracing-serde", ] -[[package]] -name = "try-lock" -version = "0.2.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" - [[package]] name = "typenum" version = "1.20.0" @@ -2041,24 +1792,6 @@ version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" -[[package]] -name = "url" -version = "2.5.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff67a8a4397373c3ef660812acab3268222035010ab8680ec4215f38ba3d0eed" -dependencies = [ - "form_urlencoded", - "idna", - "percent-encoding", - "serde", -] - -[[package]] -name = "utf8_iter" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" - [[package]] name = "utf8parse" version = "0.2.2" @@ -2087,15 +1820,6 @@ dependencies = [ "winapi-util", ] -[[package]] -name = "want" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e" -dependencies = [ - "try-lock", -] - [[package]] name = "wasi" version = "0.11.1+wasi-snapshot-preview1" @@ -2120,61 +1844,6 @@ dependencies = [ "wit-bindgen 0.51.0", ] -[[package]] -name = "wasm-bindgen" -version = "0.2.120" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df52b6d9b87e0c74c9edfa1eb2d9bf85e5d63515474513aa50fa181b3c4f5db1" -dependencies = [ - "cfg-if", - "once_cell", - "rustversion", - "wasm-bindgen-macro", - "wasm-bindgen-shared", -] - -[[package]] -name = "wasm-bindgen-futures" -version = "0.4.70" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af934872acec734c2d80e6617bbb5ff4f12b052dd8e6332b0817bce889516084" -dependencies = [ - "js-sys", - "wasm-bindgen", -] - -[[package]] -name = "wasm-bindgen-macro" -version = "0.2.120" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78b1041f495fb322e64aca85f5756b2172e35cd459376e67f2a6c9dffcedb103" -dependencies = [ - "quote", - "wasm-bindgen-macro-support", -] - -[[package]] -name = "wasm-bindgen-macro-support" -version = "0.2.120" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9dcd0ff20416988a18ac686d4d4d0f6aae9ebf08a389ff5d29012b05af2a1b41" -dependencies = [ - "bumpalo", - "proc-macro2", - "quote", - "syn", - "wasm-bindgen-shared", -] - -[[package]] -name = "wasm-bindgen-shared" -version = "0.2.120" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49757b3c82ebf16c57d69365a142940b384176c24df52a087fb748e2085359ea" -dependencies = [ - "unicode-ident", -] - [[package]] name = "wasm-encoder" version = "0.244.0" @@ -2209,16 +1878,6 @@ dependencies = [ "semver", ] -[[package]] -name = "web-sys" -version = "0.3.97" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2eadbac71025cd7b0834f20d1fe8472e8495821b4e9801eb0a60bd1f19827602" -dependencies = [ - "js-sys", - "wasm-bindgen", -] - [[package]] name = "winapi" version = "0.3.9" @@ -2485,56 +2144,6 @@ dependencies = [ "wasmparser", ] -[[package]] -name = "writeable" -version = "0.6.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ffae5123b2d3fc086436f8834ae3ab053a283cfac8fe0a0b8eaae044768a4c4" - -[[package]] -name = "yoke" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "abe8c5fda708d9ca3df187cae8bfb9ceda00dd96231bed36e445a1a48e66f9ca" -dependencies = [ - "stable_deref_trait", - "yoke-derive", - "zerofrom", -] - -[[package]] -name = "yoke-derive" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de844c262c8848816172cef550288e7dc6c7b7814b4ee56b3e1553f275f1858e" -dependencies = [ - "proc-macro2", - "quote", - "syn", - "synstructure", -] - -[[package]] -name = "zerofrom" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69faa1f2a1ea75661980b013019ed6687ed0e83d069bc1114e2cc74c6c04c4df" -dependencies = [ - "zerofrom-derive", -] - -[[package]] -name = "zerofrom-derive" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "11532158c46691caf0f2593ea8358fed6bbf68a0315e80aae9bd41fbade684a1" -dependencies = [ - "proc-macro2", - "quote", - "syn", - "synstructure", -] - [[package]] name = "zeroize" version = "1.8.2" @@ -2555,39 +2164,6 @@ dependencies = [ "syn", ] -[[package]] -name = "zerotrie" -version = "0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f9152d31db0792fa83f70fb2f83148effb5c1f5b8c7686c3459e361d9bc20bf" -dependencies = [ - "displaydoc", - "yoke", - "zerofrom", -] - -[[package]] -name = "zerovec" -version = "0.11.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90f911cbc359ab6af17377d242225f4d75119aec87ea711a880987b18cd7b239" -dependencies = [ - "yoke", - "zerofrom", - "zerovec-derive", -] - -[[package]] -name = "zerovec-derive" -version = "0.11.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "625dc425cab0dca6dc3c3319506e6593dcb08a9f387ea3b284dbd52a92c40555" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "zmij" version = "1.0.21" diff --git a/envd-rs/Cargo.toml b/envd-rs/Cargo.toml index 35655f2..4f73c3a 100644 --- a/envd-rs/Cargo.toml +++ b/envd-rs/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "envd" -version = "0.2.1" +version = "0.3.0" edition = "2024" rust-version = "1.88" @@ -53,9 +53,6 @@ notify = "7" # Compression flate2 = "1" -# HTTP client (MMDS polling) -reqwest = { version = "0.12", default-features = false, features = ["json"] } - # Directory walking walkdir = "2" diff --git a/envd-rs/README.md b/envd-rs/README.md index 3a82d2d..418d42b 100644 --- a/envd-rs/README.md +++ b/envd-rs/README.md @@ -1,6 +1,6 @@ # envd (Rust) -Wrenn guest agent daemon — runs as PID 1 inside Firecracker microVMs. Provides process management, filesystem operations, file transfer, port forwarding, and VM lifecycle control over Connect RPC and HTTP. +Wrenn guest agent daemon — runs as PID 1 inside Cloud Hypervisor microVMs. Provides process management, filesystem operations, file transfer, port forwarding, and VM lifecycle control over Connect RPC and HTTP. Rust rewrite of `envd/` (Go). Drop-in replacement — same wire protocol, same endpoints, same CLI flags. @@ -50,7 +50,7 @@ cargo build Run locally (outside a VM): ```bash -./target/debug/envd --isnotfc --port 49983 +./target/debug/envd --port 49983 ``` ### Via Makefile (from repo root) @@ -64,7 +64,6 @@ make build-envd-go # Go version (for comparison) ``` --port Listen port [default: 49983] ---isnotfc Not running inside Firecracker (disables MMDS, cgroups) --version Print version and exit --commit Print git commit and exit --cmd Spawn a process at startup (e.g. --cmd "/bin/bash") @@ -81,7 +80,7 @@ make build-envd-go # Go version (for comparison) | GET | `/metrics` | System metrics (CPU, memory, disk) | | GET | `/envs` | Current environment variables | | POST | `/init` | Host agent init (token, env, mounts) | -| POST | `/snapshot/prepare` | Quiesce before Firecracker snapshot | +| POST | `/snapshot/prepare` | Quiesce before Cloud Hypervisor snapshot | | GET | `/files` | Download file (gzip, range support) | | POST | `/files` | Upload file(s) via multipart | @@ -108,7 +107,7 @@ src/ ├── util.rs # AtomicMax ├── auth/ # Token, signing, middleware ├── crypto/ # SHA-256, SHA-512, HMAC -├── host/ # MMDS polling, system metrics +├── host/ # System metrics ├── http/ # Axum handlers (health, init, snapshot, files, encoding) ├── permissions/ # Path resolution, user lookup, chown ├── rpc/ # Connect RPC services diff --git a/envd-rs/src/config.rs b/envd-rs/src/config.rs index c2dac43..be89725 100644 --- a/envd-rs/src/config.rs +++ b/envd-rs/src/config.rs @@ -9,8 +9,3 @@ pub const WRENN_RUN_DIR: &str = "/run/wrenn"; pub const KILOBYTE: u64 = 1024; pub const MEGABYTE: u64 = 1024 * KILOBYTE; - -pub const MMDS_ADDRESS: &str = "169.254.169.254"; -pub const MMDS_POLL_INTERVAL: Duration = Duration::from_millis(50); -pub const MMDS_TOKEN_EXPIRATION_SECS: u64 = 60; -pub const MMDS_ACCESS_TOKEN_CLIENT_TIMEOUT: Duration = Duration::from_secs(10); diff --git a/envd-rs/src/host/metrics.rs b/envd-rs/src/host/metrics.rs deleted file mode 100644 index 671d1a6..0000000 --- a/envd-rs/src/host/metrics.rs +++ /dev/null @@ -1,73 +0,0 @@ -use std::ffi::CString; -use std::time::{SystemTime, UNIX_EPOCH}; - -use serde::Serialize; - -#[derive(Serialize)] -pub struct Metrics { - pub ts: i64, - pub cpu_count: u32, - pub cpu_used_pct: f32, - pub mem_total_mib: u64, - pub mem_used_mib: u64, - pub mem_total: u64, - pub mem_used: u64, - pub disk_used: u64, - pub disk_total: u64, -} - -pub fn get_metrics() -> Result { - use sysinfo::System; - - let mut sys = System::new(); - sys.refresh_memory(); - sys.refresh_cpu_all(); - - std::thread::sleep(std::time::Duration::from_millis(100)); - sys.refresh_cpu_all(); - - let cpu_count = sys.cpus().len() as u32; - let cpu_used_pct = sys.global_cpu_usage(); - let cpu_used_pct_rounded = if cpu_used_pct > 0.0 { - (cpu_used_pct * 100.0).round() / 100.0 - } else { - 0.0 - }; - - let mem_total = sys.total_memory(); - let mem_used = sys.used_memory(); - - let (disk_total, disk_used) = disk_stats("/")?; - - let ts = SystemTime::now() - .duration_since(UNIX_EPOCH) - .unwrap() - .as_secs() as i64; - - Ok(Metrics { - ts, - cpu_count, - cpu_used_pct: cpu_used_pct_rounded, - mem_total_mib: mem_total / 1024 / 1024, - mem_used_mib: mem_used / 1024 / 1024, - mem_total, - mem_used, - disk_used, - disk_total, - }) -} - -fn disk_stats(path: &str) -> Result<(u64, u64), String> { - let c_path = CString::new(path).unwrap(); - let mut stat: libc::statfs = unsafe { std::mem::zeroed() }; - let ret = unsafe { libc::statfs(c_path.as_ptr(), &mut stat) }; - if ret != 0 { - return Err(format!("statfs failed: {}", std::io::Error::last_os_error())); - } - - let block = stat.f_bsize as u64; - let total = stat.f_blocks * block; - let available = stat.f_bavail * block; - - Ok((total, total - available)) -} diff --git a/envd-rs/src/host/mmds.rs b/envd-rs/src/host/mmds.rs deleted file mode 100644 index e2bf5bb..0000000 --- a/envd-rs/src/host/mmds.rs +++ /dev/null @@ -1,120 +0,0 @@ -use std::sync::Arc; -use std::time::Duration; - -use dashmap::DashMap; -use serde::Deserialize; -use tokio_util::sync::CancellationToken; - -use crate::config::{MMDS_ADDRESS, MMDS_POLL_INTERVAL, MMDS_TOKEN_EXPIRATION_SECS, WRENN_RUN_DIR}; - -#[derive(Debug, Clone, Deserialize)] -pub struct MMDSOpts { - #[serde(rename = "instanceID")] - pub sandbox_id: String, - #[serde(rename = "envID")] - pub template_id: String, - #[serde(rename = "address", default)] - pub logs_collector_address: String, - #[serde(rename = "accessTokenHash", default)] - pub access_token_hash: String, -} - -async fn get_mmds_token(client: &reqwest::Client) -> Result { - let resp = client - .put(format!("http://{MMDS_ADDRESS}/latest/api/token")) - .header( - "X-metadata-token-ttl-seconds", - MMDS_TOKEN_EXPIRATION_SECS.to_string(), - ) - .send() - .await - .map_err(|e| format!("mmds token request failed: {e}"))?; - - let token = resp.text().await.map_err(|e| format!("mmds token read: {e}"))?; - if token.is_empty() { - return Err("mmds token is an empty string".into()); - } - Ok(token) -} - -async fn get_mmds_opts(client: &reqwest::Client, token: &str) -> Result { - let resp = client - .get(format!("http://{MMDS_ADDRESS}")) - .header("X-metadata-token", token) - .header("Accept", "application/json") - .send() - .await - .map_err(|e| format!("mmds opts request failed: {e}"))?; - - resp.json::() - .await - .map_err(|e| format!("mmds opts parse: {e}")) -} - -pub async fn get_access_token_hash() -> Result { - let client = reqwest::Client::builder() - .timeout(Duration::from_secs(10)) - .no_proxy() - .build() - .map_err(|e| format!("http client: {e}"))?; - - let token = get_mmds_token(&client).await?; - let opts = get_mmds_opts(&client, &token).await?; - Ok(opts.access_token_hash) -} - -/// Polls MMDS every 50ms until metadata is available. -/// Stores sandbox_id and template_id in env_vars and writes to /run/wrenn/ files. -pub async fn poll_for_opts( - env_vars: Arc>, - cancel: CancellationToken, -) -> Option { - let client = reqwest::Client::builder() - .no_proxy() - .build() - .ok()?; - - let mut interval = tokio::time::interval(MMDS_POLL_INTERVAL); - - loop { - tokio::select! { - _ = cancel.cancelled() => { - tracing::warn!("context cancelled while waiting for mmds opts"); - return None; - } - _ = interval.tick() => { - let token = match get_mmds_token(&client).await { - Ok(t) => t, - Err(e) => { - tracing::debug!(error = %e, "mmds token poll"); - continue; - } - }; - - let opts = match get_mmds_opts(&client, &token).await { - Ok(o) => o, - Err(e) => { - tracing::debug!(error = %e, "mmds opts poll"); - continue; - } - }; - - env_vars.insert("WRENN_SANDBOX_ID".into(), opts.sandbox_id.clone()); - env_vars.insert("WRENN_TEMPLATE_ID".into(), opts.template_id.clone()); - - let run_dir = std::path::Path::new(WRENN_RUN_DIR); - if let Err(e) = std::fs::create_dir_all(run_dir) { - tracing::error!(error = %e, "mmds: failed to create run dir"); - } - if let Err(e) = std::fs::write(run_dir.join(".WRENN_SANDBOX_ID"), &opts.sandbox_id) { - tracing::error!(error = %e, "mmds: failed to write .WRENN_SANDBOX_ID"); - } - if let Err(e) = std::fs::write(run_dir.join(".WRENN_TEMPLATE_ID"), &opts.template_id) { - tracing::error!(error = %e, "mmds: failed to write .WRENN_TEMPLATE_ID"); - } - - return Some(opts); - } - } - } -} diff --git a/envd-rs/src/host/mod.rs b/envd-rs/src/host/mod.rs deleted file mode 100644 index a8ba613..0000000 --- a/envd-rs/src/host/mod.rs +++ /dev/null @@ -1,2 +0,0 @@ -pub mod metrics; -pub mod mmds; diff --git a/envd-rs/src/http/health.rs b/envd-rs/src/http/health.rs index 39d61c9..4a29f9a 100644 --- a/envd-rs/src/http/health.rs +++ b/envd-rs/src/http/health.rs @@ -1,5 +1,4 @@ use std::sync::Arc; -use std::sync::atomic::Ordering; use axum::Json; use axum::extract::State; @@ -10,13 +9,7 @@ use serde_json::json; use crate::state::AppState; pub async fn get_health(State(state): State>) -> impl IntoResponse { - if state - .needs_restore - .compare_exchange(true, false, Ordering::AcqRel, Ordering::Relaxed) - .is_ok() - { - post_restore_recovery(&state); - } + state.try_restore_recovery(); tracing::trace!("health check"); @@ -25,17 +18,3 @@ pub async fn get_health(State(state): State>) -> impl IntoResponse Json(json!({ "version": state.version })), ) } - -fn post_restore_recovery(state: &AppState) { - tracing::info!("restore: post-restore recovery (no GC needed in Rust)"); - - state.snapshot_in_progress.store(false, std::sync::atomic::Ordering::Release); - - state.conn_tracker.restore_after_snapshot(); - tracing::info!("restore: zombie connections closed"); - - if let Some(ref ps) = state.port_subsystem { - ps.restart(); - tracing::info!("restore: port subsystem restarted"); - } -} diff --git a/envd-rs/src/http/init.rs b/envd-rs/src/http/init.rs index 840cab0..0001912 100644 --- a/envd-rs/src/http/init.rs +++ b/envd-rs/src/http/init.rs @@ -1,6 +1,5 @@ use std::collections::HashMap; use std::sync::Arc; -use std::sync::atomic::Ordering; use axum::Json; use axum::extract::State; @@ -8,20 +7,25 @@ use axum::http::{StatusCode, header}; use axum::response::IntoResponse; use serde::Deserialize; -use crate::crypto; -use crate::host::mmds; use crate::state::AppState; #[derive(Deserialize, Default)] -#[serde(rename_all = "camelCase")] pub struct InitRequest { + #[serde(rename = "access_token")] pub access_token: Option, + #[serde(rename = "defaultUser")] pub default_user: Option, + #[serde(rename = "defaultWorkdir")] pub default_workdir: Option, + #[serde(rename = "envVars")] pub env_vars: Option>, + #[serde(rename = "hyperloop_ip")] pub hyperloop_ip: Option, pub timestamp: Option, + #[serde(rename = "volume_mounts")] pub volume_mounts: Option>, + pub sandbox_id: Option, + pub template_id: Option, } #[derive(Deserialize)] @@ -110,37 +114,27 @@ pub async fn post_init( } } - // Re-poll MMDS in background - if state.is_fc { - let env_vars = Arc::clone(&state.defaults.env_vars); - let cancel = tokio_util::sync::CancellationToken::new(); - let cancel_clone = cancel.clone(); - tokio::spawn(async move { - tokio::time::timeout(std::time::Duration::from_secs(60), async { - mmds::poll_for_opts(env_vars, cancel_clone).await; - }) - .await - .ok(); - }); + // Set sandbox/template metadata from request body. + if let Some(ref id) = init_req.sandbox_id { + tracing::debug!(sandbox_id = %id, "setting sandbox ID from init request"); + // SAFETY: envd is single-threaded at init time; no concurrent env reads. + unsafe { std::env::set_var("WRENN_SANDBOX_ID", id) }; + write_run_file(".WRENN_SANDBOX_ID", id); + state.defaults.env_vars.insert("WRENN_SANDBOX_ID".into(), id.clone()); + } + if let Some(ref id) = init_req.template_id { + tracing::debug!(template_id = %id, "setting template ID from init request"); + // SAFETY: envd is single-threaded at init time; no concurrent env reads. + unsafe { std::env::set_var("WRENN_TEMPLATE_ID", id) }; + write_run_file(".WRENN_TEMPLATE_ID", id); + state.defaults.env_vars.insert("WRENN_TEMPLATE_ID".into(), id.clone()); } trigger_restore_and_respond(&state).await } async fn trigger_restore_and_respond(state: &AppState) -> axum::response::Response { - // Safety net: if health check's postRestoreRecovery hasn't run yet - if state - .needs_restore - .compare_exchange(true, false, Ordering::AcqRel, Ordering::Relaxed) - .is_ok() - { - post_restore_recovery(state); - } - - state.conn_tracker.restore_after_snapshot(); - if let Some(ref ps) = state.port_subsystem { - ps.restart(); - } + state.try_restore_recovery(); ( StatusCode::NO_CONTENT, @@ -149,46 +143,13 @@ async fn trigger_restore_and_respond(state: &AppState) -> axum::response::Respon .into_response() } -fn post_restore_recovery(state: &AppState) { - tracing::info!("restore: post-restore recovery (no GC needed in Rust)"); - - state.snapshot_in_progress.store(false, std::sync::atomic::Ordering::Release); - - state.conn_tracker.restore_after_snapshot(); - - if let Some(ref ps) = state.port_subsystem { - ps.restart(); - tracing::info!("restore: port subsystem restarted"); - } -} - async fn validate_init_access_token(state: &AppState, request_token: &str) -> Result<(), String> { // Fast path: matches existing token if state.access_token.is_set() && !request_token.is_empty() && state.access_token.equals(request_token) { return Ok(()); } - // Check MMDS hash - if state.is_fc { - if let Ok(mmds_hash) = mmds::get_access_token_hash().await { - if !mmds_hash.is_empty() { - if request_token.is_empty() { - let empty_hash = crypto::sha512::hash_access_token(""); - if mmds_hash == empty_hash { - return Ok(()); - } - } else { - let token_hash = crypto::sha512::hash_access_token(request_token); - if mmds_hash == token_hash { - return Ok(()); - } - } - return Err("access token validation failed".into()); - } - } - } - - // First-time setup: no existing token and no MMDS + // First-time setup: no existing token if !state.access_token.is_set() { return Ok(()); } @@ -268,14 +229,21 @@ async fn setup_nfs(nfs_target: &str, path: &str) { } } +fn write_run_file(name: &str, value: &str) { + let dir = std::path::Path::new("/run/wrenn"); + if let Err(e) = std::fs::create_dir_all(dir) { + tracing::warn!(error = %e, "failed to create /run/wrenn"); + return; + } + if let Err(e) = std::fs::write(dir.join(name), value) { + tracing::warn!(error = %e, name, "failed to write run file"); + } +} + fn chrono_parse_to_nanos(ts: &str) -> Result { - // Parse RFC3339 timestamp to nanoseconds since epoch - // Simple approach: parse as seconds + fractional let secs = ts.parse::().ok(); if let Some(s) = secs { return Ok((s * 1_000_000_000.0) as i64); } - // Try RFC3339 format - // For now, fall back to allowing the update Err(()) } diff --git a/envd-rs/src/http/snapshot.rs b/envd-rs/src/http/snapshot.rs index e507d8f..e7aa717 100644 --- a/envd-rs/src/http/snapshot.rs +++ b/envd-rs/src/http/snapshot.rs @@ -7,7 +7,7 @@ use axum::response::IntoResponse; use crate::state::AppState; -/// POST /snapshot/prepare — quiesce subsystems before Firecracker snapshot. +/// POST /snapshot/prepare — quiesce subsystems before VM snapshot. /// /// In Rust there is no GC dance. We just: /// 1. Drop page cache to shrink snapshot size diff --git a/envd-rs/src/main.rs b/envd-rs/src/main.rs index 9e33fec..a487026 100644 --- a/envd-rs/src/main.rs +++ b/envd-rs/src/main.rs @@ -6,7 +6,6 @@ mod config; mod conntracker; mod crypto; mod execcontext; -mod host; mod http; mod logging; mod permissions; @@ -22,7 +21,6 @@ use std::sync::Arc; use clap::Parser; use tokio::net::TcpListener; -use tokio_util::sync::CancellationToken; use config::{DEFAULT_PORT, DEFAULT_USER, WRENN_RUN_DIR}; use execcontext::Defaults; @@ -44,9 +42,6 @@ struct Cli { #[arg(long, default_value_t = DEFAULT_PORT)] port: u16, - #[arg(long = "isnotfc", default_value_t = false)] - is_not_fc: bool, - #[arg(long)] version: bool, @@ -73,35 +68,22 @@ async fn main() { return; } - let use_json = !cli.is_not_fc; - logging::init(use_json); + logging::init(true); if let Err(e) = fs::create_dir_all(WRENN_RUN_DIR) { tracing::error!(error = %e, "failed to create wrenn run directory"); } let defaults = Defaults::new(DEFAULT_USER); - let is_fc_str = if cli.is_not_fc { "false" } else { "true" }; defaults .env_vars - .insert("WRENN_SANDBOX".into(), is_fc_str.into()); + .insert("WRENN_SANDBOX".into(), "true".into()); let wrenn_sandbox_path = Path::new(WRENN_RUN_DIR).join(".WRENN_SANDBOX"); - if let Err(e) = fs::write(&wrenn_sandbox_path, is_fc_str.as_bytes()) { + if let Err(e) = fs::write(&wrenn_sandbox_path, b"true") { tracing::error!(error = %e, "failed to write sandbox file"); } - let cancel = CancellationToken::new(); - - // MMDS polling (only in FC mode) - if !cli.is_not_fc { - let env_vars = Arc::clone(&defaults.env_vars); - let cancel_clone = cancel.clone(); - tokio::spawn(async move { - host::mmds::poll_for_opts(env_vars, cancel_clone).await; - }); - } - // Cgroup manager let cgroup_manager: Arc = match cgroups::Cgroup2Manager::new( @@ -143,14 +125,13 @@ async fn main() { defaults, VERSION.to_string(), COMMIT.to_string(), - !cli.is_not_fc, Some(Arc::clone(&port_subsystem)), ); // Memory reclaimer — drop page cache when available memory is low. - // Firecracker balloon device can only reclaim pages the guest kernel freed. + // The balloon device can only reclaim pages the guest kernel freed. // Pauses during snapshot/prepare to avoid corrupting kernel page table state. - if !cli.is_not_fc { + { let state_for_reclaimer = Arc::clone(&state); std::thread::spawn(move || memory_reclaimer(state_for_reclaimer)); } @@ -188,7 +169,6 @@ async fn main() { } port_subsystem.stop(); - cancel.cancel(); } fn spawn_initial_command(cmd: &str, state: &AppState) { @@ -233,9 +213,11 @@ fn spawn_initial_command(cmd: &str, state: &AppState) { fn memory_reclaimer(state: Arc) { use std::sync::atomic::Ordering; + use std::time::{Duration, SystemTime, UNIX_EPOCH}; - const CHECK_INTERVAL: std::time::Duration = std::time::Duration::from_secs(10); + const CHECK_INTERVAL: Duration = Duration::from_secs(10); const DROP_THRESHOLD_PCT: u64 = 80; + const RESTORE_GRACE_SECS: u64 = 30; loop { std::thread::sleep(CHECK_INTERVAL); @@ -244,6 +226,20 @@ fn memory_reclaimer(state: Arc) { continue; } + // Skip during post-restore grace period. Balloon deflation causes + // transient high memory that resolves on its own — triggering + // drop_caches during UFFD page fault storms makes the guest unresponsive. + let restore_epoch = state.restore_epoch.load(Ordering::Acquire); + if restore_epoch > 0 { + let now = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap_or_default() + .as_secs(); + if now.saturating_sub(restore_epoch) < RESTORE_GRACE_SECS { + continue; + } + } + let mut sys = sysinfo::System::new(); sys.refresh_memory(); let total = sys.total_memory(); diff --git a/envd-rs/src/port/scanner.rs b/envd-rs/src/port/scanner.rs index ea8d3be..ea613e5 100644 --- a/envd-rs/src/port/scanner.rs +++ b/envd-rs/src/port/scanner.rs @@ -57,7 +57,9 @@ impl Scanner { pub async fn scan_and_broadcast(&self, cancel: CancellationToken) { loop { - let conns = read_tcp_connections(); + let conns = tokio::task::spawn_blocking(read_tcp_connections) + .await + .unwrap_or_default(); { let subs = self.subs.read().unwrap(); diff --git a/envd-rs/src/state.rs b/envd-rs/src/state.rs index 33d170a..2f2858a 100644 --- a/envd-rs/src/state.rs +++ b/envd-rs/src/state.rs @@ -1,5 +1,6 @@ -use std::sync::atomic::{AtomicBool, AtomicU32, Ordering}; +use std::sync::atomic::{AtomicBool, AtomicU32, AtomicU64, Ordering}; use std::sync::Arc; +use std::time::{SystemTime, UNIX_EPOCH}; use crate::auth::token::SecureToken; use crate::conntracker::ConnTracker; @@ -11,7 +12,6 @@ pub struct AppState { pub defaults: Defaults, pub version: String, pub commit: String, - pub is_fc: bool, pub needs_restore: AtomicBool, pub last_set_time: AtomicMax, pub access_token: SecureToken, @@ -20,6 +20,8 @@ pub struct AppState { pub cpu_used_pct: AtomicU32, pub cpu_count: AtomicU32, pub snapshot_in_progress: AtomicBool, + pub last_health_epoch: AtomicU64, + pub restore_epoch: AtomicU64, } impl AppState { @@ -27,14 +29,12 @@ impl AppState { defaults: Defaults, version: String, commit: String, - is_fc: bool, port_subsystem: Option>, ) -> Arc { let state = Arc::new(Self { defaults, version, commit, - is_fc, needs_restore: AtomicBool::new(false), last_set_time: AtomicMax::new(), access_token: SecureToken::new(), @@ -43,6 +43,8 @@ impl AppState { cpu_used_pct: AtomicU32::new(0), cpu_count: AtomicU32::new(0), snapshot_in_progress: AtomicBool::new(false), + last_health_epoch: AtomicU64::new(0), + restore_epoch: AtomicU64::new(0), }); let state_clone = Arc::clone(&state); @@ -60,6 +62,47 @@ impl AppState { pub fn cpu_count(&self) -> u32 { self.cpu_count.load(Ordering::Relaxed) } + + /// Runs post-restore recovery if `needs_restore` is set OR a wall-clock + /// gap is detected (catches restores where snapshot/prepare never ran). + pub fn try_restore_recovery(&self) { + let now_epoch = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap_or_default() + .as_secs(); + let prev_epoch = self.last_health_epoch.swap(now_epoch, Ordering::AcqRel); + + // Detect restore via wall-clock gap: if >3s passed since last health + // check, the VM was frozen and restored. Catches the case where + // snapshot/prepare timed out and needs_restore was never set. + let gap_detected = prev_epoch > 0 && now_epoch.saturating_sub(prev_epoch) > 3; + + let flag_set = self + .needs_restore + .compare_exchange(true, false, Ordering::AcqRel, Ordering::Relaxed) + .is_ok(); + + if !flag_set && !gap_detected { + return; + } + + if gap_detected && !flag_set { + tracing::info!( + gap_secs = now_epoch.saturating_sub(prev_epoch), + "restore: detected via wall-clock gap (needs_restore was not set)" + ); + } + + tracing::info!("restore: post-restore recovery"); + self.snapshot_in_progress.store(false, Ordering::Release); + self.restore_epoch.store(now_epoch, Ordering::Release); + self.conn_tracker.restore_after_snapshot(); + + if let Some(ref ps) = self.port_subsystem { + ps.restart(); + tracing::info!("restore: port subsystem restarted"); + } + } } fn cpu_sampler(state: Arc) { @@ -70,6 +113,15 @@ fn cpu_sampler(state: Arc) { loop { std::thread::sleep(std::time::Duration::from_secs(1)); + + if state.needs_restore.load(Ordering::Acquire) { + // After snapshot restore, sysinfo's internal CPU counters are stale. + // Reinitialize to get a fresh baseline. + sys = System::new(); + sys.refresh_cpu_all(); + continue; + } + sys.refresh_cpu_all(); let pct = sys.global_cpu_usage();