1
0
forked from wrenn/wrenn

feat(envd): update guest agent for Cloud Hypervisor

Remove the Firecracker-specific MMDS metadata fetching and the host metrics
module. Cloud Hypervisor (CH) communicates with the guest purely over TAP
networking, so MMDS (Firecracker's metadata service, which the guest queried
over HTTP at the MMDS address) is no longer needed.

- Remove src/host/ module (mmds.rs, metrics.rs)
- Remove reqwest dependency (was only used for MMDS HTTP calls)
- Remove --isnotfc CLI flag (no longer dual-mode)
- Simplify health endpoint and init handler
- Update state management for CH snapshot lifecycle
- Bump version to 0.3.0
This commit is contained in:
2026-05-17 01:33:25 +06:00
parent eaa6b8576d
commit dd8a940431
13 changed files with 124 additions and 755 deletions

View File

@ -1,6 +1,5 @@
use std::collections::HashMap;
use std::sync::Arc;
use std::sync::atomic::Ordering;
use axum::Json;
use axum::extract::State;
@ -8,20 +7,25 @@ use axum::http::{StatusCode, header};
use axum::response::IntoResponse;
use serde::Deserialize;
use crate::crypto;
use crate::host::mmds;
use crate::state::AppState;
/// Request payload deserialized with serde.
///
/// Every field is an `Option`, so a key that is absent from the payload
/// deserializes to `None`; `Default` yields a value with every field unset.
///
/// Wire names, as implied by the attributes shown here: a field-level
/// `rename` takes precedence over the container-level `rename_all`, and
/// fields without an explicit `rename` are converted to camelCase.
///
/// NOTE(review): `sandbox_id` and `template_id` carry no explicit `rename`, so
/// under `rename_all = "camelCase"` they are read from `sandboxId` and
/// `templateId`, while `access_token`, `hyperloop_ip` and `volume_mounts` are
/// read from snake_case keys. Confirm this mix matches what the sender emits.
#[derive(Deserialize, Default)]
#[serde(rename_all = "camelCase")]
pub struct InitRequest {
// Wire key: `access_token`.
#[serde(rename = "access_token")]
pub access_token: Option<String>,
// Wire key: `defaultUser`.
#[serde(rename = "defaultUser")]
pub default_user: Option<String>,
// Wire key: `defaultWorkdir`.
#[serde(rename = "defaultWorkdir")]
pub default_workdir: Option<String>,
// Wire key: `envVars`; a string-to-string map.
#[serde(rename = "envVars")]
pub env_vars: Option<HashMap<String, String>>,
// Wire key: `hyperloop_ip`.
#[serde(rename = "hyperloop_ip")]
pub hyperloop_ip: Option<String>,
// Wire key: `timestamp` (a single word, so camelCase leaves it unchanged).
// Carried as a string; its format is not visible here — TODO confirm.
pub timestamp: Option<String>,
// Wire key: `volume_mounts`; `VolumeMount` is declared elsewhere in this file.
#[serde(rename = "volume_mounts")]
pub volume_mounts: Option<Vec<VolumeMount>>,
// Wire key: `sandboxId` (via `rename_all`) — see the NOTE(review) above.
pub sandbox_id: Option<String>,
// Wire key: `templateId` (via `rename_all`) — see the NOTE(review) above.
pub template_id: Option<String>,
}
#[derive(Deserialize)]
@ -110,37 +114,27 @@ pub async fn post_init(
}
}
// Re-poll MMDS in background
if state.is_fc {
let env_vars = Arc::clone(&state.defaults.env_vars);
let cancel = tokio_util::sync::CancellationToken::new();
let cancel_clone = cancel.clone();
tokio::spawn(async move {
tokio::time::timeout(std::time::Duration::from_secs(60), async {
mmds::poll_for_opts(env_vars, cancel_clone).await;
})
.await
.ok();
});
// Set sandbox/template metadata from request body.
if let Some(ref id) = init_req.sandbox_id {
tracing::debug!(sandbox_id = %id, "setting sandbox ID from init request");
// SAFETY: envd is single-threaded at init time; no concurrent env reads.
unsafe { std::env::set_var("WRENN_SANDBOX_ID", id) };
write_run_file(".WRENN_SANDBOX_ID", id);
state.defaults.env_vars.insert("WRENN_SANDBOX_ID".into(), id.clone());
}
if let Some(ref id) = init_req.template_id {
tracing::debug!(template_id = %id, "setting template ID from init request");
// SAFETY: envd is single-threaded at init time; no concurrent env reads.
unsafe { std::env::set_var("WRENN_TEMPLATE_ID", id) };
write_run_file(".WRENN_TEMPLATE_ID", id);
state.defaults.env_vars.insert("WRENN_TEMPLATE_ID".into(), id.clone());
}
trigger_restore_and_respond(&state).await
}
async fn trigger_restore_and_respond(state: &AppState) -> axum::response::Response {
// Safety net: if health check's postRestoreRecovery hasn't run yet
if state
.needs_restore
.compare_exchange(true, false, Ordering::AcqRel, Ordering::Relaxed)
.is_ok()
{
post_restore_recovery(state);
}
state.conn_tracker.restore_after_snapshot();
if let Some(ref ps) = state.port_subsystem {
ps.restart();
}
state.try_restore_recovery();
(
StatusCode::NO_CONTENT,
@ -149,46 +143,13 @@ async fn trigger_restore_and_respond(state: &AppState) -> axum::response::Respon
.into_response()
}
fn post_restore_recovery(state: &AppState) {
tracing::info!("restore: post-restore recovery (no GC needed in Rust)");
state.snapshot_in_progress.store(false, std::sync::atomic::Ordering::Release);
state.conn_tracker.restore_after_snapshot();
if let Some(ref ps) = state.port_subsystem {
ps.restart();
tracing::info!("restore: port subsystem restarted");
}
}
async fn validate_init_access_token(state: &AppState, request_token: &str) -> Result<(), String> {
// Fast path: matches existing token
if state.access_token.is_set() && !request_token.is_empty() && state.access_token.equals(request_token) {
return Ok(());
}
// Check MMDS hash
if state.is_fc {
if let Ok(mmds_hash) = mmds::get_access_token_hash().await {
if !mmds_hash.is_empty() {
if request_token.is_empty() {
let empty_hash = crypto::sha512::hash_access_token("");
if mmds_hash == empty_hash {
return Ok(());
}
} else {
let token_hash = crypto::sha512::hash_access_token(request_token);
if mmds_hash == token_hash {
return Ok(());
}
}
return Err("access token validation failed".into());
}
}
}
// First-time setup: no existing token and no MMDS
// First-time setup: no existing token
if !state.access_token.is_set() {
return Ok(());
}
@ -268,14 +229,21 @@ async fn setup_nfs(nfs_target: &str, path: &str) {
}
}
fn write_run_file(name: &str, value: &str) {
let dir = std::path::Path::new("/run/wrenn");
if let Err(e) = std::fs::create_dir_all(dir) {
tracing::warn!(error = %e, "failed to create /run/wrenn");
return;
}
if let Err(e) = std::fs::write(dir.join(name), value) {
tracing::warn!(error = %e, name, "failed to write run file");
}
}
/// Converts a timestamp string to nanoseconds since the epoch.
///
/// Only a decimal number of seconds (optionally fractional, e.g. `"1.5"`) is
/// understood. RFC3339 strings are NOT parsed yet and produce `Err(())`; the
/// original note here says callers fall back to allowing the update in that
/// case — NOTE(review): confirm against the call site.
///
/// # Errors
///
/// Returns `Err(())` when `ts` is not a number, is non-finite (`"NaN"`,
/// `"inf"`, `"infinity"` all parse successfully as `f64`), or when the
/// nanosecond value does not fit in an `i64`. Without these checks the
/// float-to-int cast would silently turn NaN into `0` and saturate overflow
/// to `i64::MAX` / `i64::MIN`.
///
/// Precision: the conversion goes through `f64`, so present-day epoch values
/// are only accurate to a few hundred nanoseconds.
fn chrono_parse_to_nanos(ts: &str) -> Result<i64, ()> {
    const NANOS_PER_SEC: f64 = 1_000_000_000.0;
    // 2^63: the first value above the i64 range, exactly representable in f64
    // (`i64::MAX` itself is not, so compare against this exclusive bound).
    const I64_EXCLUSIVE_BOUND: f64 = 9_223_372_036_854_775_808.0;

    let secs: f64 = ts.parse().map_err(|_| ())?;
    let nanos = secs * NANOS_PER_SEC;

    // `-2^63` is exactly `i64::MIN`, hence the inclusive lower bound.
    let in_range = nanos >= -I64_EXCLUSIVE_BOUND && nanos < I64_EXCLUSIVE_BOUND;
    if !nanos.is_finite() || !in_range {
        return Err(());
    }

    // Truncation toward zero is intended; the range check above rules out saturation.
    Ok(nanos as i64)
}