forked from wrenn/wrenn
fix: resolve PTY failure, MMDS file writes, and metrics instability in envd-rs
Three bugs fixed:
1. PTY connections failed because the home directory was hardcoded as
/home/{username} instead of being read from /etc/passwd. For root,
this produced /home/root/, which doesn't exist, so CWD validation
rejected every PTY Start request that lacked an explicit cwd. Fixed all
6 locations to use user.dir from nix::unistd::User.
2. MMDS polling silently failed to parse metadata because the
logs_collector_address field lacked #[serde(default)]. The host
agent sends only instanceID and envID, so the missing "address" field
caused every deserialization attempt to fail, and .WRENN_SANDBOX_ID
and .WRENN_TEMPLATE_ID were never written. Also added error
logging and a create_dir_all call before file writes.
3. Metrics CPU values were non-deterministic because a fresh
sysinfo::System was created per request, with a 100ms sleep
between its two CPU reads. Replaced this with a background thread that
samples CPU at fixed 1-second intervals via a persistent System instance,
matching gopsutil's internal caching behavior. The metrics endpoint
now reads cached atomic values, so it never blocks and always reports
over a consistent sampling window.
Also: close master PTY fd in child pre_exec, add process.Start
request logging, bump version to 0.2.0.
This commit is contained in:
@ -95,7 +95,7 @@ pub async fn get_files(
|
||||
Err(e) => return json_error(StatusCode::UNAUTHORIZED, &e),
|
||||
};
|
||||
|
||||
let home_dir = format!("/home/{}", user.name);
|
||||
let home_dir = user.dir.to_string_lossy().to_string();
|
||||
let resolved = match expand_and_resolve(path_str, &home_dir, state.defaults.workdir.as_deref())
|
||||
{
|
||||
Ok(p) => p,
|
||||
@ -246,7 +246,7 @@ pub async fn post_files(
|
||||
Err(e) => return json_error(StatusCode::UNAUTHORIZED, &e),
|
||||
};
|
||||
|
||||
let home_dir = format!("/home/{}", user.name);
|
||||
let home_dir = user.dir.to_string_lossy().to_string();
|
||||
let uid = user.uid;
|
||||
let gid = user.gid;
|
||||
|
||||
|
||||
@ -22,10 +22,10 @@ pub struct Metrics {
|
||||
disk_total: u64,
|
||||
}
|
||||
|
||||
pub async fn get_metrics(State(_state): State<Arc<AppState>>) -> impl IntoResponse {
|
||||
pub async fn get_metrics(State(state): State<Arc<AppState>>) -> impl IntoResponse {
|
||||
tracing::trace!("get metrics");
|
||||
|
||||
match collect_metrics() {
|
||||
match collect_metrics(&state) {
|
||||
Ok(m) => (
|
||||
StatusCode::OK,
|
||||
[(header::CACHE_CONTROL, "no-store")],
|
||||
@ -39,26 +39,12 @@ pub async fn get_metrics(State(_state): State<Arc<AppState>>) -> impl IntoRespon
|
||||
}
|
||||
}
|
||||
|
||||
fn collect_metrics() -> Result<Metrics, String> {
|
||||
use sysinfo::System;
|
||||
fn collect_metrics(state: &AppState) -> Result<Metrics, String> {
|
||||
let cpu_count = state.cpu_count();
|
||||
let cpu_used_pct_rounded = state.cpu_used_pct();
|
||||
|
||||
let mut sys = System::new();
|
||||
let mut sys = sysinfo::System::new();
|
||||
sys.refresh_memory();
|
||||
sys.refresh_cpu_all();
|
||||
|
||||
// sysinfo needs a small delay for accurate CPU — first call returns 0.
|
||||
// In a real daemon this would be cached; for now, report instantaneous.
|
||||
std::thread::sleep(std::time::Duration::from_millis(100));
|
||||
sys.refresh_cpu_all();
|
||||
|
||||
let cpu_count = sys.cpus().len() as u32;
|
||||
let cpu_used_pct = sys.global_cpu_usage();
|
||||
let cpu_used_pct_rounded = if cpu_used_pct > 0.0 {
|
||||
(cpu_used_pct * 100.0).round() / 100.0
|
||||
} else {
|
||||
0.0
|
||||
};
|
||||
|
||||
let mem_total = sys.total_memory();
|
||||
let mem_used = sys.used_memory();
|
||||
let mem_total_mib = mem_total / 1024 / 1024;
|
||||
|
||||
Reference in New Issue
Block a user