From 36782e1b4ff5ea3ed5688e3db5e04a836cab7616 Mon Sep 17 00:00:00 2001 From: pptx704 Date: Mon, 23 Mar 2026 02:45:27 +0600 Subject: [PATCH] Add tini as PID 1, guest clock sync, and fix PATH in guest VMs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Use tini as PID 1 in wrenn-init.sh so zombie processes are reaped and signals are forwarded correctly to envd - Set standard PATH in wrenn-init.sh so child processes spawned by envd can find common binaries (fixes "nice: ls command not found") - Add envdclient.Init() to POST /init on envd after every boot/resume, syncing the guest clock via unix.ClockSettime — critical after snapshot resume where the guest clock is frozen - Run Init in a background goroutine so it doesn't block the CreateSandbox RPC response; a slow Init (vCPU busy with envd startup) was causing the RPC context to be canceled before the response reached the control plane - Update rootfs-from-container.sh and update-debug-rootfs.sh to inject tini into the rootfs, checking the container image and host first, downloading from GitHub releases as fallback --- images/wrenn-init.sh | 7 ++++-- internal/envdclient/client.go | 31 ++++++++++++++++++++++++++ internal/sandbox/manager.go | 30 +++++++++++++++++++++++++ scripts/rootfs-from-container.sh | 36 ++++++++++++++++++++++++++++-- scripts/update-debug-rootfs.sh | 38 +++++++++++++++++++++++++++++--- 5 files changed, 135 insertions(+), 7 deletions(-) diff --git a/images/wrenn-init.sh b/images/wrenn-init.sh index 6e45e24..bec7731 100644 --- a/images/wrenn-init.sh +++ b/images/wrenn-init.sh @@ -23,5 +23,8 @@ hostname sandbox echo "nameserver 8.8.8.8" > /etc/resolv.conf echo "nameserver 8.8.4.4" >> /etc/resolv.conf -# Exec envd as the main process (replaces this script, keeps PID 1). -exec /usr/local/bin/envd +# Set a standard PATH so envd and all child processes can find common binaries. +export PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin + +# Exec tini as PID 1 — it reaps zombie processes and forwards signals to envd. +exec /sbin/tini -- /usr/local/bin/envd diff --git a/internal/envdclient/client.go b/internal/envdclient/client.go index 4976569..04a1dc2 100644 --- a/internal/envdclient/client.go +++ b/internal/envdclient/client.go @@ -3,12 +3,14 @@ package envdclient import ( "bytes" "context" + "encoding/json" "fmt" "io" "log/slog" "mime/multipart" "net/http" "net/url" + "time" "connectrpc.com/connect" @@ -47,6 +49,35 @@ func (c *Client) BaseURL() string { return c.base } +// Init calls POST /init on envd to sync the guest clock with the host. +// This is important after snapshot resume where the guest clock is frozen. +func (c *Client) Init(ctx context.Context) error { + now := time.Now().UTC() + body, err := json.Marshal(map[string]any{"timestamp": now}) + if err != nil { + return fmt.Errorf("marshal init body: %w", err) + } + + req, err := http.NewRequestWithContext(ctx, http.MethodPost, c.base+"/init", bytes.NewReader(body)) + if err != nil { + return fmt.Errorf("create init request: %w", err) + } + req.Header.Set("Content-Type", "application/json") + + resp, err := c.httpClient.Do(req) + if err != nil { + return fmt.Errorf("init request: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusNoContent { + respBody, _ := io.ReadAll(resp.Body) + return fmt.Errorf("init: status %d: %s", resp.StatusCode, string(respBody)) + } + + return nil +} + // ExecResult holds the output of a command execution. type ExecResult struct { Stdout []byte diff --git a/internal/sandbox/manager.go b/internal/sandbox/manager.go index bd90e0a..ef4e092 100644 --- a/internal/sandbox/manager.go +++ b/internal/sandbox/manager.go @@ -197,6 +197,16 @@ func (m *Manager) Create(ctx context.Context, sandboxID, template string, vcpus, return nil, fmt.Errorf("wait for envd: %w", err) } + // Sync guest clock in background. Non-fatal — sandbox is usable before this completes. + // Run in a goroutine so Init latency doesn't block the RPC response back to the control plane. + go func() { + initCtx, initCancel := context.WithTimeout(context.Background(), 10*time.Second) + defer initCancel() + if err := client.Init(initCtx); err != nil { + slog.Warn("envd init (clock sync) failed", "sandbox", sandboxID, "error", err) + } + }() + now := time.Now() sb := &sandboxState{ Sandbox: models.Sandbox{ @@ -617,6 +627,16 @@ func (m *Manager) Resume(ctx context.Context, sandboxID string, timeoutSec int) return nil, fmt.Errorf("wait for envd: %w", err) } + // Sync guest clock in background. Non-fatal — sandbox is usable before this completes. + // Run in a goroutine so Init latency doesn't block the RPC response back to the control plane. + go func() { + initCtx, initCancel := context.WithTimeout(context.Background(), 10*time.Second) + defer initCancel() + if err := client.Init(initCtx); err != nil { + slog.Warn("envd init (clock sync) failed", "sandbox", sandboxID, "error", err) + } + }() + now := time.Now() sb := &sandboxState{ Sandbox: models.Sandbox{ @@ -926,6 +946,16 @@ func (m *Manager) createFromSnapshot(ctx context.Context, sandboxID, snapshotNam return nil, fmt.Errorf("wait for envd: %w", err) } + // Sync guest clock in background. Non-fatal — sandbox is usable before this completes. + // Run in a goroutine so Init latency doesn't block the RPC response back to the control plane. + go func() { + initCtx, initCancel := context.WithTimeout(context.Background(), 10*time.Second) + defer initCancel() + if err := client.Init(initCtx); err != nil { + slog.Warn("envd init (clock sync) failed", "sandbox", sandboxID, "error", err) + } + }() + now := time.Now() sb := &sandboxState{ Sandbox: models.Sandbox{ diff --git a/scripts/rootfs-from-container.sh b/scripts/rootfs-from-container.sh index d44b38a..ce1dd52 100755 --- a/scripts/rootfs-from-container.sh +++ b/scripts/rootfs-from-container.sh @@ -15,7 +15,7 @@ # Output: # ${AGENT_FILES_ROOTDIR}/images//rootfs.ext4 # -# Requires: docker, mkfs.ext4, resize2fs, e2fsck, make (for building envd) +# Requires: docker, mkfs.ext4, resize2fs, e2fsck, make (for building envd), curl (for tini download) # Sudo is used only for mount/umount/copy-into-image operations. set -euo pipefail @@ -98,10 +98,42 @@ echo "==> Installing wrenn-init..." sudo cp "${PROJECT_ROOT}/images/wrenn-init.sh" "${MOUNT_DIR}/usr/local/bin/wrenn-init" sudo chmod 755 "${MOUNT_DIR}/usr/local/bin/wrenn-init" +echo "==> Installing tini..." +TINI_BIN="" +# 1. Already in the exported container image? +for p in "${MOUNT_DIR}/usr/bin/tini" "${MOUNT_DIR}/sbin/tini" "${MOUNT_DIR}/usr/local/bin/tini"; do + if [ -f "$p" ]; then TINI_BIN="$p"; break; fi +done +# 2. Available on the host? +if [ -z "${TINI_BIN}" ]; then + for p in /usr/bin/tini /usr/local/bin/tini /sbin/tini; do + if [ -f "$p" ]; then TINI_BIN="$p"; break; fi + done +fi +# 3. Download from GitHub releases. +if [ -z "${TINI_BIN}" ]; then + ARCH="$(uname -m)" + case "${ARCH}" in + x86_64) TINI_ARCH="amd64" ;; + aarch64) TINI_ARCH="arm64" ;; + *) echo "ERROR: Unsupported architecture: ${ARCH}"; exit 1 ;; + esac + TINI_VERSION="v0.19.0" + TINI_URL="https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini-${TINI_ARCH}" + TINI_TMP="/tmp/tini-${TINI_ARCH}" + echo " Downloading tini ${TINI_VERSION} (${TINI_ARCH})..." + curl -fsSL "${TINI_URL}" -o "${TINI_TMP}" + chmod +x "${TINI_TMP}" + TINI_BIN="${TINI_TMP}" +fi +sudo mkdir -p "${MOUNT_DIR}/sbin" +sudo cp "${TINI_BIN}" "${MOUNT_DIR}/sbin/tini" +sudo chmod 755 "${MOUNT_DIR}/sbin/tini" + # Step 6: Verify. echo "" echo "==> Installed guest binaries:" -ls -la "${MOUNT_DIR}/usr/local/bin/envd" "${MOUNT_DIR}/usr/local/bin/wrenn-init" +ls -la "${MOUNT_DIR}/usr/local/bin/envd" "${MOUNT_DIR}/usr/local/bin/wrenn-init" "${MOUNT_DIR}/sbin/tini" # Unmount before shrinking. sudo umount "${MOUNT_DIR}" diff --git a/scripts/update-debug-rootfs.sh b/scripts/update-debug-rootfs.sh index f8487c6..7d0544e 100755 --- a/scripts/update-debug-rootfs.sh +++ b/scripts/update-debug-rootfs.sh @@ -11,13 +11,13 @@ # Usage: # bash scripts/update-debug-rootfs.sh [rootfs_path] # -# Defaults to /var/lib/wrenn/images/minimal.ext4 +# Defaults to /var/lib/wrenn/images/minimal/rootfs.ext4 set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" PROJECT_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)" -ROOTFS="${1:-/var/lib/wrenn/images/minimal.ext4}" +ROOTFS="${1:-/var/lib/wrenn/images/minimal/rootfs.ext4}" MOUNT_DIR="/tmp/wrenn-rootfs-update" if [ ! -f "${ROOTFS}" ]; then @@ -64,10 +64,42 @@ echo "==> Installing wrenn-init..." sudo cp "${PROJECT_ROOT}/images/wrenn-init.sh" "${MOUNT_DIR}/usr/local/bin/wrenn-init" sudo chmod 755 "${MOUNT_DIR}/usr/local/bin/wrenn-init" +echo "==> Installing tini..." +TINI_BIN="" +# 1. Already in the rootfs? +for p in "${MOUNT_DIR}/usr/bin/tini" "${MOUNT_DIR}/sbin/tini" "${MOUNT_DIR}/usr/local/bin/tini"; do + if [ -f "$p" ]; then TINI_BIN="$p"; break; fi +done +# 2. Available on the host? +if [ -z "${TINI_BIN}" ]; then + for p in /usr/bin/tini /usr/local/bin/tini /sbin/tini; do + if [ -f "$p" ]; then TINI_BIN="$p"; break; fi + done +fi +# 3. Download from GitHub releases. +if [ -z "${TINI_BIN}" ]; then + ARCH="$(uname -m)" + case "${ARCH}" in + x86_64) TINI_ARCH="amd64" ;; + aarch64) TINI_ARCH="arm64" ;; + *) echo "ERROR: Unsupported architecture: ${ARCH}"; exit 1 ;; + esac + TINI_VERSION="v0.19.0" + TINI_URL="https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini-${TINI_ARCH}" + TINI_TMP="/tmp/tini-${TINI_ARCH}" + echo " Downloading tini ${TINI_VERSION} (${TINI_ARCH})..." + curl -fsSL "${TINI_URL}" -o "${TINI_TMP}" + chmod +x "${TINI_TMP}" + TINI_BIN="${TINI_TMP}" +fi +sudo mkdir -p "${MOUNT_DIR}/sbin" +sudo cp "${TINI_BIN}" "${MOUNT_DIR}/sbin/tini" +sudo chmod 755 "${MOUNT_DIR}/sbin/tini" + # Step 4: Verify. echo "" echo "==> Installed files:" -ls -la "${MOUNT_DIR}/usr/local/bin/envd" "${MOUNT_DIR}/usr/local/bin/wrenn-init" +ls -la "${MOUNT_DIR}/usr/local/bin/envd" "${MOUNT_DIR}/usr/local/bin/wrenn-init" "${MOUNT_DIR}/sbin/tini" echo "" echo "==> Done. Rootfs updated: ${ROOTFS}"