From 5b4fde055c3e070c7effbfe1cf62e241dbdc4218 Mon Sep 17 00:00:00 2001 From: pptx704 Date: Wed, 15 Apr 2026 18:24:54 +0600 Subject: [PATCH] Fix build recipe execution and flatten reliability - Set HOME in bctx.EnvVars when USER switches so ~ expands correctly in subsequent RUN/WORKDIR steps instead of resolving to /root - Run /bin/sync inside the guest before FlattenRootfs destroys the VM, preventing pip-installed files from being captured as 0-byte due to unflushed page cache - Wrap healthcheck command with su so it runs with the template's default user context (correct HOME, correct UID) - Export Shellescape from the recipe package for use in build service - Add code-runner-beta recipe (Jupyter server with ipykernel --sys-prefix) and replace old python-interpreter-v0-beta --- internal/recipe/context.go | 7 +++++-- internal/recipe/executor.go | 9 +++++++++ internal/sandbox/manager.go | 12 ++++++++++++ internal/service/build.go | 13 ++++++++++--- recipes/code-runner-beta.healthcheck | 1 + recipes/code-runner-beta.recipefile | 9 +++++++++ recipes/python-interpreter-v0-beta.healthcheck | 1 - recipes/python-interpreter-v0-beta.recipefile | 7 ------- 8 files changed, 46 insertions(+), 13 deletions(-) create mode 100644 recipes/code-runner-beta.healthcheck create mode 100644 recipes/code-runner-beta.recipefile delete mode 100644 recipes/python-interpreter-v0-beta.healthcheck delete mode 100644 recipes/python-interpreter-v0-beta.recipefile diff --git a/internal/recipe/context.go b/internal/recipe/context.go index 820e717..3a64059 100644 --- a/internal/recipe/context.go +++ b/internal/recipe/context.go @@ -115,8 +115,11 @@ func expandEnv(s string, vars map[string]string) string { }) } -// shellescape wraps s in single quotes, escaping any embedded single quotes. +// Shellescape wraps s in single quotes, escaping any embedded single quotes. // This is POSIX-safe for paths, env values, and shell commands. -func shellescape(s string) string { +func Shellescape(s string) string { return "'" + strings.ReplaceAll(s, "'", `'\''`) + "'" } + +// shellescape is the package-internal alias for Shellescape. +func shellescape(s string) string { return Shellescape(s) } diff --git a/internal/recipe/executor.go b/internal/recipe/executor.go index 38a8b12..ffecf04 100644 --- a/internal/recipe/executor.go +++ b/internal/recipe/executor.go @@ -193,6 +193,15 @@ func execUser( entry := execRawShell(ctx, st.Raw, sandboxID, phase, step, 30*time.Second, execFn, script) if entry.Ok { bctx.User = username + // Update HOME so ~ expands correctly in subsequent RUN/WORKDIR steps. + if bctx.EnvVars == nil { + bctx.EnvVars = make(map[string]string) + } + if username == "root" { + bctx.EnvVars["HOME"] = "/root" + } else { + bctx.EnvVars["HOME"] = "/home/" + username + } } return entry, entry.Ok } diff --git a/internal/sandbox/manager.go b/internal/sandbox/manager.go index 3dde053..576a3e9 100644 --- a/internal/sandbox/manager.go +++ b/internal/sandbox/manager.go @@ -901,6 +901,18 @@ func (m *Manager) FlattenRootfs(ctx context.Context, sandboxID string, teamID, t return 0, fmt.Errorf("sandbox %s not found", sandboxID) } + // Flush guest page cache to disk before stopping the VM. Without this, + // files written by the build (e.g. pip-installed packages) may exist in the + // guest's page cache but not yet on the dm block device — flatten would then + // capture 0-byte files. + func() { + syncCtx, cancel := context.WithTimeout(ctx, 10*time.Second) + defer cancel() + if _, err := sb.client.Exec(syncCtx, "/bin/sync"); err != nil { + slog.Warn("flatten: guest sync failed (non-fatal)", "id", sb.ID, "error", err) + } + }() + // Stop the VM but keep the dm device alive for flattening. m.stopSampler(sb) if err := m.vm.Destroy(ctx, sb.ID); err != nil { diff --git a/internal/service/build.go b/internal/service/build.go index 563c8cd..92d826d 100644 --- a/internal/service/build.go +++ b/internal/service/build.go @@ -444,7 +444,7 @@ func (s *BuildService) executeBuild(ctx context.Context, buildIDStr string) { return } log.Info("running healthcheck", "cmd", hc.Cmd, "interval", hc.Interval, "timeout", hc.Timeout, "start_period", hc.StartPeriod, "retries", hc.Retries) - if err := s.waitForHealthcheck(buildCtx, agent, sandboxIDStr, hc); err != nil { + if err := s.waitForHealthcheck(buildCtx, agent, sandboxIDStr, hc, templateDefaultUser); err != nil { s.destroySandbox(buildCtx, agent, sandboxIDStr) if buildCtx.Err() != nil { return @@ -544,7 +544,14 @@ func (s *BuildService) executeBuild(ctx context.Context, buildIDStr string) { // During the start period, failures are not counted toward the retry budget. // Returns nil on the first successful check, or an error if retries are // exhausted, the deadline passes, or the context is cancelled. -func (s *BuildService) waitForHealthcheck(ctx context.Context, agent buildAgentClient, sandboxIDStr string, hc recipe.HealthcheckConfig) error { +func (s *BuildService) waitForHealthcheck(ctx context.Context, agent buildAgentClient, sandboxIDStr string, hc recipe.HealthcheckConfig, user string) error { + // Wrap the healthcheck command with su when a non-root user is set, so that + // ~ expands to the correct home directory and the process runs with the + // right UID (matching the template's default user). + cmd := hc.Cmd + if user != "" && user != "root" { + cmd = "su " + recipe.Shellescape(user) + " -s /bin/sh -c " + recipe.Shellescape(hc.Cmd) + } ticker := time.NewTicker(hc.Interval) defer ticker.Stop() @@ -571,7 +578,7 @@ func (s *BuildService) waitForHealthcheck(ctx context.Context, agent buildAgentC resp, err := agent.Exec(execCtx, connect.NewRequest(&pb.ExecRequest{ SandboxId: sandboxIDStr, Cmd: "/bin/sh", - Args: []string{"-c", hc.Cmd}, + Args: []string{"-c", cmd}, TimeoutSec: int32(hc.Timeout.Seconds()), })) cancel() diff --git a/recipes/code-runner-beta.healthcheck b/recipes/code-runner-beta.healthcheck new file mode 100644 index 0000000..186da39 --- /dev/null +++ b/recipes/code-runner-beta.healthcheck @@ -0,0 +1 @@ +--interval=5s --timeout=5s --start-period=60s --retries=5 curl -sf http://127.0.0.1:8888/api/status diff --git a/recipes/code-runner-beta.recipefile b/recipes/code-runner-beta.recipefile new file mode 100644 index 0000000..dc96779 --- /dev/null +++ b/recipes/code-runner-beta.recipefile @@ -0,0 +1,9 @@ +RUN --timeout=5m sudo apt install -y python3 python3-pip python3-venv +ENV PYTHONUNBUFFERED=1 + +RUN python3 -m venv ~/jupyter-env +RUN --timeout=5m ~/jupyter-env/bin/pip install --upgrade pip +RUN --timeout=5m ~/jupyter-env/bin/pip install jupyter-server ipykernel +RUN --timeout=5m ~/jupyter-env/bin/python -m ipykernel install --sys-prefix + +START ~/jupyter-env/bin/jupyter server --ServerApp.ip=0.0.0.0 --ServerApp.port=8888 --ServerApp.token='' --ServerApp.password='' --ServerApp.allow_origin='*' --ServerApp.disable_check_xsrf=True --no-browser --log-level=INFO diff --git a/recipes/python-interpreter-v0-beta.healthcheck b/recipes/python-interpreter-v0-beta.healthcheck deleted file mode 100644 index ca2555c..0000000 --- a/recipes/python-interpreter-v0-beta.healthcheck +++ /dev/null @@ -1 +0,0 @@ ---interval=5s --timeout=3s --start-period=3s --retries=3 python3 -c "import urllib.request; urllib.request.urlopen('http://127.0.0.1:8888/api/status', timeout=3)" diff --git a/recipes/python-interpreter-v0-beta.recipefile b/recipes/python-interpreter-v0-beta.recipefile deleted file mode 100644 index e83f5da..0000000 --- a/recipes/python-interpreter-v0-beta.recipefile +++ /dev/null @@ -1,7 +0,0 @@ -RUN apt-get install -y --no-install-recommends python3 python3-pip python3-venv -RUN python3 -m venv /opt/venv -ENV PATH=/opt/venv/bin:$PATH - -RUN --timeout=5m pip install --no-cache-dir jupyter-server ipykernel - -START jupyter server --ServerApp.ip=0.0.0.0 --ServerApp.port=8888 --ServerApp.token='' --ServerApp.allow_origin='*' --ServerApp.disable_check_xsrf=True --no-browser --allow-root