diff --git a/internal/api/handlers_pty.go b/internal/api/handlers_pty.go index 6a906b1..cd5dcae 100644 --- a/internal/api/handlers_pty.go +++ b/internal/api/handlers_pty.go @@ -23,7 +23,6 @@ import ( ) const ( - ptyInactivityTimeout = 120 * time.Second ptyKeepaliveInterval = 30 * time.Second ptyDefaultCmd = "/bin/bash" ptyDefaultCols = 80 @@ -246,10 +245,6 @@ func runPtyLoop( ) { var wg sync.WaitGroup - // Inactivity timer — reset on input/resize, fires kill after timeout. - timer := time.NewTimer(ptyInactivityTimeout) - defer timer.Stop() - // Output pump: read from Connect stream, write to WebSocket. wg.Add(1) go func() { @@ -317,7 +312,6 @@ func runPtyLoop( })); err != nil { slog.Debug("pty send input error", "error", err) } - resetTimer(timer, ptyInactivityTimeout) case "resize": cols := msg.Cols @@ -331,7 +325,6 @@ func runPtyLoop( })); err != nil { slog.Debug("pty resize error", "error", err) } - resetTimer(timer, ptyInactivityTimeout) } case "kill": @@ -364,26 +357,6 @@ func runPtyLoop( } }() - // Inactivity timeout goroutine. - wg.Add(1) - go func() { - defer wg.Done() - select { - case <-timer.C: - slog.Info("pty session timed out", "sandbox_id", sandboxID, "tag", tag) - rpcCtx, rpcCancel := context.WithTimeout(context.Background(), 5*time.Second) - if _, err := agent.PtyKill(rpcCtx, connect.NewRequest(&pb.PtyKillRequest{ - SandboxId: sandboxID, - Tag: tag, - })); err != nil { - slog.Debug("pty timeout kill error", "error", err) - } - rpcCancel() - cancel() - case <-ctx.Done(): - } - }() - wg.Wait() } @@ -391,15 +364,3 @@ func runPtyLoop( func newPtyTag() string { return "pty-" + id.NewPtyTag() } - -// resetTimer safely resets a timer by stopping it and draining the channel -// before resetting, avoiding the race documented in time.Timer.Reset. -func resetTimer(t *time.Timer, d time.Duration) { - if !t.Stop() { - select { - case <-t.C: - default: - } - } - t.Reset(d) -} diff --git a/internal/api/openapi.yaml b/internal/api/openapi.yaml index 031cefd..984a37d 100644 --- a/internal/api/openapi.yaml +++ b/internal/api/openapi.yaml @@ -1386,7 +1386,6 @@ paths: PTY data (input and output) is base64-encoded because it contains raw terminal bytes (escape sequences, control codes) that are not valid UTF-8. - Sessions have a 120-second inactivity timeout (reset on input/resize). Sessions persist across WebSocket disconnections — the process keeps running in the capsule. Use the `tag` from the "started" response to reconnect later. diff --git a/internal/recipe/context.go b/internal/recipe/context.go index 820e717..3a64059 100644 --- a/internal/recipe/context.go +++ b/internal/recipe/context.go @@ -115,8 +115,11 @@ func expandEnv(s string, vars map[string]string) string { }) } -// shellescape wraps s in single quotes, escaping any embedded single quotes. +// Shellescape wraps s in single quotes, escaping any embedded single quotes. // This is POSIX-safe for paths, env values, and shell commands. -func shellescape(s string) string { +func Shellescape(s string) string { return "'" + strings.ReplaceAll(s, "'", `'\''`) + "'" } + +// shellescape is the package-internal alias for Shellescape. +func shellescape(s string) string { return Shellescape(s) } diff --git a/internal/recipe/executor.go b/internal/recipe/executor.go index 38a8b12..ffecf04 100644 --- a/internal/recipe/executor.go +++ b/internal/recipe/executor.go @@ -193,6 +193,15 @@ func execUser( entry := execRawShell(ctx, st.Raw, sandboxID, phase, step, 30*time.Second, execFn, script) if entry.Ok { bctx.User = username + // Update HOME so ~ expands correctly in subsequent RUN/WORKDIR steps. + if bctx.EnvVars == nil { + bctx.EnvVars = make(map[string]string) + } + if username == "root" { + bctx.EnvVars["HOME"] = "/root" + } else { + bctx.EnvVars["HOME"] = "/home/" + username + } } return entry, entry.Ok } diff --git a/internal/sandbox/manager.go b/internal/sandbox/manager.go index 3dde053..576a3e9 100644 --- a/internal/sandbox/manager.go +++ b/internal/sandbox/manager.go @@ -901,6 +901,18 @@ func (m *Manager) FlattenRootfs(ctx context.Context, sandboxID string, teamID, t return 0, fmt.Errorf("sandbox %s not found", sandboxID) } + // Flush guest page cache to disk before stopping the VM. Without this, + // files written by the build (e.g. pip-installed packages) may exist in the + // guest's page cache but not yet on the dm block device — flatten would then + // capture 0-byte files. + func() { + syncCtx, cancel := context.WithTimeout(ctx, 10*time.Second) + defer cancel() + if _, err := sb.client.Exec(syncCtx, "/bin/sync"); err != nil { + slog.Warn("flatten: guest sync failed (non-fatal)", "id", sb.ID, "error", err) + } + }() + // Stop the VM but keep the dm device alive for flattening. m.stopSampler(sb) if err := m.vm.Destroy(ctx, sb.ID); err != nil { diff --git a/internal/service/build.go b/internal/service/build.go index 563c8cd..92d826d 100644 --- a/internal/service/build.go +++ b/internal/service/build.go @@ -444,7 +444,7 @@ func (s *BuildService) executeBuild(ctx context.Context, buildIDStr string) { return } log.Info("running healthcheck", "cmd", hc.Cmd, "interval", hc.Interval, "timeout", hc.Timeout, "start_period", hc.StartPeriod, "retries", hc.Retries) - if err := s.waitForHealthcheck(buildCtx, agent, sandboxIDStr, hc); err != nil { + if err := s.waitForHealthcheck(buildCtx, agent, sandboxIDStr, hc, templateDefaultUser); err != nil { s.destroySandbox(buildCtx, agent, sandboxIDStr) if buildCtx.Err() != nil { return @@ -544,7 +544,14 @@ func (s *BuildService) executeBuild(ctx context.Context, buildIDStr string) { // During the start period, failures are not counted toward the retry budget. // Returns nil on the first successful check, or an error if retries are // exhausted, the deadline passes, or the context is cancelled. -func (s *BuildService) waitForHealthcheck(ctx context.Context, agent buildAgentClient, sandboxIDStr string, hc recipe.HealthcheckConfig) error { +func (s *BuildService) waitForHealthcheck(ctx context.Context, agent buildAgentClient, sandboxIDStr string, hc recipe.HealthcheckConfig, user string) error { + // Wrap the healthcheck command with su when a non-root user is set, so that + // ~ expands to the correct home directory and the process runs with the + // right UID (matching the template's default user). + cmd := hc.Cmd + if user != "" && user != "root" { + cmd = "su " + recipe.Shellescape(user) + " -s /bin/sh -c " + recipe.Shellescape(hc.Cmd) + } ticker := time.NewTicker(hc.Interval) defer ticker.Stop() @@ -571,7 +578,7 @@ func (s *BuildService) waitForHealthcheck(ctx context.Context, agent buildAgentC resp, err := agent.Exec(execCtx, connect.NewRequest(&pb.ExecRequest{ SandboxId: sandboxIDStr, Cmd: "/bin/sh", - Args: []string{"-c", hc.Cmd}, + Args: []string{"-c", cmd}, TimeoutSec: int32(hc.Timeout.Seconds()), })) cancel() diff --git a/recipes/code-runner-beta.healthcheck b/recipes/code-runner-beta.healthcheck new file mode 100644 index 0000000..186da39 --- /dev/null +++ b/recipes/code-runner-beta.healthcheck @@ -0,0 +1 @@ +--interval=5s --timeout=5s --start-period=60s --retries=5 curl -sf http://127.0.0.1:8888/api/status diff --git a/recipes/code-runner-beta.recipefile b/recipes/code-runner-beta.recipefile new file mode 100644 index 0000000..dc96779 --- /dev/null +++ b/recipes/code-runner-beta.recipefile @@ -0,0 +1,9 @@ +RUN --timeout=5m sudo apt install -y python3 python3-pip python3-venv +ENV PYTHONUNBUFFERED=1 + +RUN python3 -m venv ~/jupyter-env +RUN --timeout=5m ~/jupyter-env/bin/pip install --upgrade pip +RUN --timeout=5m ~/jupyter-env/bin/pip install jupyter-server ipykernel +RUN --timeout=5m ~/jupyter-env/bin/python -m ipykernel install --sys-prefix + +START ~/jupyter-env/bin/jupyter server --ServerApp.ip=0.0.0.0 --ServerApp.port=8888 --ServerApp.token='' --ServerApp.password='' --ServerApp.allow_origin='*' --ServerApp.disable_check_xsrf=True --no-browser --log-level=INFO diff --git a/recipes/python-interpreter-v0-beta.healthcheck b/recipes/python-interpreter-v0-beta.healthcheck deleted file mode 100644 index ca2555c..0000000 --- a/recipes/python-interpreter-v0-beta.healthcheck +++ /dev/null @@ -1 +0,0 @@ ---interval=5s --timeout=3s --start-period=3s --retries=3 python3 -c "import urllib.request; urllib.request.urlopen('http://127.0.0.1:8888/api/status', timeout=3)" diff --git a/recipes/python-interpreter-v0-beta.recipefile b/recipes/python-interpreter-v0-beta.recipefile deleted file mode 100644 index e83f5da..0000000 --- a/recipes/python-interpreter-v0-beta.recipefile +++ /dev/null @@ -1,7 +0,0 @@ -RUN apt-get install -y --no-install-recommends python3 python3-pip python3-venv -RUN python3 -m venv /opt/venv -ENV PATH=/opt/venv/bin:$PATH - -RUN --timeout=5m pip install --no-cache-dir jupyter-server ipykernel - -START jupyter server --ServerApp.ip=0.0.0.0 --ServerApp.port=8888 --ServerApp.token='' --ServerApp.allow_origin='*' --ServerApp.disable_check_xsrf=True --no-browser --allow-root