1
0
forked from wrenn/wrenn

fix: prevent sandbox halt after resume by fixing HTTP/2 HOL blocking and adding timeouts

Disable HTTP/2 on both host agent server and CP→agent transport — multiplexing
caused head-of-line blocking when a slow sandbox RPC stalled the shared connection.
Add ResponseHeaderTimeout to envd HTTP clients. Merge SetDefaults into Resume's
PostInit call to eliminate an extra round-trip that could hang on a stale connection.
This commit is contained in:
2026-05-02 13:48:51 +06:00
parent 7ef9a64613
commit bb582deefa
5 changed files with 30 additions and 16 deletions

View File

@ -109,18 +109,11 @@ func (s *Server) ResumeSandbox(
req *connect.Request[pb.ResumeSandboxRequest],
) (*connect.Response[pb.ResumeSandboxResponse], error) {
msg := req.Msg
sb, err := s.mgr.Resume(ctx, msg.SandboxId, int(msg.TimeoutSec), msg.KernelVersion)
sb, err := s.mgr.Resume(ctx, msg.SandboxId, int(msg.TimeoutSec), msg.KernelVersion, msg.DefaultUser, msg.DefaultEnv)
if err != nil {
return nil, connect.NewError(connect.CodeInternal, err)
}
// Apply template defaults (user, env vars) if provided.
if msg.DefaultUser != "" || len(msg.DefaultEnv) > 0 {
if err := s.mgr.SetDefaults(ctx, sb.ID, msg.DefaultUser, msg.DefaultEnv); err != nil {
slog.Warn("failed to set sandbox defaults on resume", "sandbox", sb.ID, "error", err)
}
}
return connect.NewResponse(&pb.ResumeSandboxResponse{
SandboxId: sb.ID,
Status: string(sb.Status),