1
0
forked from wrenn/wrenn

fix: prevent sandbox halt after resume by fixing HTTP/2 HOL blocking and adding timeouts

Disable HTTP/2 on both the host agent server and the CP→agent transport —
multiplexing caused head-of-line (HOL) blocking when a slow sandbox RPC stalled
the shared connection.
Add ResponseHeaderTimeout to envd HTTP clients. Merge SetDefaults into Resume's
PostInit call to eliminate an extra round-trip that could hang on a stale connection.
This commit is contained in:
2026-05-02 13:48:51 +06:00
parent 7ef9a64613
commit bb582deefa
5 changed files with 30 additions and 16 deletions

View File

@ -39,7 +39,19 @@ func NewHostClientPool() *HostClientPool {
// (use auth.CPClientTLSConfig to construct it).
func NewHostClientPoolTLS(tlsCfg *tls.Config) *HostClientPool {
transport := &http.Transport{
TLSClientConfig: tlsCfg,
TLSClientConfig: tlsCfg,
ForceAttemptHTTP2: false,
// Empty non-nil map disables HTTP/2 ALPN negotiation, forcing HTTP/1.1.
// Connect RPC works over HTTP/1.1; HTTP/2 multiplexing causes HOL
// blocking when a single slow sandbox RPC stalls the shared connection.
TLSNextProto: make(map[string]func(authority string, c *tls.Conn) http.RoundTripper),
MaxIdleConnsPerHost: 20,
IdleConnTimeout: 90 * time.Second,
ResponseHeaderTimeout: 45 * time.Second,
DialContext: (&net.Dialer{
Timeout: 10 * time.Second,
KeepAlive: 30 * time.Second,
}).DialContext,
}
return &HostClientPool{
clients: make(map[string]hostagentv1connect.HostAgentServiceClient),