From 01d10add79e97373ce6f9648abee6daa565b696b Mon Sep 17 00:00:00 2001 From: Nick Stenning Date: Mon, 27 Nov 2023 20:27:16 +0100 Subject: [PATCH] Add a workaround for HTTP/2 connection reuse Sometimes we're seeing HTTP/2 connections be reused for up to ~16 minutes despite being completely defunct -- i.e. we're receiving no response from the remote end. We believe that this is in part a result of Go bug https://github.com/golang/go/issues/59690 in which failed connections are not correctly discarded under certain circumstances. This commit enables the http2 transport's `ReadIdleTimeout` function, which will send an h2 "ping" frame on any connection that's been idle longer than the value of `ReadIdleTimeout`. If it doesn't hear back in `PingTimeout`, it will throw away the connection. This seems like a reasonable thing to leave on in general. --- go.mod | 2 +- httpclient/httpclient.go | 28 ++++++++++++++++++++++++++++ 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index 95bdec8..1fe3941 100644 --- a/go.mod +++ b/go.mod @@ -21,6 +21,7 @@ require ( go.opentelemetry.io/otel/sdk v1.19.0 go.opentelemetry.io/otel/trace v1.19.0 go.uber.org/zap v1.26.0 + golang.org/x/net v0.17.0 golang.org/x/tools v0.14.0 gotest.tools/gotestsum v1.11.0 ) @@ -217,7 +218,6 @@ require ( golang.org/x/exp v0.0.0-20230510235704-dd950f8aeaea // indirect golang.org/x/exp/typeparams v0.0.0-20230307190834-24139beb5833 // indirect golang.org/x/mod v0.13.0 // indirect - golang.org/x/net v0.17.0 // indirect golang.org/x/sync v0.4.0 // indirect golang.org/x/sys v0.13.0 // indirect golang.org/x/term v0.13.0 // indirect diff --git a/httpclient/httpclient.go b/httpclient/httpclient.go index 377c158..5a8fa0b 100644 --- a/httpclient/httpclient.go +++ b/httpclient/httpclient.go @@ -13,6 +13,7 @@ import ( "go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp" "go.opentelemetry.io/otel/propagation" + "golang.org/x/net/http2" ) const ConnectTimeout = 5 * time.Second @@ -72,6 +73,7 @@ func DefaultPooledTransport() *http.Transport { ExpectContinueTimeout: 1 * time.Second, MaxIdleConnsPerHost: runtime.GOMAXPROCS(0) + 1, } + configureHTTP2(transport) return transport } @@ -93,3 +95,29 @@ func DefaultPooledClient() *http.Client { Transport: DefaultPooledRoundTripper(), } } + +func configureHTTP2(t *http.Transport) { + h2t, err := http2.ConfigureTransports(t) + if err != nil { + // ConfigureTransports should only ever return an error if the transport + // passed in has already been configured for http2, which shouldn't be + // possible for us. + panic(err) + } + + // Send a ping frame on any connection that's been idle for more than 10 + // seconds. + // + // The default is to never do this. We set it primarily as a workaround for + // + // https://github.com/golang/go/issues/59690 + // + // where a connection that goes AWOL will not be correctly terminated and + // removed from the connection pool under certain circumstances. Together + // `ReadIdleTimeout` and `PingTimeout` should ensure that we remove defunct + // connections in ~20 seconds. + h2t.ReadIdleTimeout = 10 * time.Second + // Give the other end 10 seconds to respond. If we don't hear back, we'll + // close the connection. + h2t.PingTimeout = 10 * time.Second +}