From f9a38b29c7f35f9eb8f4ea00844ef96b8091e14b Mon Sep 17 00:00:00 2001 From: Dani Louca <59848726+dloucasfx@users.noreply.github.com> Date: Fri, 8 Dec 2023 13:10:04 -0500 Subject: [PATCH] Introduce HTTP2 health check transport options (#9022) **Description:** This PR introduces options to enable `http/2` health check by exposing `HTTP2ReadIdleTimeout` and `HTTP2PingTimeout` The golang issues are: https://github.com/golang/go/issues/59690 https://github.com/golang/go/issues/36026 In summary, if due to environmental issue the underlying tcp connection used by the http/2 client in the exporter became unstable/unusable/unreachable, unlike http/1, the http/2 client does not forcibly close the connection and redial a new one, instead it keeps using it for 15 minutes (default value of OS `tcp_retries2` ) until the OS cleans it up and a new tcp connection gets established. From OTEL user perspective, one will see a spike in export failures/timeouts for ~15 minutes, this will happen for every connection that got into a bad state, after 15 minutes things will recover until next time the tcp connection gets into a bad state. **Testing:** - Run OTEL with one of the exporters that uses HTTP/2 client, example `signalfx` exporter - For simplicity use a single pipeline/exporter - In a different shell, run this to watch the tcp state of the established connection ``` while (true); do echo date; sudo netstat -anp | grep -E '' | sort -k 5; sleep 2; done ``` - From the netstat, take a note of the source port and the source IP address - replace <> from previous step `sudo iptables -A OUTPUT -s -p tcp --sport -j DROP` - Note how the OTEL exporter export starts timing out Expected Result: - A new connection should be established, similarly to http/1 and exports should succeed Actual Result: - The exports keep failing for ~ 15 minutes or for whatever the OS `tcp_retries2` is configured to - After 15 minutes, a new tcp connection is created and exports start working **Documentation:** Readme is updated with new settings Signed-off-by: Dani Louca --- .chloggen/http2ping.yaml | 25 +++++++++ config/confighttp/README.md | 2 + config/confighttp/confighttp.go | 20 +++++++ config/confighttp/confighttp_test.go | 82 +++++++++++++++++++--------- 4 files changed, 102 insertions(+), 27 deletions(-) create mode 100755 .chloggen/http2ping.yaml diff --git a/.chloggen/http2ping.yaml b/.chloggen/http2ping.yaml new file mode 100755 index 00000000000..b6f2223ed7d --- /dev/null +++ b/.chloggen/http2ping.yaml @@ -0,0 +1,25 @@ +# Use this changelog template to create an entry for release notes. + +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: enhancement + +# The name of the component, or a single word describing the area of concern, (e.g. otlpreceiver) +component: config/confighttp + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: Exposes http/2 transport settings to enable health check and workaround golang http/2 issue https://github.com/golang/go/issues/59690 + +# One or more tracking issues or pull requests related to the change +issues: [9022] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. +subtext: + +# Optional: The change log or logs in which this entry should be included. +# e.g. '[user]' or '[user, api]' +# Include 'user' if the change is relevant to end users. +# Include 'api' if there is a change to a library API. +# Default: '[user]' +change_logs: [] \ No newline at end of file diff --git a/config/confighttp/README.md b/config/confighttp/README.md index a3794ceeed1..0cc0503f539 100644 --- a/config/confighttp/README.md +++ b/config/confighttp/README.md @@ -29,6 +29,8 @@ README](../configtls/README.md). - [`idle_conn_timeout`](https://golang.org/pkg/net/http/#Transport) - [`auth`](../configauth/README.md) - [`disable_keep_alives`](https://golang.org/pkg/net/http/#Transport) +- [`http2_read_idle_timeout`](https://pkg.go.dev/golang.org/x/net/http2#Transport) +- [`http2_ping_timeout`](https://pkg.go.dev/golang.org/x/net/http2#Transport) Example: diff --git a/config/confighttp/confighttp.go b/config/confighttp/confighttp.go index c83db25d4b6..e41a64cc124 100644 --- a/config/confighttp/confighttp.go +++ b/config/confighttp/confighttp.go @@ -6,6 +6,7 @@ package confighttp // import "go.opentelemetry.io/collector/config/confighttp" import ( "crypto/tls" "errors" + "fmt" "io" "net" "net/http" @@ -86,6 +87,16 @@ type HTTPClientSettings struct { // connection for every request. Before enabling this option please consider whether changes // to idle connection settings can achieve your goal. DisableKeepAlives bool `mapstructure:"disable_keep_alives"` + + // This is needed in case you run into + // https://github.com/golang/go/issues/59690 + // https://github.com/golang/go/issues/36026 + // HTTP2ReadIdleTimeout if the connection has been idle for the configured value send a ping frame for health check + // 0s means no health check will be performed. + HTTP2ReadIdleTimeout time.Duration `mapstructure:"http2_read_idle_timeout"` + // HTTP2PingTimeout if there's no response to the ping within the configured value, the connection will be closed. + // If not set or set to 0, it defaults to 15s. + HTTP2PingTimeout time.Duration `mapstructure:"http2_ping_timeout"` } // NewDefaultHTTPClientSettings returns HTTPClientSettings type object with @@ -147,6 +158,15 @@ func (hcs *HTTPClientSettings) ToClient(host component.Host, settings component. transport.DisableKeepAlives = hcs.DisableKeepAlives + if hcs.HTTP2ReadIdleTimeout > 0 { + transport2, transportErr := http2.ConfigureTransports(transport) + if transportErr != nil { + return nil, fmt.Errorf("failed to configure http2 transport: %w", transportErr) + } + transport2.ReadIdleTimeout = hcs.HTTP2ReadIdleTimeout + transport2.PingTimeout = hcs.HTTP2PingTimeout + } + clientTransport := (http.RoundTripper)(transport) // The Auth RoundTripper should always be the innermost to ensure that diff --git a/config/confighttp/confighttp_test.go b/config/confighttp/confighttp_test.go index 43a9b865010..3d3bc6d8bc7 100644 --- a/config/confighttp/confighttp_test.go +++ b/config/confighttp/confighttp_test.go @@ -53,6 +53,7 @@ func TestAllHTTPClientSettings(t *testing.T) { maxIdleConnsPerHost := 40 maxConnsPerHost := 45 idleConnTimeout := 30 * time.Second + http2PingTimeout := 5 * time.Second tests := []struct { name string settings HTTPClientSettings @@ -65,15 +66,17 @@ func TestAllHTTPClientSettings(t *testing.T) { TLSSetting: configtls.TLSClientSetting{ Insecure: false, }, - ReadBufferSize: 1024, - WriteBufferSize: 512, - MaxIdleConns: &maxIdleConns, - MaxIdleConnsPerHost: &maxIdleConnsPerHost, - MaxConnsPerHost: &maxConnsPerHost, - IdleConnTimeout: &idleConnTimeout, - CustomRoundTripper: func(next http.RoundTripper) (http.RoundTripper, error) { return next, nil }, - Compression: "", - DisableKeepAlives: true, + ReadBufferSize: 1024, + WriteBufferSize: 512, + MaxIdleConns: &maxIdleConns, + MaxIdleConnsPerHost: &maxIdleConnsPerHost, + MaxConnsPerHost: &maxConnsPerHost, + IdleConnTimeout: &idleConnTimeout, + CustomRoundTripper: func(next http.RoundTripper) (http.RoundTripper, error) { return next, nil }, + Compression: "", + DisableKeepAlives: true, + HTTP2ReadIdleTimeout: idleConnTimeout, + HTTP2PingTimeout: http2PingTimeout, }, shouldError: false, }, @@ -84,15 +87,17 @@ func TestAllHTTPClientSettings(t *testing.T) { TLSSetting: configtls.TLSClientSetting{ Insecure: false, }, - ReadBufferSize: 1024, - WriteBufferSize: 512, - MaxIdleConns: &maxIdleConns, - MaxIdleConnsPerHost: &maxIdleConnsPerHost, - MaxConnsPerHost: &maxConnsPerHost, - IdleConnTimeout: &idleConnTimeout, - CustomRoundTripper: func(next http.RoundTripper) (http.RoundTripper, error) { return next, nil }, - Compression: "none", - DisableKeepAlives: true, + ReadBufferSize: 1024, + WriteBufferSize: 512, + MaxIdleConns: &maxIdleConns, + MaxIdleConnsPerHost: &maxIdleConnsPerHost, + MaxConnsPerHost: &maxConnsPerHost, + IdleConnTimeout: &idleConnTimeout, + CustomRoundTripper: func(next http.RoundTripper) (http.RoundTripper, error) { return next, nil }, + Compression: "none", + DisableKeepAlives: true, + HTTP2ReadIdleTimeout: idleConnTimeout, + HTTP2PingTimeout: http2PingTimeout, }, shouldError: false, }, @@ -103,15 +108,38 @@ func TestAllHTTPClientSettings(t *testing.T) { TLSSetting: configtls.TLSClientSetting{ Insecure: false, }, - ReadBufferSize: 1024, - WriteBufferSize: 512, - MaxIdleConns: &maxIdleConns, - MaxIdleConnsPerHost: &maxIdleConnsPerHost, - MaxConnsPerHost: &maxConnsPerHost, - IdleConnTimeout: &idleConnTimeout, - CustomRoundTripper: func(next http.RoundTripper) (http.RoundTripper, error) { return next, nil }, - Compression: "gzip", - DisableKeepAlives: true, + ReadBufferSize: 1024, + WriteBufferSize: 512, + MaxIdleConns: &maxIdleConns, + MaxIdleConnsPerHost: &maxIdleConnsPerHost, + MaxConnsPerHost: &maxConnsPerHost, + IdleConnTimeout: &idleConnTimeout, + CustomRoundTripper: func(next http.RoundTripper) (http.RoundTripper, error) { return next, nil }, + Compression: "gzip", + DisableKeepAlives: true, + HTTP2ReadIdleTimeout: idleConnTimeout, + HTTP2PingTimeout: http2PingTimeout, + }, + shouldError: false, + }, + { + name: "all_valid_settings_http2_health_check", + settings: HTTPClientSettings{ + Endpoint: "localhost:1234", + TLSSetting: configtls.TLSClientSetting{ + Insecure: false, + }, + ReadBufferSize: 1024, + WriteBufferSize: 512, + MaxIdleConns: &maxIdleConns, + MaxIdleConnsPerHost: &maxIdleConnsPerHost, + MaxConnsPerHost: &maxConnsPerHost, + IdleConnTimeout: &idleConnTimeout, + CustomRoundTripper: func(next http.RoundTripper) (http.RoundTripper, error) { return next, nil }, + Compression: "gzip", + DisableKeepAlives: true, + HTTP2ReadIdleTimeout: idleConnTimeout, + HTTP2PingTimeout: http2PingTimeout, }, shouldError: false, },