From 6f65c1c79da0f0a77ef4fbcd8c6a54687eb3921b Mon Sep 17 00:00:00 2001 From: Maximilian Moehl Date: Wed, 10 May 2023 13:43:43 +0200 Subject: [PATCH] refactor: split error handling and retry logic With this commit `isRetriable` no longer overwrites / wraps the error that is passed to it. This was done to accommodate context from the circumstances in which the error occurred into the error itself to be able to match on those later on. This mechanism has proven to cause bugs and increase overall complexity by abusing the error type. Instead `isRetriable` now only returns whether a certain combination of parameters is considered retry-able, either because the circumstances allow for it or because the error matches one of the retry-able error classifiers. Resovles: cloudfoundry/routing-release#321 --- proxy/fails/basic_classifiers.go | 12 --- proxy/fails/classifier_group.go | 7 +- proxy/fails/classifier_group_test.go | 12 --- proxy/round_tripper/error_handler_test.go | 65 -------------- proxy/round_tripper/proxy_round_tripper.go | 16 ++-- .../round_tripper/proxy_round_tripper_test.go | 90 ------------------- 6 files changed, 12 insertions(+), 190 deletions(-) diff --git a/proxy/fails/basic_classifiers.go b/proxy/fails/basic_classifiers.go index d54d4465..5875406f 100644 --- a/proxy/fails/basic_classifiers.go +++ b/proxy/fails/basic_classifiers.go @@ -9,10 +9,6 @@ import ( "strings" ) -var IdempotentRequestEOFError = errors.New("EOF (via idempotent request)") - -var IncompleteRequestError = errors.New("incomplete request") - var AttemptedTLSWithNonTLSBackend = ClassifierFunc(func(err error) bool { return errors.As(err, &tls.RecordHeaderError{}) }) @@ -78,11 +74,3 @@ var UntrustedCert = ClassifierFunc(func(err error) bool { return false } }) - -var IdempotentRequestEOF = ClassifierFunc(func(err error) bool { - return errors.Is(err, IdempotentRequestEOFError) -}) - -var IncompleteRequest = ClassifierFunc(func(err error) bool { - return errors.Is(err, IncompleteRequestError) -}) diff --git a/proxy/fails/classifier_group.go b/proxy/fails/classifier_group.go index b790c014..cb6b54f2 100644 --- a/proxy/fails/classifier_group.go +++ b/proxy/fails/classifier_group.go @@ -10,6 +10,11 @@ type ClassifierGroup []Classifier // // Otherwise, there’s risk of a mutating non-idempotent request (e.g. send // payment) being silently retried without the client knowing. +// +// IMPORTANT: to truly determine whether a request is retry-able the function +// round_tripper.isRetrieable must be used. It includes additional checks that +// allow requests to be retried more often than it is allowed by the +// classifiers. var RetriableClassifiers = ClassifierGroup{ Dial, AttemptedTLSWithNonTLSBackend, @@ -19,8 +24,6 @@ var RetriableClassifiers = ClassifierGroup{ RemoteHandshakeTimeout, UntrustedCert, ExpiredOrNotYetValidCertFailure, - IdempotentRequestEOF, - IncompleteRequest, } var FailableClassifiers = ClassifierGroup{ diff --git a/proxy/fails/classifier_group_test.go b/proxy/fails/classifier_group_test.go index 649414b5..3ca5f712 100644 --- a/proxy/fails/classifier_group_test.go +++ b/proxy/fails/classifier_group_test.go @@ -4,7 +4,6 @@ import ( "crypto/tls" "crypto/x509" "errors" - "fmt" "net" "code.cloudfoundry.org/gorouter/proxy/fails" @@ -33,25 +32,14 @@ var _ = Describe("ClassifierGroup", func() { rc := fails.RetriableClassifiers Expect(rc.Classify(&net.OpError{Op: "dial"})).To(BeTrue()) - Expect(rc.Classify(fmt.Errorf("%w (%w)", fails.IncompleteRequestError, &net.OpError{Op: "dial"}))).To(BeTrue()) Expect(rc.Classify(&net.OpError{Op: "remote error", Err: errors.New("tls: bad certificate")})).To(BeTrue()) - Expect(rc.Classify(fmt.Errorf("%w (%w)", fails.IncompleteRequestError, &net.OpError{Op: "remote error", Err: errors.New("tls: bad certificate")}))).To(BeTrue()) Expect(rc.Classify(&net.OpError{Op: "remote error", Err: errors.New("tls: handshake failure")})).To(BeTrue()) - Expect(rc.Classify(fmt.Errorf("%w (%w)", fails.IncompleteRequestError, &net.OpError{Op: "remote error", Err: errors.New("tls: handshake failure")}))).To(BeTrue()) Expect(rc.Classify(errors.New("net/http: TLS handshake timeout"))).To(BeTrue()) - Expect(rc.Classify(fmt.Errorf("%w (%w)", fails.IncompleteRequestError, errors.New("net/http: TLS handshake timeout")))).To(BeTrue()) Expect(rc.Classify(tls.RecordHeaderError{})).To(BeTrue()) - Expect(rc.Classify(fmt.Errorf("%w (%w)", fails.IncompleteRequestError, tls.RecordHeaderError{}))).To(BeTrue()) Expect(rc.Classify(x509.HostnameError{})).To(BeTrue()) - Expect(rc.Classify(fmt.Errorf("%w (%w)", fails.IncompleteRequestError, x509.HostnameError{}))).To(BeTrue()) Expect(rc.Classify(x509.UnknownAuthorityError{})).To(BeTrue()) - Expect(rc.Classify(fmt.Errorf("%w (%w)", fails.IncompleteRequestError, x509.UnknownAuthorityError{}))).To(BeTrue()) Expect(rc.Classify(x509.CertificateInvalidError{Reason: x509.Expired})).To(BeTrue()) - Expect(rc.Classify(fmt.Errorf("%w (%w)", fails.IncompleteRequestError, x509.CertificateInvalidError{Reason: x509.Expired}))).To(BeTrue()) Expect(rc.Classify(errors.New("i'm a potato"))).To(BeFalse()) - Expect(rc.Classify(fails.IdempotentRequestEOFError)).To(BeTrue()) - Expect(rc.Classify(fails.IncompleteRequestError)).To(BeTrue()) - Expect(rc.Classify(fmt.Errorf("%w (%w)", fails.IncompleteRequestError, x509.HostnameError{}))).To(BeTrue()) }) }) diff --git a/proxy/round_tripper/error_handler_test.go b/proxy/round_tripper/error_handler_test.go index e082790e..78140113 100644 --- a/proxy/round_tripper/error_handler_test.go +++ b/proxy/round_tripper/error_handler_test.go @@ -5,7 +5,6 @@ import ( "crypto/tls" "crypto/x509" "errors" - "fmt" "net" "net/http/httptest" @@ -137,22 +136,6 @@ var _ = Describe("HandleError", func() { }) }) - Context("HostnameMismatch wrapped in IncompleteRequestError", func() { - BeforeEach(func() { - wrappedErr := x509.HostnameError{Host: "the wrong one"} - err = fmt.Errorf("%w (%w)", fails.IncompleteRequestError, wrappedErr) - errorHandler.HandleError(responseWriter, err) - }) - - It("has a 503 Status Code", func() { - Expect(responseWriter.Status()).To(Equal(503)) - }) - - It("emits a backend_invalid_id metric", func() { - Expect(metricReporter.CaptureBackendInvalidIDCallCount()).To(Equal(1)) - }) - }) - Context("Untrusted Cert", func() { BeforeEach(func() { err = x509.UnknownAuthorityError{} @@ -168,22 +151,6 @@ var _ = Describe("HandleError", func() { }) }) - Context("Untrusted Cert wrapped in IncompleteRequestError", func() { - BeforeEach(func() { - wrappedErr := x509.UnknownAuthorityError{} - err = fmt.Errorf("%w (%w)", fails.IncompleteRequestError, wrappedErr) - errorHandler.HandleError(responseWriter, err) - }) - - It("has a 526 Status Code", func() { - Expect(responseWriter.Status()).To(Equal(526)) - }) - - It("emits a backend_invalid_tls_cert metric", func() { - Expect(metricReporter.CaptureBackendInvalidTLSCertCallCount()).To(Equal(1)) - }) - }) - Context("Attempted TLS with non-TLS backend error", func() { BeforeEach(func() { err = tls.RecordHeaderError{Msg: "bad handshake"} @@ -199,22 +166,6 @@ var _ = Describe("HandleError", func() { }) }) - Context("Attempted TLS with non-TLS backend error wrapped in IncompleteRequestError", func() { - BeforeEach(func() { - wrappedErr := tls.RecordHeaderError{Msg: "bad handshake"} - err = fmt.Errorf("%w (%w)", fails.IncompleteRequestError, wrappedErr) - errorHandler.HandleError(responseWriter, err) - }) - - It("has a 525 Status Code", func() { - Expect(responseWriter.Status()).To(Equal(525)) - }) - - It("emits a backend_tls_handshake_failed metric", func() { - Expect(metricReporter.CaptureBackendTLSHandshakeFailedCallCount()).To(Equal(1)) - }) - }) - Context("Remote handshake failure", func() { BeforeEach(func() { err = &net.OpError{Op: "remote error", Err: errors.New("tls: handshake failure")} @@ -230,22 +181,6 @@ var _ = Describe("HandleError", func() { }) }) - Context("Remote handshake failure wrapped in IncompleteRequestError", func() { - BeforeEach(func() { - wrappedErr := &net.OpError{Op: "remote error", Err: errors.New("tls: handshake failure")} - err = fmt.Errorf("%w (%w)", fails.IncompleteRequestError, wrappedErr) - errorHandler.HandleError(responseWriter, err) - }) - - It("has a 525 Status Code", func() { - Expect(responseWriter.Status()).To(Equal(525)) - }) - - It("emits a backend_tls_handshake_failed metric", func() { - Expect(metricReporter.CaptureBackendTLSHandshakeFailedCallCount()).To(Equal(1)) - }) - }) - Context("Context Cancelled Error", func() { BeforeEach(func() { err = context.Canceled diff --git a/proxy/round_tripper/proxy_round_tripper.go b/proxy/round_tripper/proxy_round_tripper.go index b3a94c93..66742595 100644 --- a/proxy/round_tripper/proxy_round_tripper.go +++ b/proxy/round_tripper/proxy_round_tripper.go @@ -3,7 +3,6 @@ package round_tripper import ( "context" "errors" - "fmt" "io" "io/ioutil" "net/http" @@ -177,7 +176,7 @@ func (rt *roundTripper) RoundTrip(originalRequest *http.Request) (*http.Response if err != nil { reqInfo.FailedAttempts++ reqInfo.LastFailedAttemptFinishedAt = time.Now() - retriable, err := rt.isRetriable(request, err, trace) + retriable := rt.isRetriable(request, err, trace) logger.Error("backend-endpoint-failed", zap.Error(err), @@ -226,7 +225,7 @@ func (rt *roundTripper) RoundTrip(originalRequest *http.Request) (*http.Response if err != nil { reqInfo.FailedAttempts++ reqInfo.LastFailedAttemptFinishedAt = time.Now() - retriable, err := rt.isRetriable(request, err, trace) + retriable := rt.isRetriable(request, err, trace) logger.Error( "route-service-connection-failed", @@ -482,24 +481,23 @@ func isIdempotent(request *http.Request) bool { return false } -func (rt *roundTripper) isRetriable(request *http.Request, err error, trace *requestTracer) (bool, error) { +func (rt *roundTripper) isRetriable(request *http.Request, err error, trace *requestTracer) bool { // if the context has been cancelled we do not perform further retries if request.Context().Err() != nil { - return false, fmt.Errorf("%w (%w)", request.Context().Err(), err) + return false } // io.EOF errors are considered safe to retry for certain requests // Replace the error here to track this state when classifying later. if err == io.EOF && isIdempotent(request) { - err = fails.IdempotentRequestEOFError + return true } // We can retry for sure if we never obtained a connection // since there is no way any data was transmitted. If headers could not // be written in full, the request should also be safe to retry. if !trace.GotConn() || !trace.WroteHeaders() { - err = fmt.Errorf("%w (%w)", fails.IncompleteRequestError, err) + return true } - retriable := rt.retriableClassifier.Classify(err) - return retriable, err + return rt.retriableClassifier.Classify(err) } diff --git a/proxy/round_tripper/proxy_round_tripper_test.go b/proxy/round_tripper/proxy_round_tripper_test.go index 92543131..c4178f83 100644 --- a/proxy/round_tripper/proxy_round_tripper_test.go +++ b/proxy/round_tripper/proxy_round_tripper_test.go @@ -4,7 +4,6 @@ import ( "bytes" "errors" "fmt" - "io" "net" "net/http" "net/http/httptest" @@ -23,7 +22,6 @@ import ( "code.cloudfoundry.org/gorouter/config" "code.cloudfoundry.org/gorouter/handlers" "code.cloudfoundry.org/gorouter/metrics/fakes" - "code.cloudfoundry.org/gorouter/proxy/fails" "code.cloudfoundry.org/gorouter/proxy/handler" "code.cloudfoundry.org/gorouter/proxy/round_tripper" "code.cloudfoundry.org/gorouter/proxy/utils" @@ -434,94 +432,6 @@ var _ = Describe("ProxyRoundTripper", func() { }) }) - DescribeTable("when the backend fails with an empty response error (io.EOF)", - func(reqBody io.ReadCloser, getBodyIsNil bool, reqMethod string, headers map[string]string, classify fails.ClassifierFunc, expectRetry bool) { - badResponse := &http.Response{ - Header: make(map[string][]string), - } - badResponse.Header.Add(handlers.VcapRequestIdHeader, "some-request-id") - - // The first request fails with io.EOF, the second (if retried) succeeds - transport.RoundTripStub = func(*http.Request) (*http.Response, error) { - switch transport.RoundTripCallCount() { - case 1: - return nil, io.EOF - case 2: - return &http.Response{StatusCode: http.StatusTeapot}, nil - default: - return nil, nil - } - } - - retriableClassifier.ClassifyStub = classify - req.Method = reqMethod - req.Body = reqBody - if !getBodyIsNil { - req.GetBody = func() (io.ReadCloser, error) { - return new(testBody), nil - } - } - if headers != nil { - for key, value := range headers { - req.Header.Add(key, value) - } - } - - res, err := proxyRoundTripper.RoundTrip(req) - - if expectRetry { - Expect(err).NotTo(HaveOccurred()) - Expect(transport.RoundTripCallCount()).To(Equal(2)) - Expect(retriableClassifier.ClassifyCallCount()).To(Equal(1)) - Expect(res.StatusCode).To(Equal(http.StatusTeapot)) - } else { - Expect(errors.Is(err, io.EOF)).To(BeTrue()) - Expect(transport.RoundTripCallCount()).To(Equal(1)) - Expect(retriableClassifier.ClassifyCallCount()).To(Equal(1)) - } - }, - - Entry("POST, body is empty: does not retry", nil, true, "POST", nil, fails.IdempotentRequestEOF, false), - Entry("POST, body is not empty and GetBody is non-nil: does not retry", reqBody, false, "POST", nil, fails.IdempotentRequestEOF, false), - Entry("POST, body is not empty: does not retry", reqBody, true, "POST", nil, fails.IdempotentRequestEOF, false), - Entry("POST, body is http.NoBody: does not retry", http.NoBody, true, "POST", nil, fails.IdempotentRequestEOF, false), - - Entry("POST, body is empty, X-Idempotency-Key header: attempts retry", nil, true, "POST", map[string]string{"X-Idempotency-Key": "abc123"}, fails.IncompleteRequest, true), - Entry("POST, body is not empty and GetBody is non-nil, X-Idempotency-Key header: attempts retry", reqBody, false, "POST", map[string]string{"X-Idempotency-Key": "abc123"}, fails.IncompleteRequest, true), - Entry("POST, body is not empty, X-Idempotency-Key header: does not retry", reqBody, true, "POST", map[string]string{"X-Idempotency-Key": "abc123"}, fails.IdempotentRequestEOF, false), - Entry("POST, body is http.NoBody, X-Idempotency-Key header: does not retry", http.NoBody, true, "POST", map[string]string{"X-Idempotency-Key": "abc123"}, fails.IdempotentRequestEOF, false), - - Entry("POST, body is empty, Idempotency-Key header: attempts retry", nil, true, "POST", map[string]string{"Idempotency-Key": "abc123"}, fails.IncompleteRequest, true), - Entry("POST, body is not empty and GetBody is non-nil, Idempotency-Key header: attempts retry", reqBody, false, "POST", map[string]string{"Idempotency-Key": "abc123"}, fails.IncompleteRequest, true), - Entry("POST, body is not empty, Idempotency-Key header: does not retry", reqBody, true, "POST", map[string]string{"Idempotency-Key": "abc123"}, fails.IdempotentRequestEOF, false), - Entry("POST, body is http.NoBody, Idempotency-Key header: does not retry", http.NoBody, true, "POST", map[string]string{"Idempotency-Key": "abc123"}, fails.IdempotentRequestEOF, false), - - Entry("GET, body is empty: attempts retry", nil, true, "GET", nil, fails.IncompleteRequest, true), - Entry("GET, body is not empty and GetBody is non-nil: attempts retry", reqBody, false, "GET", nil, fails.IncompleteRequest, true), - Entry("GET, body is not empty: does not retry", reqBody, true, "GET", nil, fails.IdempotentRequestEOF, false), - Entry("GET, body is http.NoBody: does not retry", http.NoBody, true, "GET", nil, fails.IdempotentRequestEOF, false), - - Entry("TRACE, body is empty: attempts retry", nil, true, "TRACE", nil, fails.IncompleteRequest, true), - Entry("TRACE, body is not empty: does not retry", reqBody, true, "TRACE", nil, fails.IdempotentRequestEOF, false), - Entry("TRACE, body is http.NoBody: does not retry", http.NoBody, true, "TRACE", nil, fails.IdempotentRequestEOF, false), - Entry("TRACE, body is not empty and GetBody is non-nil: attempts retry", reqBody, false, "TRACE", nil, fails.IncompleteRequest, true), - - Entry("HEAD, body is empty: attempts retry", nil, true, "HEAD", nil, fails.IncompleteRequest, true), - Entry("HEAD, body is not empty: does not retry", reqBody, true, "HEAD", nil, fails.IdempotentRequestEOF, false), - Entry("HEAD, body is http.NoBody: does not retry", http.NoBody, true, "HEAD", nil, fails.IdempotentRequestEOF, false), - Entry("HEAD, body is not empty and GetBody is non-nil: attempts retry", reqBody, false, "HEAD", nil, fails.IncompleteRequest, true), - - Entry("OPTIONS, body is empty: attempts retry", nil, true, "OPTIONS", nil, fails.IncompleteRequest, true), - Entry("OPTIONS, body is not empty and GetBody is non-nil: attempts retry", reqBody, false, "OPTIONS", nil, fails.IncompleteRequest, true), - Entry("OPTIONS, body is not empty: does not retry", reqBody, true, "OPTIONS", nil, fails.IdempotentRequestEOF, false), - Entry("OPTIONS, body is http.NoBody: does not retry", http.NoBody, true, "OPTIONS", nil, fails.IdempotentRequestEOF, false), - - Entry(", body is empty: attempts retry", nil, true, "", nil, fails.IncompleteRequest, true), - Entry(", body is not empty and GetBody is non-nil: attempts retry", reqBody, false, "", nil, fails.IncompleteRequest, true), - Entry(", body is not empty: does not retry", reqBody, true, "", nil, fails.IdempotentRequestEOF, false), - Entry(", body is http.NoBody: does not retry", http.NoBody, true, "", nil, fails.IdempotentRequestEOF, false), - ) - Context("when there are no more endpoints available", func() { BeforeEach(func() { removed := routePool.Remove(endpoint)