Skip to content

Commit

Permalink
refactor: split error handling and retry logic
Browse files Browse the repository at this point in the history
With this commit `isRetriable` no longer overwrites / wraps the error
that is passed to it. This was done to accommodate context from the
circumstances in which the error occurred into the error itself to be
able to match on those later on. This mechanism has proven to cause bugs
and increase overall complexity by abusing the error type.

Instead `isRetriable` now only returns whether a certain combination of
parameters is considered retry-able, either because the circumstances
allow for it or because the error matches one of the retry-able error
classifiers.

Resovles: cloudfoundry/routing-release#321
  • Loading branch information
maxmoehl committed May 23, 2023
1 parent 0f9b398 commit 6f65c1c
Show file tree
Hide file tree
Showing 6 changed files with 12 additions and 190 deletions.
12 changes: 0 additions & 12 deletions proxy/fails/basic_classifiers.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,6 @@ import (
"strings"
)

var IdempotentRequestEOFError = errors.New("EOF (via idempotent request)")

var IncompleteRequestError = errors.New("incomplete request")

var AttemptedTLSWithNonTLSBackend = ClassifierFunc(func(err error) bool {
return errors.As(err, &tls.RecordHeaderError{})
})
Expand Down Expand Up @@ -78,11 +74,3 @@ var UntrustedCert = ClassifierFunc(func(err error) bool {
return false
}
})

var IdempotentRequestEOF = ClassifierFunc(func(err error) bool {
return errors.Is(err, IdempotentRequestEOFError)
})

var IncompleteRequest = ClassifierFunc(func(err error) bool {
return errors.Is(err, IncompleteRequestError)
})
7 changes: 5 additions & 2 deletions proxy/fails/classifier_group.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,11 @@ type ClassifierGroup []Classifier
//
// Otherwise, there’s risk of a mutating non-idempotent request (e.g. send
// payment) being silently retried without the client knowing.
//
// IMPORTANT: to truly determine whether a request is retry-able the function
// round_tripper.isRetrieable must be used. It includes additional checks that
// allow requests to be retried more often than it is allowed by the
// classifiers.
var RetriableClassifiers = ClassifierGroup{
Dial,
AttemptedTLSWithNonTLSBackend,
Expand All @@ -19,8 +24,6 @@ var RetriableClassifiers = ClassifierGroup{
RemoteHandshakeTimeout,
UntrustedCert,
ExpiredOrNotYetValidCertFailure,
IdempotentRequestEOF,
IncompleteRequest,
}

var FailableClassifiers = ClassifierGroup{
Expand Down
12 changes: 0 additions & 12 deletions proxy/fails/classifier_group_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ import (
"crypto/tls"
"crypto/x509"
"errors"
"fmt"
"net"

"code.cloudfoundry.org/gorouter/proxy/fails"
Expand Down Expand Up @@ -33,25 +32,14 @@ var _ = Describe("ClassifierGroup", func() {
rc := fails.RetriableClassifiers

Expect(rc.Classify(&net.OpError{Op: "dial"})).To(BeTrue())
Expect(rc.Classify(fmt.Errorf("%w (%w)", fails.IncompleteRequestError, &net.OpError{Op: "dial"}))).To(BeTrue())
Expect(rc.Classify(&net.OpError{Op: "remote error", Err: errors.New("tls: bad certificate")})).To(BeTrue())
Expect(rc.Classify(fmt.Errorf("%w (%w)", fails.IncompleteRequestError, &net.OpError{Op: "remote error", Err: errors.New("tls: bad certificate")}))).To(BeTrue())
Expect(rc.Classify(&net.OpError{Op: "remote error", Err: errors.New("tls: handshake failure")})).To(BeTrue())
Expect(rc.Classify(fmt.Errorf("%w (%w)", fails.IncompleteRequestError, &net.OpError{Op: "remote error", Err: errors.New("tls: handshake failure")}))).To(BeTrue())
Expect(rc.Classify(errors.New("net/http: TLS handshake timeout"))).To(BeTrue())
Expect(rc.Classify(fmt.Errorf("%w (%w)", fails.IncompleteRequestError, errors.New("net/http: TLS handshake timeout")))).To(BeTrue())
Expect(rc.Classify(tls.RecordHeaderError{})).To(BeTrue())
Expect(rc.Classify(fmt.Errorf("%w (%w)", fails.IncompleteRequestError, tls.RecordHeaderError{}))).To(BeTrue())
Expect(rc.Classify(x509.HostnameError{})).To(BeTrue())
Expect(rc.Classify(fmt.Errorf("%w (%w)", fails.IncompleteRequestError, x509.HostnameError{}))).To(BeTrue())
Expect(rc.Classify(x509.UnknownAuthorityError{})).To(BeTrue())
Expect(rc.Classify(fmt.Errorf("%w (%w)", fails.IncompleteRequestError, x509.UnknownAuthorityError{}))).To(BeTrue())
Expect(rc.Classify(x509.CertificateInvalidError{Reason: x509.Expired})).To(BeTrue())
Expect(rc.Classify(fmt.Errorf("%w (%w)", fails.IncompleteRequestError, x509.CertificateInvalidError{Reason: x509.Expired}))).To(BeTrue())
Expect(rc.Classify(errors.New("i'm a potato"))).To(BeFalse())
Expect(rc.Classify(fails.IdempotentRequestEOFError)).To(BeTrue())
Expect(rc.Classify(fails.IncompleteRequestError)).To(BeTrue())
Expect(rc.Classify(fmt.Errorf("%w (%w)", fails.IncompleteRequestError, x509.HostnameError{}))).To(BeTrue())
})
})

Expand Down
65 changes: 0 additions & 65 deletions proxy/round_tripper/error_handler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ import (
"crypto/tls"
"crypto/x509"
"errors"
"fmt"
"net"
"net/http/httptest"

Expand Down Expand Up @@ -137,22 +136,6 @@ var _ = Describe("HandleError", func() {
})
})

Context("HostnameMismatch wrapped in IncompleteRequestError", func() {
BeforeEach(func() {
wrappedErr := x509.HostnameError{Host: "the wrong one"}
err = fmt.Errorf("%w (%w)", fails.IncompleteRequestError, wrappedErr)
errorHandler.HandleError(responseWriter, err)
})

It("has a 503 Status Code", func() {
Expect(responseWriter.Status()).To(Equal(503))
})

It("emits a backend_invalid_id metric", func() {
Expect(metricReporter.CaptureBackendInvalidIDCallCount()).To(Equal(1))
})
})

Context("Untrusted Cert", func() {
BeforeEach(func() {
err = x509.UnknownAuthorityError{}
Expand All @@ -168,22 +151,6 @@ var _ = Describe("HandleError", func() {
})
})

Context("Untrusted Cert wrapped in IncompleteRequestError", func() {
BeforeEach(func() {
wrappedErr := x509.UnknownAuthorityError{}
err = fmt.Errorf("%w (%w)", fails.IncompleteRequestError, wrappedErr)
errorHandler.HandleError(responseWriter, err)
})

It("has a 526 Status Code", func() {
Expect(responseWriter.Status()).To(Equal(526))
})

It("emits a backend_invalid_tls_cert metric", func() {
Expect(metricReporter.CaptureBackendInvalidTLSCertCallCount()).To(Equal(1))
})
})

Context("Attempted TLS with non-TLS backend error", func() {
BeforeEach(func() {
err = tls.RecordHeaderError{Msg: "bad handshake"}
Expand All @@ -199,22 +166,6 @@ var _ = Describe("HandleError", func() {
})
})

Context("Attempted TLS with non-TLS backend error wrapped in IncompleteRequestError", func() {
BeforeEach(func() {
wrappedErr := tls.RecordHeaderError{Msg: "bad handshake"}
err = fmt.Errorf("%w (%w)", fails.IncompleteRequestError, wrappedErr)
errorHandler.HandleError(responseWriter, err)
})

It("has a 525 Status Code", func() {
Expect(responseWriter.Status()).To(Equal(525))
})

It("emits a backend_tls_handshake_failed metric", func() {
Expect(metricReporter.CaptureBackendTLSHandshakeFailedCallCount()).To(Equal(1))
})
})

Context("Remote handshake failure", func() {
BeforeEach(func() {
err = &net.OpError{Op: "remote error", Err: errors.New("tls: handshake failure")}
Expand All @@ -230,22 +181,6 @@ var _ = Describe("HandleError", func() {
})
})

Context("Remote handshake failure wrapped in IncompleteRequestError", func() {
BeforeEach(func() {
wrappedErr := &net.OpError{Op: "remote error", Err: errors.New("tls: handshake failure")}
err = fmt.Errorf("%w (%w)", fails.IncompleteRequestError, wrappedErr)
errorHandler.HandleError(responseWriter, err)
})

It("has a 525 Status Code", func() {
Expect(responseWriter.Status()).To(Equal(525))
})

It("emits a backend_tls_handshake_failed metric", func() {
Expect(metricReporter.CaptureBackendTLSHandshakeFailedCallCount()).To(Equal(1))
})
})

Context("Context Cancelled Error", func() {
BeforeEach(func() {
err = context.Canceled
Expand Down
16 changes: 7 additions & 9 deletions proxy/round_tripper/proxy_round_tripper.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ package round_tripper
import (
"context"
"errors"
"fmt"
"io"
"io/ioutil"
"net/http"
Expand Down Expand Up @@ -177,7 +176,7 @@ func (rt *roundTripper) RoundTrip(originalRequest *http.Request) (*http.Response
if err != nil {
reqInfo.FailedAttempts++
reqInfo.LastFailedAttemptFinishedAt = time.Now()
retriable, err := rt.isRetriable(request, err, trace)
retriable := rt.isRetriable(request, err, trace)

logger.Error("backend-endpoint-failed",
zap.Error(err),
Expand Down Expand Up @@ -226,7 +225,7 @@ func (rt *roundTripper) RoundTrip(originalRequest *http.Request) (*http.Response
if err != nil {
reqInfo.FailedAttempts++
reqInfo.LastFailedAttemptFinishedAt = time.Now()
retriable, err := rt.isRetriable(request, err, trace)
retriable := rt.isRetriable(request, err, trace)

logger.Error(
"route-service-connection-failed",
Expand Down Expand Up @@ -482,24 +481,23 @@ func isIdempotent(request *http.Request) bool {
return false
}

func (rt *roundTripper) isRetriable(request *http.Request, err error, trace *requestTracer) (bool, error) {
func (rt *roundTripper) isRetriable(request *http.Request, err error, trace *requestTracer) bool {
// if the context has been cancelled we do not perform further retries
if request.Context().Err() != nil {
return false, fmt.Errorf("%w (%w)", request.Context().Err(), err)
return false
}

// io.EOF errors are considered safe to retry for certain requests
// Replace the error here to track this state when classifying later.
if err == io.EOF && isIdempotent(request) {
err = fails.IdempotentRequestEOFError
return true
}
// We can retry for sure if we never obtained a connection
// since there is no way any data was transmitted. If headers could not
// be written in full, the request should also be safe to retry.
if !trace.GotConn() || !trace.WroteHeaders() {
err = fmt.Errorf("%w (%w)", fails.IncompleteRequestError, err)
return true
}

retriable := rt.retriableClassifier.Classify(err)
return retriable, err
return rt.retriableClassifier.Classify(err)
}
90 changes: 0 additions & 90 deletions proxy/round_tripper/proxy_round_tripper_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ import (
"bytes"
"errors"
"fmt"
"io"
"net"
"net/http"
"net/http/httptest"
Expand All @@ -23,7 +22,6 @@ import (
"code.cloudfoundry.org/gorouter/config"
"code.cloudfoundry.org/gorouter/handlers"
"code.cloudfoundry.org/gorouter/metrics/fakes"
"code.cloudfoundry.org/gorouter/proxy/fails"
"code.cloudfoundry.org/gorouter/proxy/handler"
"code.cloudfoundry.org/gorouter/proxy/round_tripper"
"code.cloudfoundry.org/gorouter/proxy/utils"
Expand Down Expand Up @@ -434,94 +432,6 @@ var _ = Describe("ProxyRoundTripper", func() {
})
})

DescribeTable("when the backend fails with an empty response error (io.EOF)",
func(reqBody io.ReadCloser, getBodyIsNil bool, reqMethod string, headers map[string]string, classify fails.ClassifierFunc, expectRetry bool) {
badResponse := &http.Response{
Header: make(map[string][]string),
}
badResponse.Header.Add(handlers.VcapRequestIdHeader, "some-request-id")

// The first request fails with io.EOF, the second (if retried) succeeds
transport.RoundTripStub = func(*http.Request) (*http.Response, error) {
switch transport.RoundTripCallCount() {
case 1:
return nil, io.EOF
case 2:
return &http.Response{StatusCode: http.StatusTeapot}, nil
default:
return nil, nil
}
}

retriableClassifier.ClassifyStub = classify
req.Method = reqMethod
req.Body = reqBody
if !getBodyIsNil {
req.GetBody = func() (io.ReadCloser, error) {
return new(testBody), nil
}
}
if headers != nil {
for key, value := range headers {
req.Header.Add(key, value)
}
}

res, err := proxyRoundTripper.RoundTrip(req)

if expectRetry {
Expect(err).NotTo(HaveOccurred())
Expect(transport.RoundTripCallCount()).To(Equal(2))
Expect(retriableClassifier.ClassifyCallCount()).To(Equal(1))
Expect(res.StatusCode).To(Equal(http.StatusTeapot))
} else {
Expect(errors.Is(err, io.EOF)).To(BeTrue())
Expect(transport.RoundTripCallCount()).To(Equal(1))
Expect(retriableClassifier.ClassifyCallCount()).To(Equal(1))
}
},

Entry("POST, body is empty: does not retry", nil, true, "POST", nil, fails.IdempotentRequestEOF, false),
Entry("POST, body is not empty and GetBody is non-nil: does not retry", reqBody, false, "POST", nil, fails.IdempotentRequestEOF, false),
Entry("POST, body is not empty: does not retry", reqBody, true, "POST", nil, fails.IdempotentRequestEOF, false),
Entry("POST, body is http.NoBody: does not retry", http.NoBody, true, "POST", nil, fails.IdempotentRequestEOF, false),

Entry("POST, body is empty, X-Idempotency-Key header: attempts retry", nil, true, "POST", map[string]string{"X-Idempotency-Key": "abc123"}, fails.IncompleteRequest, true),
Entry("POST, body is not empty and GetBody is non-nil, X-Idempotency-Key header: attempts retry", reqBody, false, "POST", map[string]string{"X-Idempotency-Key": "abc123"}, fails.IncompleteRequest, true),
Entry("POST, body is not empty, X-Idempotency-Key header: does not retry", reqBody, true, "POST", map[string]string{"X-Idempotency-Key": "abc123"}, fails.IdempotentRequestEOF, false),
Entry("POST, body is http.NoBody, X-Idempotency-Key header: does not retry", http.NoBody, true, "POST", map[string]string{"X-Idempotency-Key": "abc123"}, fails.IdempotentRequestEOF, false),

Entry("POST, body is empty, Idempotency-Key header: attempts retry", nil, true, "POST", map[string]string{"Idempotency-Key": "abc123"}, fails.IncompleteRequest, true),
Entry("POST, body is not empty and GetBody is non-nil, Idempotency-Key header: attempts retry", reqBody, false, "POST", map[string]string{"Idempotency-Key": "abc123"}, fails.IncompleteRequest, true),
Entry("POST, body is not empty, Idempotency-Key header: does not retry", reqBody, true, "POST", map[string]string{"Idempotency-Key": "abc123"}, fails.IdempotentRequestEOF, false),
Entry("POST, body is http.NoBody, Idempotency-Key header: does not retry", http.NoBody, true, "POST", map[string]string{"Idempotency-Key": "abc123"}, fails.IdempotentRequestEOF, false),

Entry("GET, body is empty: attempts retry", nil, true, "GET", nil, fails.IncompleteRequest, true),
Entry("GET, body is not empty and GetBody is non-nil: attempts retry", reqBody, false, "GET", nil, fails.IncompleteRequest, true),
Entry("GET, body is not empty: does not retry", reqBody, true, "GET", nil, fails.IdempotentRequestEOF, false),
Entry("GET, body is http.NoBody: does not retry", http.NoBody, true, "GET", nil, fails.IdempotentRequestEOF, false),

Entry("TRACE, body is empty: attempts retry", nil, true, "TRACE", nil, fails.IncompleteRequest, true),
Entry("TRACE, body is not empty: does not retry", reqBody, true, "TRACE", nil, fails.IdempotentRequestEOF, false),
Entry("TRACE, body is http.NoBody: does not retry", http.NoBody, true, "TRACE", nil, fails.IdempotentRequestEOF, false),
Entry("TRACE, body is not empty and GetBody is non-nil: attempts retry", reqBody, false, "TRACE", nil, fails.IncompleteRequest, true),

Entry("HEAD, body is empty: attempts retry", nil, true, "HEAD", nil, fails.IncompleteRequest, true),
Entry("HEAD, body is not empty: does not retry", reqBody, true, "HEAD", nil, fails.IdempotentRequestEOF, false),
Entry("HEAD, body is http.NoBody: does not retry", http.NoBody, true, "HEAD", nil, fails.IdempotentRequestEOF, false),
Entry("HEAD, body is not empty and GetBody is non-nil: attempts retry", reqBody, false, "HEAD", nil, fails.IncompleteRequest, true),

Entry("OPTIONS, body is empty: attempts retry", nil, true, "OPTIONS", nil, fails.IncompleteRequest, true),
Entry("OPTIONS, body is not empty and GetBody is non-nil: attempts retry", reqBody, false, "OPTIONS", nil, fails.IncompleteRequest, true),
Entry("OPTIONS, body is not empty: does not retry", reqBody, true, "OPTIONS", nil, fails.IdempotentRequestEOF, false),
Entry("OPTIONS, body is http.NoBody: does not retry", http.NoBody, true, "OPTIONS", nil, fails.IdempotentRequestEOF, false),

Entry("<empty method>, body is empty: attempts retry", nil, true, "", nil, fails.IncompleteRequest, true),
Entry("<empty method>, body is not empty and GetBody is non-nil: attempts retry", reqBody, false, "", nil, fails.IncompleteRequest, true),
Entry("<empty method>, body is not empty: does not retry", reqBody, true, "", nil, fails.IdempotentRequestEOF, false),
Entry("<empty method>, body is http.NoBody: does not retry", http.NoBody, true, "", nil, fails.IdempotentRequestEOF, false),
)

Context("when there are no more endpoints available", func() {
BeforeEach(func() {
removed := routePool.Remove(endpoint)
Expand Down

0 comments on commit 6f65c1c

Please sign in to comment.