Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: auto retry once for connection closed #3426

Merged
merged 8 commits into from
Feb 6, 2024
1 change: 1 addition & 0 deletions crates/router/src/routes/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ counter_metric!(APPLE_PAY_MANUAL_FLOW_FAILED_PAYMENT, GLOBAL_METER);
counter_metric!(APPLE_PAY_SIMPLIFIED_FLOW_FAILED_PAYMENT, GLOBAL_METER);

// Metrics for Auto Retries
counter_metric!(AUTO_RETRY_CONNECTION_CLOSED, GLOBAL_METER);
counter_metric!(AUTO_RETRY_ELIGIBLE_REQUEST_COUNT, GLOBAL_METER);
counter_metric!(AUTO_RETRY_GSM_MISS_COUNT, GLOBAL_METER);
counter_metric!(AUTO_RETRY_GSM_FETCH_FAILURE_COUNT, GLOBAL_METER);
Expand Down
97 changes: 75 additions & 22 deletions crates/router/src/services/api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -552,8 +552,7 @@ pub async fn send_request(
key: consts::METRICS_HOST_TAG_NAME.into(),
value: url.host_str().unwrap_or_default().to_string().into(),
};

let send_request = async {
let request = {
match request.method {
Method::Get => client.get(url),
Method::Post => {
Expand Down Expand Up @@ -607,32 +606,86 @@ pub async fn send_request(
.timeout(Duration::from_secs(
option_timeout_secs.unwrap_or(crate::consts::REQUEST_TIME_OUT),
))
.send()
.await
.map_err(|error| match error {
error if error.is_timeout() => {
metrics::REQUEST_BUILD_FAILURE.add(&metrics::CONTEXT, 1, &[]);
errors::ApiClientError::RequestTimeoutReceived
}
error if is_connection_closed(&error) => {
metrics::REQUEST_BUILD_FAILURE.add(&metrics::CONTEXT, 1, &[]);
errors::ApiClientError::ConnectionClosed
}
_ => errors::ApiClientError::RequestNotSent(error.to_string()),
})
.into_report()
.attach_printable("Unable to send request to connector")
};

metrics_request::record_operation_time(
// We cannot clone the request type, because it has Form trait which is not clonable. So we are cloning the request builder here.
let cloned_send_request = request.try_clone().map(|cloned_request| async {
cloned_request
.send()
.await
.map_err(|error| match error {
error if error.is_timeout() => {
metrics::REQUEST_BUILD_FAILURE.add(&metrics::CONTEXT, 1, &[]);
errors::ApiClientError::RequestTimeoutReceived
}
error if is_connection_closed_before_message_could_complete(&error) => {
metrics::REQUEST_BUILD_FAILURE.add(&metrics::CONTEXT, 1, &[]);
errors::ApiClientError::ConnectionClosedIncompleteMessage
}
_ => errors::ApiClientError::RequestNotSent(error.to_string()),
})
.into_report()
.attach_printable("Unable to send request to connector")
});

let send_request = async {
request
.send()
.await
.map_err(|error| match error {
error if error.is_timeout() => {
metrics::REQUEST_BUILD_FAILURE.add(&metrics::CONTEXT, 1, &[]);
errors::ApiClientError::RequestTimeoutReceived
}
error if is_connection_closed_before_message_could_complete(&error) => {
metrics::REQUEST_BUILD_FAILURE.add(&metrics::CONTEXT, 1, &[]);
errors::ApiClientError::ConnectionClosedIncompleteMessage
}
_ => errors::ApiClientError::RequestNotSent(error.to_string()),
})
.into_report()
.attach_printable("Unable to send request to connector")
};

let response = metrics_request::record_operation_time(
send_request,
&metrics::EXTERNAL_REQUEST_TIME,
&[metrics_tag],
&[metrics_tag.clone()],
)
.await
.await;
// Retry once if the response is connection closed.
//
// This is just due to the racy nature of networking.
// hyper has a connection pool of idle connections, and it selected one to send your request.
// Most of the time, hyper will receive the server’s FIN and drop the dead connection from its pool.
// But occasionally, a connection will be selected from the pool
// and written to at the same time the server is deciding to close the connection.
// Since hyper already wrote some of the request,
// it can’t really retry it automatically on a new connection, since the server may have acted already
match response {
Ok(response) => Ok(response),
Err(error)
if error.current_context()
== &errors::ApiClientError::ConnectionClosedIncompleteMessage =>
{
metrics::AUTO_RETRY_CONNECTION_CLOSED.add(&metrics::CONTEXT, 1, &[]);
match cloned_send_request {
Some(cloned_request) => {
metrics_request::record_operation_time(
cloned_request,
&metrics::EXTERNAL_REQUEST_TIME,
&[metrics_tag],
)
.await
}
None => Err(error),
SanchithHegde marked this conversation as resolved.
Show resolved Hide resolved
}
}
err @ Err(_) => err,
}
}

fn is_connection_closed(error: &reqwest::Error) -> bool {
fn is_connection_closed_before_message_could_complete(error: &reqwest::Error) -> bool {
let mut source = error.source();
while let Some(err) = source {
if let Some(hyper_err) = err.downcast_ref::<hyper::Error>() {
Expand Down Expand Up @@ -1674,7 +1727,7 @@ pub fn build_redirection_form(

// Initialize the ThreeDSService
const threeDS = gateway.get3DSecure();

const options = {{
customerVaultId: '{customer_vault_id}',
currency: '{currency}',
Expand Down
6 changes: 3 additions & 3 deletions crates/storage_impl/src/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,7 @@ pub enum ApiClientError {
RequestTimeoutReceived,

#[error("connection closed before a message could complete")]
ConnectionClosed,
ConnectionClosedIncompleteMessage,

#[error("Server responded with Internal Server Error")]
InternalServerErrorReceived,
Expand All @@ -285,8 +285,8 @@ impl ApiClientError {
pub fn is_upstream_timeout(&self) -> bool {
self == &Self::RequestTimeoutReceived
}
pub fn is_connection_closed(&self) -> bool {
self == &Self::ConnectionClosed
pub fn is_connection_closed_before_message_could_complete(&self) -> bool {
self == &Self::ConnectionClosedIncompleteMessage
}
}

Expand Down
Loading