From 755cf95b0df132c1d3fb80ccff6d5b52de708514 Mon Sep 17 00:00:00 2001 From: kshyanashree <109167932+kshyanashree@users.noreply.github.com> Date: Fri, 21 Apr 2023 07:29:25 -0700 Subject: [PATCH] [6.2.0]Allow remote retry max delay to be user configurable (#18061) * Allow remote retry max delay to be user configurable This introduces a new option `--remote_retry_max_delay` can be used to change the existing maximum exponential backoff interval used when retrying remote requests. Before this change, there was a hardcoded value controlling this maximum exponential backoff interval, set to `5s`. Rational `remote_retries` is useful in masking over temporary disruptions to a remote cluster. If a cluster experiences temporary downtime, it is useful to allow bazel clients to wait for a period of time for the cluster to recover before bailing and giving up. If users cannot configure the maximum exponential backoff delay, one must set a large number for `remote_retries`, each retry eventually waiting for up to 5s. This allows the bazel client to wait for a reasonable amount of time for the cluster to recover. The problem here is that under certain cluster failure modes, requests may not be handled and failed quickly, rather they may wait until `remote_timeout` before failing. A large `remote_timeout` combined with a large `remote_retries` could lead to waiting for a very long time before finally bailing on a given action. If a user can bump the `remote_retry_max_delay`, they can control the retry waiting semantics to their own needs. Closes #16058. PiperOrigin-RevId: 523680725 Change-Id: I21daba78b91d3157362ca85bb7b1cbbef8a94bb3 * Replace RemoteDurationConverter with RemoteTimeoutConverter --------- Co-authored-by: Joel Jeske --- .../devtools/build/lib/remote/RemoteRetrier.java | 8 +++++--- .../build/lib/remote/options/RemoteOptions.java | 12 ++++++++++++ 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/src/main/java/com/google/devtools/build/lib/remote/RemoteRetrier.java b/src/main/java/com/google/devtools/build/lib/remote/RemoteRetrier.java index 2aca34c9daaaed..403af13d55a7cc 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/RemoteRetrier.java +++ b/src/main/java/com/google/devtools/build/lib/remote/RemoteRetrier.java @@ -14,6 +14,8 @@ package com.google.devtools.build.lib.remote; +import static java.lang.Math.max; + import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; import com.google.common.base.Throwables; @@ -158,7 +160,7 @@ public static class ExponentialBackoff implements Backoff { Preconditions.checkArgument(jitter >= 0 && jitter <= 1, "jitter must be in the range (0, 1)"); Preconditions.checkArgument(maxAttempts >= 0, "maxAttempts must be >= 0"); nextDelayMillis = initial.toMillis(); - maxMillis = max.toMillis(); + maxMillis = max(max.toMillis(), nextDelayMillis); this.multiplier = multiplier; this.jitter = jitter; this.maxAttempts = maxAttempts; @@ -166,8 +168,8 @@ public static class ExponentialBackoff implements Backoff { public ExponentialBackoff(RemoteOptions options) { this( - /* initial = */ Duration.ofMillis(100), - /* max = */ Duration.ofSeconds(5), + /* initial= */ Duration.ofMillis(100), + /* max= */ options.remoteRetryMaxDelay, /* multiplier= */ 2, /* jitter= */ 0.1, options.remoteMaxRetryAttempts); diff --git a/src/main/java/com/google/devtools/build/lib/remote/options/RemoteOptions.java b/src/main/java/com/google/devtools/build/lib/remote/options/RemoteOptions.java index ba8488382cb3b1..893523ad4dd52c 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/options/RemoteOptions.java +++ b/src/main/java/com/google/devtools/build/lib/remote/options/RemoteOptions.java @@ -362,6 +362,18 @@ public RemoteBuildEventUploadModeConverter() { + "If set to 0, retries are disabled.") public int remoteMaxRetryAttempts; + @Option( + name = "remote_retry_max_delay", + defaultValue = "5s", + documentationCategory = OptionDocumentationCategory.REMOTE, + effectTags = {OptionEffectTag.UNKNOWN}, + converter = RemoteTimeoutConverter.class, + help = + "The maximum backoff delay between remote retry attempts. Following units can be used:" + + " Days (d), hours (h), minutes (m), seconds (s), and milliseconds (ms). If" + + " the unit is omitted, the value is interpreted as seconds.") + public Duration remoteRetryMaxDelay; + @Option( name = "disk_cache", defaultValue = "null",