From 3fbf7c869ab7cac758fe26fd5d74f3ebae699719 Mon Sep 17 00:00:00 2001
From: Alex Robinson
Date: Mon, 11 Mar 2024 14:58:46 +0000
Subject: [PATCH] Increase the timeout for storage operations before resetting an actor

The lower timeout could plausibly be hit purely due to a busy CPU loop
after issuing a storage operation. The new number matches the CPU limit
per request. There's no need for this to be exactly the same number as
that, but it seemed just as good a reason as I had for any other larger
number.
---
 src/workerd/io/worker.c++ | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/src/workerd/io/worker.c++ b/src/workerd/io/worker.c++
index f44a5e13354..7aa6d717a73 100644
--- a/src/workerd/io/worker.c++
+++ b/src/workerd/io/worker.c++
@@ -2858,14 +2858,10 @@ struct Worker::Actor::Impl {
   // Implements InputGate::Hooks.
 
   kj::Promise<void> makeTimeoutPromise() override {
-#if __has_feature(address_sanitizer) || defined(__SANITIZE_ADDRESS__)
-    // Give more time under ASAN.
-    //
-    // TODO(cleanup): Should this be configurable?
-    auto timeout = 20 * kj::SECONDS;
-#else
-    auto timeout = 10 * kj::SECONDS;
-#endif
+    // This really only protects against total hangs. Lowering the timeout drastically is risky,
+    // since low timeouts can spuriously fire when under heavy CPU load, failing requests that
+    // would otherwise succeed.
+    auto timeout = 30 * kj::SECONDS;
     co_await timerChannel.afterLimitTimeout(timeout);
     kj::throwFatalException(KJ_EXCEPTION(FAILED,
         "broken.outputGateBroken; jsg.Error: Durable Object storage operation exceeded "