Skip to content

Commit

Permalink
Check exception type to set retryCountsAgainstLimit for alarms
Browse files Browse the repository at this point in the history
  • Loading branch information
jqmmes committed Jul 24, 2024
1 parent d1b6269 commit ae09c6b
Showing 1 changed file with 31 additions and 2 deletions.
33 changes: 31 additions & 2 deletions src/workerd/api/global-scope.c++
Original file line number Diff line number Diff line change
Expand Up @@ -467,6 +467,10 @@ kj::Promise<WorkerInterface::AlarmResult> ServiceWorkerGlobalScope::runAlarm(
}
}

// We only want this retry to count against limits if it's a user error. By default, it counts
// only when the output gate is not broken.
auto shouldRetryCountsAgainstLimits = !context.isOutputGateBroken();

// We want to alert if we aren't going to count this alarm retry against limits
if (auto desc = e.getDescription();
!jsg::isTunneledException(desc) && !jsg::isDoNotLogException(desc)
Expand All @@ -476,10 +480,21 @@ kj::Promise<WorkerInterface::AlarmResult> ServiceWorkerGlobalScope::runAlarm(
// We don't usually log these messages, but knowing the real reason we failed is useful for
// correctly investigating stuck alarms.
LOG_NOSENTRY(ERROR, "output lock broke during alarm execution without an interesting error description", actorId, e);
if (e.getType() == kj::Exception::Type::OVERLOADED) {
// The handler failed because the user overloaded the object. It's their fault, so we won't
// retry forever.
auto msg = e.getDescription();
while (msg.startsWith("remote."_kj)) {
msg = msg.slice("remote."_kj.size());
}
if (!msg.startsWith("broken.outputGateBroken")) {
shouldRetryCountsAgainstLimits = true;
}
}
}
return WorkerInterface::AlarmResult {
.retry = true,
.retryCountsAgainstLimit = !context.isOutputGateBroken(),
.retryCountsAgainstLimit = shouldRetryCountsAgainstLimits,
.outcome = outcome
};
})
Expand All @@ -497,6 +512,9 @@ kj::Promise<WorkerInterface::AlarmResult> ServiceWorkerGlobalScope::runAlarm(
actorId = kj::str(s);
}
}
// We only want this retry to count against limits if it's a user error. By default, let's assume
// it's our fault and not count it.
auto shouldRetryCountsAgainstLimits = false;
if (auto desc = e.getDescription();
!jsg::isTunneledException(desc) && !jsg::isDoNotLogException(desc)) {
if (isInterestingException(e)) {
Expand All @@ -508,10 +526,21 @@ kj::Promise<WorkerInterface::AlarmResult> ServiceWorkerGlobalScope::runAlarm(
// We don't usually log these messages, but knowing the real reason we failed is useful for
// correctly investigating stuck alarms.
LOG_NOSENTRY(ERROR, "output lock broke after executing alarm without an interesting error description", actorId, e);
if (e.getType() == kj::Exception::Type::OVERLOADED) {
// The handler failed because the user overloaded the object. It's their fault, so we won't
// retry forever.
auto msg = e.getDescription();
while (msg.startsWith("remote."_kj)) {
msg = msg.slice("remote."_kj.size());
}
if (!msg.startsWith("broken.outputGateBroken")) {
shouldRetryCountsAgainstLimits = true;
}
}
}
return WorkerInterface::AlarmResult {
.retry = true,
.retryCountsAgainstLimit = false,
.retryCountsAgainstLimit = shouldRetryCountsAgainstLimits,
.outcome = EventOutcome::EXCEPTION
};
});
Expand Down

0 comments on commit ae09c6b

Please sign in to comment.