-
-
Notifications
You must be signed in to change notification settings - Fork 4.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Clean up the support for split queues #78274
Changes from all commits
ad1417d
6ca1033
cb608ac
954e1e9
b44d609
1b22d0a
de73acd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
from __future__ import annotations | ||
|
||
from typing import TypedDict | ||
|
||
|
||
class SplitQueueSize(TypedDict):
    """
    Sizing of the set of queues a single Celery queue is split into.

    Two numbers are tracked separately so that the number of split queues
    can be increased or decreased safely: the queues have to be created
    first, then consumers have to start reading from them, and only then
    can producers start writing to them.
    """

    # Total number of queues to create for the split. This number drives
    # the creation of the queues themselves when the application starts.
    total: int
    # Number of queues actually used by the router when the split is
    # enabled on this queue. It has to be smaller than or equal to `total`.
    in_use: int
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
import logging | ||
import random | ||
from collections.abc import Sequence | ||
from itertools import cycle | ||
|
||
from django.conf import settings | ||
|
||
from sentry import options | ||
from sentry.celery import app | ||
from sentry.utils.celery import build_queue_names | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
def _get_known_queues() -> set[str]:
    """Return the names of all queues declared in the Celery app config."""
    declared_queues = app.conf.CELERY_QUEUES
    return {queue.name for queue in declared_queues}
|
||
|
||
def _validate_destinations(destinations: Sequence[str]) -> None:
    """
    Assert that every destination queue is declared in the Celery config.

    Raises `AssertionError` naming the first destination that is not a
    known queue.
    """
    # The set of declared queues does not change while validating, so it
    # is built once instead of once per destination.
    known_queues = _get_known_queues()
    for dest in destinations:
        assert dest in known_queues, f"Queue {dest} in split queue config is not declared."
|
||
|
||
class SplitQueueRouter:
    """
    Picks the split queue to use for a given Celery queue.

    Split queues spread the load of one queue across several. The router
    takes a queue name and returns the name of the queue the task should
    actually be sent to; it is meant to be called by the code that
    schedules the task.

    Each split queue can be rolled out individually via options.

    WARNING: Do not forget to configure your workers to listen to the
    split queues before you start routing messages to them.
    """

    def __init__(self) -> None:
        """
        Build one round-robin iterator per source queue listed in
        `settings.CELERY_SPLIT_QUEUE_ROUTES`.

        Source queues whose `in_use` is below 2 are logged as invalid and
        get no router, so they keep routing to the source queue itself.
        """
        declared = _get_known_queues()
        self.__queue_routers = {}
        for source, dest_config in settings.CELERY_SPLIT_QUEUE_ROUTES.items():
            assert source in declared, f"Queue {source} in split queue config is not declared."
            in_use = dest_config["in_use"]
            assert in_use <= dest_config["total"]

            if in_use < 2:
                # Splitting into fewer than two queues makes no sense:
                # report it and leave the source queue unrouted.
                logger.error(
                    "Invalid configuration for queue %s. In use is not greater than 1: %d. Fall back to source",
                    source,
                    in_use,
                )
                continue

            destinations = build_queue_names(source, in_use)
            _validate_destinations(destinations)
            self.__queue_routers[source] = cycle(destinations)

    def route_for_queue(self, queue: str) -> str:
        """
        Return the queue a task originally aimed at `queue` should go to.

        The `celery_split_queue_rollout` option gives, per queue, the
        rate of calls that are routed to a split queue; all other calls
        (and queues without a rate) get `queue` back unchanged.
        """
        rollout_rate = options.get("celery_split_queue_rollout").get(queue, 0.0)
        if random.random() >= rollout_rate:
            return queue

        legacy_queues = set(options.get("celery_split_queue_legacy_mode"))
        if queue in legacy_queues:
            # Legacy route: the routing logic for these queues is defined
            # as callables inside the settings file.
            legacy_router = settings.SENTRY_POST_PROCESS_QUEUE_SPLIT_ROUTER.get(
                queue, lambda: queue
            )
            return legacy_router()

        router = self.__queue_routers.get(queue)
        # No router means the queue is not split (or was misconfigured).
        return queue if router is None else next(router)
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,9 +1,62 @@ | ||
from collections.abc import Mapping, MutableSequence, Sequence | ||
from random import randint | ||
from typing import Any | ||
|
||
from celery.schedules import crontab | ||
from kombu import Queue | ||
|
||
from sentry.conf.types.celery import SplitQueueSize | ||
|
||
|
||
def crontab_with_minute_jitter(*args: Any, **kwargs: Any) -> crontab:
    """
    Build a `crontab` whose `minute` is a random integer between 0 and 59.

    All arguments are forwarded to `crontab`; any `minute` keyword passed
    by the caller is replaced by the random value.
    """
    kwargs.update(minute=randint(0, 59))
    return crontab(*args, **kwargs)
|
||
|
||
def build_queue_names(base_name: str, quantity: int) -> Sequence[str]:
    """
    Return the names of the `quantity` split queues derived from `base_name`.

    Names are `base_name` followed by an underscore and a 1-based index,
    e.g. `build_queue_names("q", 2)` gives `["q_1", "q_2"]`. A `quantity`
    of zero or less yields an empty list.
    """
    return [f"{base_name}_{index}" for index in range(1, quantity + 1)]
|
||
|
||
def make_split_queues(config: Mapping[str, SplitQueueSize]) -> Sequence[Queue]:
    """
    Generate the split queue definitions from the mapping between
    base queue name and split queue config.

    For every base queue, `total` queues are produced, named by
    `build_queue_names` and each using its own name as routing key.
    """
    # NOTE(review): per the PR discussion, at review time this helper was
    # only called from tests; whether the split queues get registered in
    # the sentry config or via getsentry/ops overrides was still open.
    split_queues: MutableSequence[Queue] = []
    for base_name, conf in config.items():
        split_queues.extend(
            Queue(name=name, routing_key=name)
            for name in build_queue_names(base_name, conf["total"])
        )
    return split_queues
|
||
|
||
def safe_append(queues: MutableSequence[Queue], queue: Queue) -> None:
    """
    Append `queue` to `queues` unless a queue with the same name is
    already in the list.

    Queues are defined as lists in the configuration, and the config can
    be overridden per environment. If a queue with the same name is added
    twice to the Celery config, Celery just creates the queue twice, which
    can be undesired depending on the Celery backend. This helper adds a
    queue to a list without introducing duplicates.
    """
    already_declared = any(existing.name == queue.name for existing in queues)
    if not already_declared:
        queues.append(queue)
|
||
|
||
def safe_extend(queues: MutableSequence[Queue], to_add: Sequence[Queue]) -> None:
    """
    Add every queue in `to_add` to `queues`, skipping duplicates by name.

    A queue is skipped when a queue with the same name is already in
    `queues`, including one added earlier in this same call, so duplicate
    names inside `to_add` are added only once.
    """
    seen_names = {q.name for q in queues}
    for q in to_add:
        if q.name not in seen_names:
            queues.append(q)
            # Record the name so a later same-named entry of `to_add`
            # is not appended a second time.
            seen_names.add(q.name)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This looks compatible with the patch that is in getsentry/ops config overrides 👍