-
Notifications
You must be signed in to change notification settings - Fork 14.2k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add CeleryKubernetesExecutor (#10901)
it consists of CeleryExecutor and KubernetesExecutor, which allows users to route their tasks to either Kubernetes or Celery based on the queue defined on a task
- Loading branch information
Showing
10 changed files
with
505 additions
and
12 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -48,8 +48,8 @@ hostname_callable = socket.getfqdn | |
default_timezone = utc | ||
|
||
# The executor class that airflow should use. Choices include | ||
# SequentialExecutor, LocalExecutor, CeleryExecutor, DaskExecutor, KubernetesExecutor | ||
# or the full import path to the class when using a custom executor. | ||
# SequentialExecutor, LocalExecutor, CeleryExecutor, DaskExecutor, KubernetesExecutor, | ||
# CeleryKubernetesExecutor or the full import path to the class when using a custom executor. | ||
executor = SequentialExecutor | ||
|
||
# The SqlAlchemy connection string to the metadata database. | ||
|
@@ -577,6 +577,15 @@ smtp_mail_from = [email protected] | |
sentry_on = false | ||
sentry_dsn = | ||
|
||
[celery_kubernetes_executor] | ||
|
||
# This section only applies if you are using the CeleryKubernetesExecutor in | ||
# ``[core]`` section above | ||
# Define when to send a task to KubernetesExecutor when using CeleryKubernetesExecutor. | ||
# When the queue of a task is kubernetes_queue, the task is executed via KubernetesExecutor, | ||
# otherwise via CeleryExecutor | ||
kubernetes_queue = kubernetes | ||
|
||
[celery] | ||
|
||
# This section only applies if you are using the CeleryExecutor in | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,158 @@ | ||
# | ||
# Licensed to the Apache Software Foundation (ASF) under one | ||
# or more contributor license agreements. See the NOTICE file | ||
# distributed with this work for additional information | ||
# regarding copyright ownership. The ASF licenses this file | ||
# to you under the Apache License, Version 2.0 (the | ||
# "License"); you may not use this file except in compliance | ||
# with the License. You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, | ||
# software distributed under the License is distributed on an | ||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
# KIND, either express or implied. See the License for the | ||
# specific language governing permissions and limitations | ||
# under the License. | ||
from typing import Dict, Optional, Set, Union | ||
|
||
from airflow.configuration import conf | ||
from airflow.executors.base_executor import CommandType, EventBufferValueType, QueuedTaskInstanceType | ||
from airflow.executors.celery_executor import CeleryExecutor | ||
from airflow.executors.kubernetes_executor import KubernetesExecutor | ||
from airflow.models.taskinstance import SimpleTaskInstance, TaskInstance, TaskInstanceKey | ||
from airflow.utils.log.logging_mixin import LoggingMixin | ||
|
||
|
||
class CeleryKubernetesExecutor(LoggingMixin):
    """
    Composite executor that wraps a CeleryExecutor and a KubernetesExecutor.

    Work is routed per task: when the task's queue equals ``KUBERNETES_QUEUE``
    the wrapped KubernetesExecutor receives it, in every other case the wrapped
    CeleryExecutor does. Lifecycle calls (start, heartbeat, end, terminate) are
    forwarded to both executors, celery first.
    """

    # Queue name that selects the KubernetesExecutor. Read from the
    # ``[celery_kubernetes_executor] kubernetes_queue`` option when the class
    # body is executed.
    KUBERNETES_QUEUE = conf.get('celery_kubernetes_executor', 'kubernetes_queue')

    def __init__(self, celery_executor, kubernetes_executor):
        super().__init__()
        self.celery_executor = celery_executor
        self.kubernetes_executor = kubernetes_executor

    @property
    def queued_tasks(self) -> Dict[TaskInstanceKey, QueuedTaskInstanceType]:
        """
        Queued tasks of both wrapped executors, combined into one mapping.

        The result is a copy of the celery executor's mapping updated with the
        kubernetes executor's entries, so the kubernetes entry wins if a key is
        present in both.
        """
        combined = self.celery_executor.queued_tasks.copy()
        combined.update(self.kubernetes_executor.queued_tasks)
        return combined

    @property
    def running(self) -> Set[TaskInstanceKey]:
        """Union of the running task keys of both wrapped executors."""
        celery_running = self.celery_executor.running
        return celery_running.union(self.kubernetes_executor.running)

    def start(self) -> None:
        """Start the celery executor, then the kubernetes executor."""
        for wrapped in (self.celery_executor, self.kubernetes_executor):
            wrapped.start()

    def queue_command(
        self,
        simple_task_instance: SimpleTaskInstance,
        command: CommandType,
        priority: int = 1,
        queue: Optional[str] = None,
    ):
        """
        Queue a command on whichever executor the task instance routes to.

        :param simple_task_instance: task instance whose queue decides the executor
        :param command: command forwarded unchanged to the chosen executor
        :param priority: priority forwarded unchanged to the chosen executor
        :param queue: queue forwarded unchanged to the chosen executor
        """
        target = self._router(simple_task_instance)
        target_name = target.__class__.__name__
        self.log.debug("Using executor: %s for %s", target_name, simple_task_instance.key)
        target.queue_command(simple_task_instance, command, priority, queue)

    def queue_task_instance(
        self,
        task_instance: TaskInstance,
        mark_success: bool = False,
        pickle_id: Optional[str] = None,
        ignore_all_deps: bool = False,
        ignore_depends_on_past: bool = False,
        ignore_task_deps: bool = False,
        ignore_ti_state: bool = False,
        pool: Optional[str] = None,
        cfg_path: Optional[str] = None,
    ) -> None:
        """
        Queue a task instance on whichever executor it routes to.

        The task instance is wrapped in a ``SimpleTaskInstance`` only to pick the
        executor; the original ``task_instance`` and all remaining arguments are
        forwarded positionally, in declaration order, to the chosen executor.
        """
        target = self._router(SimpleTaskInstance(task_instance))
        target_name = target.__class__.__name__
        self.log.debug("Using executor: %s to queue_task_instance for %s", target_name, task_instance.key)
        forwarded = (
            task_instance,
            mark_success,
            pickle_id,
            ignore_all_deps,
            ignore_depends_on_past,
            ignore_task_deps,
            ignore_ti_state,
            pool,
            cfg_path,
        )
        target.queue_task_instance(*forwarded)

    def has_task(self, task_instance: TaskInstance) -> bool:
        """
        Tell whether either wrapped executor reports having the task.

        :param task_instance: TaskInstance
        :return: the celery executor's answer when it is truthy, otherwise the
            kubernetes executor's answer
        """
        known_to_celery = self.celery_executor.has_task(task_instance)
        if known_to_celery:
            # Short-circuit: the kubernetes executor is not consulted.
            return known_to_celery
        return self.kubernetes_executor.has_task(task_instance)

    def heartbeat(self) -> None:
        """Send a heartbeat to the celery executor, then the kubernetes executor."""
        for wrapped in (self.celery_executor, self.kubernetes_executor):
            wrapped.heartbeat()

    def get_event_buffer(self, dag_ids=None) -> Dict[TaskInstanceKey, EventBufferValueType]:
        """
        Collect the event buffers of both wrapped executors into one dict.

        :param dag_ids: passed through to each executor's ``get_event_buffer``
        :return: a dict of events; the kubernetes entry wins if a key is
            present in both buffers
        """
        celery_events = self.celery_executor.get_event_buffer(dag_ids)
        kubernetes_events = self.kubernetes_executor.get_event_buffer(dag_ids)

        events = dict(celery_events)
        events.update(kubernetes_events)
        return events

    def end(self) -> None:
        """End the celery executor, then the kubernetes executor."""
        for wrapped in (self.celery_executor, self.kubernetes_executor):
            wrapped.end()

    def terminate(self) -> None:
        """Terminate the celery executor, then the kubernetes executor."""
        for wrapped in (self.celery_executor, self.kubernetes_executor):
            wrapped.terminate()

    def _router(self, simple_task_instance: SimpleTaskInstance) -> Union[CeleryExecutor, KubernetesExecutor]:
        """
        Pick the executor responsible for a task instance.

        :param simple_task_instance: SimpleTaskInstance
        :return: kubernetes_executor when the instance's queue equals
            ``KUBERNETES_QUEUE``, celery_executor otherwise
        :rtype: Union[CeleryExecutor, KubernetesExecutor]
        """
        goes_to_kubernetes = simple_task_instance.queue == self.KUBERNETES_QUEUE
        return self.kubernetes_executor if goes_to_kubernetes else self.celery_executor
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
.. Licensed to the Apache Software Foundation (ASF) under one | ||
or more contributor license agreements. See the NOTICE file | ||
distributed with this work for additional information | ||
regarding copyright ownership. The ASF licenses this file | ||
to you under the Apache License, Version 2.0 (the | ||
"License"); you may not use this file except in compliance | ||
with the License. You may obtain a copy of the License at | ||
.. http://www.apache.org/licenses/LICENSE-2.0 | ||
.. Unless required by applicable law or agreed to in writing, | ||
software distributed under the License is distributed on an | ||
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
KIND, either express or implied. See the License for the | ||
specific language governing permissions and limitations | ||
under the License. | ||
.. _executor:CeleryKubernetesExecutor: | ||
|
||
CeleryKubernetes Executor | ||
========================= | ||
|
||
The :class:`~airflow.executors.celery_kubernetes_executor.CeleryKubernetesExecutor` allows users | ||
to run a ``CeleryExecutor`` and a ``KubernetesExecutor`` simultaneously. | ||
An executor is chosen to run a task based on the task's queue. | ||
|
||
``CeleryKubernetesExecutor`` inherits the scalability of ``CeleryExecutor`` for | ||
handling high load at peak times and the runtime isolation of ``KubernetesExecutor``. | ||
|
||
|
||
When to use CeleryKubernetesExecutor | ||
#################################### | ||
|
||
``CeleryKubernetesExecutor`` should only be used in certain cases, given that | ||
it requires setting up ``CeleryExecutor`` and ``KubernetesExecutor``. | ||
|
||
We recommend considering ``CeleryKubernetesExecutor`` when your use case meets: | ||
|
||
1. The number of tasks that need to be scheduled at peak exceeds the scale that your Kubernetes cluster | ||
can comfortably handle. | ||
|
||
2. A relatively small portion of your tasks requires runtime isolation. | ||
|
||
3. You have plenty of small tasks that can be executed on Celery workers | ||
but you also have resource-hungry tasks that are better run in predefined environments. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -48,3 +48,4 @@ Supported Backends | |
dask | ||
celery | ||
kubernetes | ||
celery_kubernetes |
Oops, something went wrong.