-
Notifications
You must be signed in to change notification settings - Fork 14.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AIRFLOW-6529] Pickle error occurs when the scheduler tries to run on macOS. #7128
Changes from 17 commits
11fd854
f6ab7fc
0722530
91d783d
c142623
0cee214
5dcaed4
8107757
0f8aad8
4cd1f61
9344c94
f5f1b5a
689f025
6d7e6a2
5f12283
ea5853f
396c41b
368607b
b612321
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,7 +19,7 @@ | |
import enum | ||
import importlib | ||
import logging | ||
import multiprocessing | ||
import multiprocessing as mp | ||
import os | ||
import signal | ||
import sys | ||
|
@@ -37,7 +37,7 @@ | |
import airflow.models | ||
from airflow.configuration import conf | ||
from airflow.dag.base_dag import BaseDag, BaseDagBag | ||
from airflow.exceptions import AirflowException | ||
from airflow.exceptions import AirflowConfigException, AirflowException | ||
from airflow.models import Connection, errors | ||
from airflow.models.taskinstance import SimpleTaskInstance | ||
from airflow.settings import STORE_SERIALIZED_DAGS | ||
|
@@ -292,6 +292,8 @@ def __init__(self, | |
max_runs, | ||
processor_factory, | ||
processor_timeout, | ||
dag_ids, | ||
pickle_dags, | ||
async_mode): | ||
""" | ||
:param dag_directory: Directory where DAG definitions are kept. All | ||
|
@@ -307,6 +309,10 @@ def __init__(self, | |
:type processor_factory: (unicode, unicode, list) -> (AbstractDagFileProcessorProcess) | ||
:param processor_timeout: How long to wait before timing out a DAG file processor | ||
:type processor_timeout: timedelta | ||
:param dag_ids: if specified, only schedule tasks with these DAG IDs | ||
:type dag_ids: list[str] | ||
:param pickle_dags: whether to pickle DAGs. | ||
:type: pickle_dags: bool | ||
:param async_mode: Whether to start agent in async mode | ||
:type async_mode: bool | ||
""" | ||
|
@@ -316,6 +322,8 @@ def __init__(self, | |
self._max_runs = max_runs | ||
self._processor_factory = processor_factory | ||
self._processor_timeout = processor_timeout | ||
self._dag_ids = dag_ids | ||
self._pickle_dags = pickle_dags | ||
self._async_mode = async_mode | ||
# Map from file path to the processor | ||
self._processors = {} | ||
|
@@ -332,18 +340,37 @@ def start(self): | |
""" | ||
Launch DagFileProcessorManager processor and start DAG parsing loop in manager. | ||
""" | ||
self._parent_signal_conn, child_signal_conn = multiprocessing.Pipe() | ||
self._process = multiprocessing.Process( | ||
if conf.has_option('core', 'mp_start_method'): | ||
mp_start_method = conf.get('core', 'mp_start_method') | ||
else: | ||
mp_start_method = mp.get_start_method() | ||
|
||
possible_value_list = mp.get_all_start_methods() | ||
if mp_start_method not in possible_value_list: | ||
raise AirflowConfigException( | ||
"mp_start_method should not be " + mp_start_method + | ||
". Possible value is one of " + str(possible_value_list)) | ||
cxt = mp.get_context(mp_start_method) | ||
|
||
self._parent_signal_conn, child_signal_conn = cxt.Pipe() | ||
args_list = [ | ||
self._dag_directory, | ||
self._file_paths, | ||
self._max_runs, | ||
self._processor_factory, | ||
self._processor_timeout, | ||
child_signal_conn, | ||
self._dag_ids, | ||
self._pickle_dags, | ||
self._async_mode | ||
] | ||
|
||
if cxt.get_start_method() != "fork": | ||
args_list.append(conf) | ||
|
||
self._process = cxt.Process( | ||
target=type(self)._run_processor_manager, | ||
args=( | ||
self._dag_directory, | ||
self._file_paths, | ||
self._max_runs, | ||
self._processor_factory, | ||
self._processor_timeout, | ||
child_signal_conn, | ||
self._async_mode, | ||
) | ||
args=args_list | ||
) | ||
self._process.start() | ||
|
||
|
@@ -388,12 +415,20 @@ def _run_processor_manager(dag_directory, | |
processor_factory, | ||
processor_timeout, | ||
signal_conn, | ||
async_mode): | ||
dag_ids, | ||
pickle_dags, | ||
async_mode, | ||
inherited_conf=None): | ||
|
||
# Make this process start as a new process group - that makes it easy | ||
# to kill all sub-process of this at the OS-level, rather than having | ||
# to iterate the child processes | ||
os.setpgid(0, 0) | ||
if inherited_conf is not None: # pylint: disable=too-many-nested-blocks | ||
for section in inherited_conf: | ||
for key, value in inherited_conf[section].items(): | ||
if value not in conf: | ||
conf.set(section, key, value.replace("%", "%%")) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This looks suspect. What's going on here? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I also don't see anywhere that we actually pass a value here? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sorry, not There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @ashb If only |
||
|
||
setproctitle("airflow scheduler -- DagFileProcessorManager") | ||
# Reload configurations and settings to avoid collision with parent process. | ||
|
@@ -414,6 +449,8 @@ def _run_processor_manager(dag_directory, | |
processor_factory, | ||
processor_timeout, | ||
signal_conn, | ||
dag_ids, | ||
pickle_dags, | ||
async_mode) | ||
|
||
processor_manager.start() | ||
|
@@ -527,6 +564,10 @@ class DagFileProcessorManager(LoggingMixin): # pylint: disable=too-many-instanc | |
:type processor_timeout: timedelta | ||
:param signal_conn: connection to communicate signal with processor agent. | ||
:type signal_conn: airflow.models.connection.Connection | ||
:param dag_ids: if specified, only schedule tasks with these DAG IDs | ||
:type dag_ids: list[str] | ||
:param pickle_dags: whether to pickle DAGs. | ||
:type pickle_dags: bool | ||
:param async_mode: whether to start the manager in async mode | ||
:type async_mode: bool | ||
""" | ||
|
@@ -538,13 +579,17 @@ def __init__(self, | |
processor_factory: Callable[[str, List[Any]], AbstractDagFileProcessorProcess], | ||
processor_timeout: timedelta, | ||
signal_conn: Connection, | ||
dag_ids: List[str], | ||
pickle_dags: bool, | ||
async_mode: bool = True): | ||
self._file_paths = file_paths | ||
self._file_path_queue: List[str] = [] | ||
self._dag_directory = dag_directory | ||
self._max_runs = max_runs | ||
self._processor_factory = processor_factory | ||
self._signal_conn = signal_conn | ||
self._pickle_dags = pickle_dags | ||
self._dag_ids = dag_ids | ||
self._async_mode = async_mode | ||
self._parsing_start_time: Optional[datetime] = None | ||
|
||
|
@@ -1055,7 +1100,7 @@ def heartbeat(self): | |
# Start more processors if we have enough slots and files to process | ||
while self._parallelism - len(self._processors) > 0 and self._file_path_queue: | ||
file_path = self._file_path_queue.pop(0) | ||
processor = self._processor_factory(file_path, self._zombies) | ||
processor = self._processor_factory(file_path, self._zombies, self._dag_ids, self._pickle_dags) | ||
Stats.incr('dag_processing.processes') | ||
|
||
processor.start() | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
When is inherited_conf used -- I can't find this one used anywhere either.
We want to set the process title always.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah, this is just a miss-indentation. I'll fix it.