From 96509923552fb52615b0dec33c3fb1560b7c45ab Mon Sep 17 00:00:00 2001 From: Jed Cunningham <66968678+jedcunningham@users.noreply.github.com> Date: Fri, 29 Jan 2021 11:22:53 -0700 Subject: [PATCH] Docs: Fix FAQ on scheduler latency (#13969) (cherry picked from commit ddc424283c55918995c0409a4d4b664b97a0e973) --- docs/apache-airflow/faq.rst | 7 ++----- docs/apache-airflow/scheduler.rst | 4 ++-- docs/build_docs.py | 2 +- 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/docs/apache-airflow/faq.rst b/docs/apache-airflow/faq.rst index edc24ab884751..e5cdfd2050b6e 100644 --- a/docs/apache-airflow/faq.rst +++ b/docs/apache-airflow/faq.rst @@ -205,11 +205,8 @@ This means ``explicit_defaults_for_timestamp`` is disabled in your mysql server How to reduce airflow dag scheduling latency in production? ----------------------------------------------------------- -- ``parsing_processes``: Scheduler will spawn multiple threads in parallel to parse dags. - This is controlled by ``parsing_processes`` with default value of 2. - User should increase this value to a larger value (e.g numbers of cpus where scheduler runs + 1) in production. -- If you're using Airflow 1.10.x, consider moving to Airflow 2, which has reduced dag scheduling latency dramatically, - and allows for running multiple schedulers. +Airflow 2 has low DAG scheduling latency out of the box (particularly when compared with Airflow 1.10.x), +however if you need more throughput you can :ref:`start multiple schedulers <scheduler:ha>`. Why next_ds or prev_ds might not contain expected values? --------------------------------------------------------- diff --git a/docs/apache-airflow/scheduler.rst b/docs/apache-airflow/scheduler.rst index 8e047fe9a482e..54c8f66cb7644 100644 --- a/docs/apache-airflow/scheduler.rst +++ b/docs/apache-airflow/scheduler.rst @@ -66,11 +66,11 @@ This only has effect if your DAG has no ``schedule_interval``. 
If you keep default ``allow_trigger_in_future = False`` and try 'external trigger' to run future-dated execution dates, the scheduler won't execute it now but the scheduler will execute it in the future once the current date rolls over to the execution date. +.. _scheduler:ha: + Running More Than One Scheduler ------------------------------- -.. _scheduler:ha: - .. versionadded: 2.0.0 Airflow supports running more than one scheduler concurrently -- both for performance reasons and for diff --git a/docs/build_docs.py b/docs/build_docs.py index f0486ebea03d4..1080533c5189b 100755 --- a/docs/build_docs.py +++ b/docs/build_docs.py @@ -75,7 +75,7 @@ def _promote_new_flags(): print("Still too slow?") print() print("You can only build one documentation package:") - print(" ./breeze build-docs --package-filter <PACKAGE-NAME>") + print(" ./breeze build-docs -- --package-filter <PACKAGE-NAME>") print() print("This usually takes from 20 seconds to 2 minutes.") print()