From 512a510ddbcd561a62a789f6180c72e7534ecad4 Mon Sep 17 00:00:00 2001 From: Jed Cunningham <66968678+jedcunningham@users.noreply.github.com> Date: Mon, 12 Apr 2021 14:35:40 -0600 Subject: [PATCH] Change default of `[kubernetes] enable_tcp_keepalive` to `True` We've seen instances of connection resets happening, particularly in Azure, that are remedied by enabling tcp_keepalive. Enabling it by default should be safe and sane regardless of where we are running. --- UPDATING.md | 6 +++++- airflow/config_templates/config.yml | 2 +- airflow/config_templates/default_airflow.cfg | 2 +- airflow/kubernetes/kube_client.py | 10 +++++----- 4 files changed, 12 insertions(+), 8 deletions(-) diff --git a/UPDATING.md b/UPDATING.md index 7f059e2b482ad8..bbacbe05202af1 100644 --- a/UPDATING.md +++ b/UPDATING.md @@ -74,7 +74,7 @@ https://developers.google.com/style/inclusive-documentation Moved the pod launcher from `airflow.kubernetes.pod_launcher` to `airflow.providers.cncf.kubernetes.utils.pod_launcher` -This will alow users to update the pod_launcher for the KubernetesPodOperator without requiring an airflow upgrade +This will allow users to update the pod_launcher for the KubernetesPodOperator without requiring an airflow upgrade ### Default `[webserver] worker_refresh_interval` is changed to `6000` seconds @@ -91,6 +91,10 @@ serve as a DagBag cache burst time. The `default_queue` configuration option has been moved from `[celery]` section to `[operators]` section to allow for re-use between different executors. +### Default `[kubernetes] enable_tcp_keepalive` is changed to `True` + +This allows Airflow to work more reliably with some environments (like Azure) by default. + ## Airflow 2.0.1 ### Permission to view Airflow Configurations has been removed from `User` and `Viewer` role diff --git a/airflow/config_templates/config.yml b/airflow/config_templates/config.yml index 1e4d4b146b8fc1..edee338bba356d 100644 --- a/airflow/config_templates/config.yml +++ b/airflow/config_templates/config.yml @@ -2116,7 +2116,7 @@ version_added: ~ type: boolean example: ~ - default: "False" + default: "True" - name: tcp_keep_idle description: | When the `enable_tcp_keepalive` option is enabled, TCP probes a connection that has diff --git a/airflow/config_templates/default_airflow.cfg b/airflow/config_templates/default_airflow.cfg index c880f3e78a505c..c498d466f41e5e 100644 --- a/airflow/config_templates/default_airflow.cfg +++ b/airflow/config_templates/default_airflow.cfg @@ -1045,7 +1045,7 @@ delete_option_kwargs = # Enables TCP keepalive mechanism. This prevents Kubernetes API requests to hang indefinitely # when idle connection is time-outed on services like cloud load balancers or firewalls. -enable_tcp_keepalive = False +enable_tcp_keepalive = True # When the `enable_tcp_keepalive` option is enabled, TCP probes a connection that has # been idle for `tcp_keep_idle` seconds. diff --git a/airflow/kubernetes/kube_client.py b/airflow/kubernetes/kube_client.py index c83b5e067a65bd..aaadbc93a905de 100644 --- a/airflow/kubernetes/kube_client.py +++ b/airflow/kubernetes/kube_client.py @@ -85,9 +85,9 @@ def _enable_tcp_keepalive() -> None: from urllib3.connection import HTTPConnection, HTTPSConnection - tcp_keep_idle = conf.getint('kubernetes', 'tcp_keep_idle', fallback=120) - tcp_keep_intvl = conf.getint('kubernetes', 'tcp_keep_intvl', fallback=30) - tcp_keep_cnt = conf.getint('kubernetes', 'tcp_keep_cnt', fallback=6) + tcp_keep_idle = conf.getint('kubernetes', 'tcp_keep_idle') + tcp_keep_intvl = conf.getint('kubernetes', 'tcp_keep_intvl') + tcp_keep_cnt = conf.getint('kubernetes', 'tcp_keep_cnt') socket_options = [ (socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1), @@ -125,10 +125,10 @@ def get_kube_client( if config_file is None: config_file = conf.get('kubernetes', 'config_file', fallback=None) - if conf.getboolean('kubernetes', 'enable_tcp_keepalive', fallback=False): + if conf.getboolean('kubernetes', 'enable_tcp_keepalive'): _enable_tcp_keepalive() - if not conf.getboolean('kubernetes', 'verify_ssl', fallback=True): + if not conf.getboolean('kubernetes', 'verify_ssl'): _disable_verify_ssl() client_conf = _get_kube_config(in_cluster, cluster_context, config_file)