From 14b3e52ce08c79ee9a5b7ee383b5263a61497edb Mon Sep 17 00:00:00 2001 From: Joseph McCartin Date: Thu, 12 Dec 2019 09:27:43 +0000 Subject: [PATCH] [AIRFLOW-5744] Environment variables not correctly set in Spark submit operator --- airflow/contrib/hooks/spark_submit_hook.py | 4 +++- tests/contrib/hooks/test_spark_submit_hook.py | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/airflow/contrib/hooks/spark_submit_hook.py b/airflow/contrib/hooks/spark_submit_hook.py index 6354df38811ae1..1a478b4a3857d6 100644 --- a/airflow/contrib/hooks/spark_submit_hook.py +++ b/airflow/contrib/hooks/spark_submit_hook.py @@ -227,7 +227,7 @@ def _build_spark_submit_command(self, application): """ connection_cmd = self._get_spark_binary_path() - # The url ot the spark master + # The url of the spark master connection_cmd += ["--master", self._connection['master']] if self._conf: @@ -236,6 +236,8 @@ def _build_spark_submit_command(self, application): if self._env_vars and (self._is_kubernetes or self._is_yarn): if self._is_yarn: tmpl = "spark.yarn.appMasterEnv.{}={}" + # Allow dynamic setting of hadoop/yarn configuration environments + self._env = self._env_vars else: tmpl = "spark.kubernetes.driverEnv.{}={}" for key in self._env_vars: diff --git a/tests/contrib/hooks/test_spark_submit_hook.py b/tests/contrib/hooks/test_spark_submit_hook.py index b73024cec48b27..f7165bea9e7574 100644 --- a/tests/contrib/hooks/test_spark_submit_hook.py +++ b/tests/contrib/hooks/test_spark_submit_hook.py @@ -489,6 +489,7 @@ def test_resolve_spark_submit_env_vars_yarn(self): # Then self.assertEqual(cmd[4], "spark.yarn.appMasterEnv.bar=foo") + self.assertEqual(hook._env, {"bar": "foo"}) def test_resolve_spark_submit_env_vars_k8s(self): # Given