From 54b0ac52c3b4afce50a96fc205c2de73962ac1eb Mon Sep 17 00:00:00 2001 From: Will Raschkowski Date: Tue, 23 Feb 2021 22:13:38 +0000 Subject: [PATCH] [SPARK-25200][YARN] Allow specifying HADOOP_CONF_DIR as spark property We use the InProcessLauncher internally [1] to launch to different YARN clusters. The clusters might need different configuration files, which we can't keep apart if the InProcessLauncher discovers config folders from the same HADOOP_CONF_DIR environment variable. This change allows us to specify different config directories using Spark config. See upstream PR [2] and ticket [3]. [1] https://pl.ntr/1UK [2] https://github.com/apache/spark/pull/22289 [3] https://issues.apache.org/jira/browse/SPARK-25200 Co-authored-by: Adam Balogh Co-authored-by: Robert Kruszewski Co-authored-by: Josh Casale Co-authored-by: Will Raschkowski --- .../org/apache/spark/deploy/SparkSubmitArguments.scala | 6 ++++-- .../org/apache/spark/launcher/AbstractCommandBuilder.java | 1 + .../main/scala/org/apache/spark/deploy/yarn/Client.scala | 8 ++++---- .../org/apache/spark/deploy/yarn/YarnClusterSuite.scala | 5 +++-- 4 files changed, 12 insertions(+), 8 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala index 3090a3b10a97c..e0a45560a4225 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala @@ -265,9 +265,11 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S if (master.startsWith("yarn")) { val hasHadoopEnv = env.contains("HADOOP_CONF_DIR") || env.contains("YARN_CONF_DIR") - if (!hasHadoopEnv && !Utils.isTesting) { + val hasHadoopProp = sparkProperties.contains("spark.yarn.conf.dir") + if (!hasHadoopEnv && !hasHadoopProp && !Utils.isTesting) { error(s"When running with master '$master' " +
- "either HADOOP_CONF_DIR or YARN_CONF_DIR must be set in the environment.") + "either HADOOP_CONF_DIR or YARN_CONF_DIR must be set in the environment, " + + "or spark.yarn.conf.dir in the spark properties.") } } diff --git a/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java b/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java index 3ae4633c79b04..94d242fb80f43 100644 --- a/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java +++ b/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java @@ -204,6 +204,7 @@ List buildClassPath(String appClassPath) throws IOException { addToClassPath(cp, getenv("HADOOP_CONF_DIR")); addToClassPath(cp, getenv("YARN_CONF_DIR")); + addToClassPath(cp, getEffectiveConfig().get("spark.yarn.conf.dir")); addToClassPath(cp, getenv("SPARK_DIST_CLASSPATH")); return new ArrayList<>(cp); } diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala index 6da6a8daad2b0..3fa46d869a0f9 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala @@ -710,11 +710,11 @@ private[spark] class Client( // SPARK-23630: during testing, Spark scripts filter out hadoop conf dirs so that user's // environments do not interfere with tests. This allows a special env variable during // tests so that custom conf dirs can be used by unit tests.
- val confDirs = Seq("HADOOP_CONF_DIR", "YARN_CONF_DIR") ++ - (if (Utils.isTesting) Seq("SPARK_TEST_HADOOP_CONF_DIR") else Nil) + val confDirs = Seq("HADOOP_CONF_DIR", "YARN_CONF_DIR").flatMap(sys.env.get) ++ + sparkConf.getOption("spark.yarn.conf.dir") - confDirs.foreach { envKey => - sys.env.get(envKey).foreach { path => + confDirs.foreach { + path => { val dir = new File(path) if (dir.isDirectory()) { val files = dir.listFiles() diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala index b7c9e83446012..efa8fd224ba36 100644 --- a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala +++ b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala @@ -136,8 +136,9 @@ class YarnClusterSuite extends BaseYarnClusterSuite { val finalState = runSpark(false, mainClassName(YarnClusterDriverUseSparkHadoopUtilConf.getClass), appArgs = Seq("key=value", "spark.test.key=testvalue", result.getAbsolutePath()), - extraConf = Map("spark.hadoop.key" -> "value"), - extraEnv = Map("SPARK_TEST_HADOOP_CONF_DIR" -> customConf.getAbsolutePath())) + extraConf = Map( + "spark.hadoop.key" -> "value", + "spark.yarn.conf.dir" -> customConf.getAbsolutePath)) checkResult(finalState, result) }