diff --git a/FORK.md b/FORK.md index d2dfa554663a5..8953ae2b3d519 100644 --- a/FORK.md +++ b/FORK.md @@ -1,5 +1,5 @@ # Difference with upstream - +* [SPARK-25200](https://issues.apache.org/jira/browse/SPARK-25200) - Allow setting HADOOP_CONF_DIR as a spark config diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala index 3090a3b10a97c..e0a45560a4225 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala @@ -265,9 +265,11 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S if (master.startsWith("yarn")) { val hasHadoopEnv = env.contains("HADOOP_CONF_DIR") || env.contains("YARN_CONF_DIR") - if (!hasHadoopEnv && !Utils.isTesting) { + val hasHadoopProp = sparkProperties.contains("spark.yarn.conf.dir") + if (!hasHadoopEnv && !hasHadoopProp && !Utils.isTesting) { error(s"When running with master '$master' " + - "either HADOOP_CONF_DIR or YARN_CONF_DIR must be set in the environment.") + "either HADOOP_CONF_DIR or YARN_CONF_DIR must be set in the environment, " + + "or spark.yarn.conf.dir in the spark properties.") } } diff --git a/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java b/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java index 3ae4633c79b04..94d242fb80f43 100644 --- a/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java +++ b/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java @@ -204,6 +204,7 @@ List buildClassPath(String appClassPath) throws IOException { addToClassPath(cp, getenv("HADOOP_CONF_DIR")); addToClassPath(cp, getenv("YARN_CONF_DIR")); + addToClassPath(cp, getEffectiveConfig().get("spark.yarn.conf.dir")); addToClassPath(cp, getenv("SPARK_DIST_CLASSPATH")); return new ArrayList<>(cp); } diff --git 
a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala index 6da6a8daad2b0..3fa46d869a0f9 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala @@ -710,11 +710,11 @@ private[spark] class Client( // SPARK-23630: during testing, Spark scripts filter out hadoop conf dirs so that user's // environments do not interfere with tests. This allows a special env variable during // tests so that custom conf dirs can be used by unit tests. - val confDirs = Seq("HADOOP_CONF_DIR", "YARN_CONF_DIR") ++ - (if (Utils.isTesting) Seq("SPARK_TEST_HADOOP_CONF_DIR") else Nil) + val confDirs = Seq("HADOOP_CONF_DIR", "YARN_CONF_DIR").flatMap(sys.env.get) ++ + sparkConf.getOption("spark.yarn.conf.dir") - confDirs.foreach { envKey => - sys.env.get(envKey).foreach { path => + confDirs.foreach { + path => { val dir = new File(path) if (dir.isDirectory()) { val files = dir.listFiles() diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala index 7ec15023c59b0..25364f0e2e132 100644 --- a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala +++ b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala @@ -136,8 +136,9 @@ class YarnClusterSuite extends BaseYarnClusterSuite { val finalState = runSpark(false, mainClassName(YarnClusterDriverUseSparkHadoopUtilConf.getClass), appArgs = Seq("key=value", "spark.test.key=testvalue", result.getAbsolutePath()), - extraConf = Map("spark.hadoop.key" -> "value"), - extraEnv = Map("SPARK_TEST_HADOOP_CONF_DIR" -> customConf.getAbsolutePath())) + extraConf = Map( + "spark.hadoop.key" -> "value", + "spark.yarn.conf.dir" -> 
customConf.getAbsolutePath)) checkResult(finalState, result) }