apache · mgummelt · Feb 10, 2016 · Feb 10, 2016 · Apr 17, 2017 · Apr 17, 2017
diff --git a/core/pom.xml b/core/pom.xml
@@ -357,6 +357,34 @@
       <groupId>org.apache.commons</groupId>
       <artifactId>commons-crypto</artifactId>
     </dependency>
+
+    <!--
+     Testing Hive reflection needs hive on the test classpath only.
+     It doesn't need the spark hive modules, so the -Phive flag is not checked.
+      -->
+    <dependency>
+      <groupId>${hive.group}</groupId>
+      <artifactId>hive-exec</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>${hive.group}</groupId>
+      <artifactId>hive-metastore</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.thrift</groupId>
+      <artifactId>libthrift</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.thrift</groupId>
+      <artifactId>libfb303</artifactId>
+      <scope>test</scope>
+    </dependency>
+
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-yarn-api</artifactId>
+    </dependency>
   </dependencies>
   <build>
     <outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>

diff --git a/core/src/main/scala/org/apache/spark/deploy/security/ConfigurableCredentialManager.scala b/core/src/main/scala/org/apache/spark/deploy/security/ConfigurableCredentialManager.scala
@@ -0,0 +1,120 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.deploy.security
+
+import org.apache.hadoop.conf.Configuration
+import org.apache.hadoop.fs.FileSystem
+import org.apache.hadoop.security.Credentials
+
+import org.apache.spark.SparkConf
+import org.apache.spark.internal.Logging
+
+/**
+ * A ConfigurableCredentialManager to manage all the registered credential providers and offer
+ * APIs for other modules to obtain credentials as well as renewal time. By default
+ * [[HadoopFSCredentialProvider]], [[HiveCredentialProvider]] and [[HBaseCredentialProvider]] will
+ * be loaded in if not explicitly disabled, any plugged-in credential provider wants to be
+ * managed by ConfigurableCredentialManager needs to implement [[HadoopDelegationTokenProvider]]
+ * interface and put into resources/META-INF/services to be loaded by ServiceLoader.
+ *
+ * Also each credential provider is controlled by
+ * spark.security.credentials.{service}.enabled, it will not be loaded in if set to false.
+ * For example, Hive's credential provider [[HiveCredentialProvider]] can be enabled/disabled by
+ * the configuration spark.security.credentials.hive.enabled.
+ *
+ * @param sparkConf Spark configuration
+ * @param hadoopConf Hadoop configuration
+ * @param fileSystems Delegation tokens will be fetched for these Hadoop filesystems.
+ */
+private[spark] class ConfigurableCredentialManager(
+    sparkConf: SparkConf,
+    hadoopConf: Configuration,
+    fileSystems: Set[FileSystem])
+  extends Logging {
+
+  private val deprecatedProviderEnabledConfigs = List(
+    "spark.yarn.security.tokens.%s.enabled",
+    "spark.yarn.security.credentials.%s.enabled")
+  private val providerEnabledConfig = "spark.security.credentials.%s.enabled"
+
+  // Maintain all the registered credential providers
+  private val credentialProviders = getCredentialProviders
+  logDebug(s"Using the following credential providers: ${credentialProviders.keys.mkString(", ")}.")
+
+  private def getCredentialProviders: Map[String, HadoopDelegationTokenProvider] = {
+    val providers = List(new HadoopFSCredentialProvider(fileSystems),
+      new HiveCredentialProvider,
+      new HBaseCredentialProvider)
+
+    // Filter out credentials in which spark.security.credentials.{service}.enabled is false.
+    providers
+      .filter(p => isServiceEnabled(p.serviceName))
+      .map(p => (p.serviceName, p))
+      .toMap
+  }
+
+  def isServiceEnabled(serviceName: String): Boolean = {
+    val key = providerEnabledConfig.format(serviceName)
+
+    deprecatedProviderEnabledConfigs.foreach { pattern =>
+      val deprecatedKey = pattern.format(serviceName)
+      if (sparkConf.contains(deprecatedKey)) {
+        logWarning(s"${deprecatedKey} is deprecated.  Please use ${key} instead.")
+      }
+    }
+
+    val isEnabledDeprecated = deprecatedProviderEnabledConfigs.forall { pattern =>
+      sparkConf
+        .getOption(pattern.format(serviceName))
+        .map(_.toBoolean)
+        .getOrElse(true)
+    }
+
+    sparkConf
+      .getOption(key)
+      .map(_.toBoolean)
+      .getOrElse(isEnabledDeprecated)
+  }
+
+  /**
+   * Get credential provider for the specified service.
+   */
+  def getServiceCredentialProvider(service: String): Option[HadoopDelegationTokenProvider] = {
+    credentialProviders.get(service)
+  }
+
+  /**
+   * Writes delegation tokens to creds.  Delegation tokens are fetched from all registered
+   * providers.
+   *
+   * @return Time after which the fetched delegation tokens should be renewed.
+   */
+  def obtainCredentials(
+      hadoopConf: Configuration,
+      creds: Credentials): Long = {
+    credentialProviders.values.flatMap { provider =>
+      if (provider.credentialsRequired(hadoopConf)) {
+        provider.obtainCredentials(hadoopConf, creds)
+      } else {
+        logDebug(s"Service ${provider.serviceName} does not require a token." +
+          s" Check your configuration to see if security is disabled or not.")
+        None
+      }
+    }.foldLeft(Long.MaxValue)(math.min)
+  }
+}
diff --git a/...rn/security/HBaseCredentialProvider.scala → ...oy/security/HBaseCredentialProvider.scala b/...rn/security/HBaseCredentialProvider.scala → ...oy/security/HBaseCredentialProvider.scala
@@ -15,7 +15,7 @@
  * limitations under the License.
  */
 
-package org.apache.spark.deploy.yarn.security
+package org.apache.spark.deploy.security
 
 import scala.reflect.runtime.universe
 import scala.util.control.NonFatal
@@ -24,17 +24,16 @@ import org.apache.hadoop.conf.Configuration
 import org.apache.hadoop.security.Credentials
 import org.apache.hadoop.security.token.{Token, TokenIdentifier}
 
-import org.apache.spark.SparkConf
 import org.apache.spark.internal.Logging
 import org.apache.spark.util.Utils
 
-private[security] class HBaseCredentialProvider extends ServiceCredentialProvider with Logging {
+private[security] class HBaseCredentialProvider
+  extends HadoopDelegationTokenProvider with Logging {
 
   override def serviceName: String = "hbase"
 
   override def obtainCredentials(
       hadoopConf: Configuration,
-      sparkConf: SparkConf,
       creds: Credentials): Option[Long] = {
     try {
       val mirror = universe.runtimeMirror(Utils.getContextOrSparkClassLoader)

diff --git a/core/src/main/scala/org/apache/spark/deploy/security/HadoopDelegationTokenProvider.scala b/core/src/main/scala/org/apache/spark/deploy/security/HadoopDelegationTokenProvider.scala
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.deploy.security
+
+import org.apache.hadoop.conf.Configuration
+import org.apache.hadoop.security.Credentials
+
+/**
+ * Hadoop delegation token provider.
+ */
+private[spark] trait HadoopDelegationTokenProvider {
+
+  /**
+   * Name of the service to provide credentials. This name should unique, Spark internally will
+   * use this name to differentiate credential provider.
+   */
+  def serviceName: String
+
+  /**
+   * Returns true if credentials are required for this service. By default, it is based on whether
+   * Hadoop security is enabled.
+   */
+  def credentialsRequired(hadoopConf: Configuration): Boolean
+
+  /**
+   * Obtain credentials for this service and get the time of the next renewal.
+   * @param hadoopConf Configuration of current Hadoop Compatible system.
+   * @param creds Credentials to add tokens and security keys to.
+   * @return If this Credential is renewable and can be renewed, return the time of the next
+   *         renewal, otherwise None should be returned.
+   */
+  def obtainCredentials(
+    hadoopConf: Configuration,
+    creds: Credentials): Option[Long]
+}
diff --git a/core/src/main/scala/org/apache/spark/deploy/security/HadoopFSCredentialProvider.scala b/core/src/main/scala/org/apache/spark/deploy/security/HadoopFSCredentialProvider.scala
@@ -0,0 +1,125 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.deploy.security
+
+import scala.collection.JavaConverters._
+import scala.util.Try
+
+import org.apache.hadoop.conf.Configuration
+import org.apache.hadoop.fs.FileSystem
+import org.apache.hadoop.mapred.Master
+import org.apache.hadoop.security.{Credentials, UserGroupInformation}
+import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenIdentifier
+
+import org.apache.spark.SparkException
+import org.apache.spark.internal.Logging
+
+
+private[deploy] class HadoopFSCredentialProvider(fileSystems: Set[FileSystem])
+    extends HadoopDelegationTokenProvider with Logging {
+  // Token renewal interval, this value will be set in the first call,
+  // if None means no token renewer specified or no token can be renewed,
+  // so cannot get token renewal interval.
+  private var tokenRenewalInterval: Option[Long] = null
+
+  override val serviceName: String = "hadoopfs"
+
+  override def obtainCredentials(
+      hadoopConf: Configuration,
+      creds: Credentials): Option[Long] = {
+
+    val newCreds = fetchDelegationTokens(
+      getTokenRenewer(hadoopConf),
+      fileSystems)
+
+    // Get the token renewal interval if it is not set. It will only be called once.
+    if (tokenRenewalInterval == null) {
+      tokenRenewalInterval = getTokenRenewalInterval(hadoopConf, fileSystems)
+    }
+
+    // Get the time of next renewal.
+    val nextRenewalDate = tokenRenewalInterval.flatMap { interval =>
+      val nextRenewalDates = newCreds.getAllTokens.asScala
+        .filter(_.decodeIdentifier().isInstanceOf[AbstractDelegationTokenIdentifier])
+        .map { token =>
+          val identifier = token
+            .decodeIdentifier()
+            .asInstanceOf[AbstractDelegationTokenIdentifier]
+          identifier.getIssueDate + interval
+        }
+      if (nextRenewalDates.isEmpty) None else Some(nextRenewalDates.min)
+    }
+
+    creds.addAll(newCreds)
+    nextRenewalDate
+  }
+
+  def credentialsRequired(hadoopConf: Configuration): Boolean = {
+    UserGroupInformation.isSecurityEnabled
+  }
+
+  private def getTokenRenewer(hadoopConf: Configuration): String = {
+    val tokenRenewer = Master.getMasterPrincipal(hadoopConf)
+    logDebug("Delegation token renewer is: " + tokenRenewer)
+
+    if (tokenRenewer == null || tokenRenewer.length() == 0) {
+      val errorMessage = "Can't get Master Kerberos principal for use as renewer."
+      logError(errorMessage)
+      throw new SparkException(errorMessage)
+    }
+
+    tokenRenewer
+  }
+
+  private def fetchDelegationTokens(
+    renewer: String,
+    filesystems: Set[FileSystem]): Credentials = {
+    val creds = new Credentials()
+
+    filesystems.foreach { fs =>
+      logInfo("getting token for: " + fs)
+      fs.addDelegationTokens(renewer, creds)
+    }
+
+    creds
+  }
+
+  private def getTokenRenewalInterval(
+    hadoopConf: Configuration,
+    filesystems: Set[FileSystem]): Option[Long] = {
+    // We cannot use the tokens generated with renewer yarn. Trying to renew
+    // those will fail with an access control issue. So create new tokens with the logged in
+    // user as renewer.
+    val creds = fetchDelegationTokens(
+      UserGroupInformation.getCurrentUser.getUserName,
+      filesystems)
+
+    val renewIntervals = creds.getAllTokens.asScala.filter {
+      _.decodeIdentifier().isInstanceOf[AbstractDelegationTokenIdentifier]
+    }.flatMap { token =>
+      Try {
+        val newExpiration = token.renew(hadoopConf)
+        val identifier = token.decodeIdentifier().asInstanceOf[AbstractDelegationTokenIdentifier]
+        val interval = newExpiration - identifier.getIssueDate
+        logInfo(s"Renewal interval is $interval for token ${token.getKind.toString}")
+        interval
+      }.toOption
+    }
+    if (renewIntervals.isEmpty) None else Some(renewIntervals.min)
+  }
+}
diff --git a/...arn/security/HiveCredentialProvider.scala → ...loy/security/HiveCredentialProvider.scala b/...arn/security/HiveCredentialProvider.scala → ...loy/security/HiveCredentialProvider.scala
@@ -15,7 +15,7 @@
  * limitations under the License.
  */
 
-package org.apache.spark.deploy.yarn.security
+package org.apache.spark.deploy.security
 
 import java.lang.reflect.UndeclaredThrowableException
 import java.security.PrivilegedExceptionAction
@@ -29,11 +29,10 @@ import org.apache.hadoop.io.Text
 import org.apache.hadoop.security.{Credentials, UserGroupInformation}
 import org.apache.hadoop.security.token.Token
 
-import org.apache.spark.SparkConf
 import org.apache.spark.internal.Logging
 import org.apache.spark.util.Utils
 
-private[security] class HiveCredentialProvider extends ServiceCredentialProvider with Logging {
+private[security] class HiveCredentialProvider extends HadoopDelegationTokenProvider with Logging {
 
   override def serviceName: String = "hive"
 
@@ -62,7 +61,6 @@ private[security] class HiveCredentialProvider extends ServiceCredentialProvider
 
   override def obtainCredentials(
       hadoopConf: Configuration,
-      sparkConf: SparkConf,
       creds: Credentials): Option[Long] = {
     val conf = hiveConf(hadoopConf)