Merge branch 'master' into SPARK-4924
Conflicts:
	bin/compute-classpath.sh
	bin/utils.sh
	bin/windows-utils.cmd
	core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
	python/pyspark/java_gateway.py
	sbin/spark-daemon.sh
Marcelo Vanzin committed Feb 19, 2015
2 parents de81da2 + 90095bf commit e50dc5e
Showing 365 changed files with 11,984 additions and 4,606 deletions.
2 changes: 2 additions & 0 deletions .rat-excludes
@@ -1,4 +1,5 @@
target
cache
.gitignore
.gitattributes
.project
@@ -18,6 +19,7 @@ fairscheduler.xml.template
spark-defaults.conf.template
log4j.properties
log4j.properties.template
metrics.properties
metrics.properties.template
slaves
slaves.template
121 changes: 10 additions & 111 deletions assembly/pom.xml
@@ -36,10 +36,6 @@
<spark.jar.dir>scala-${scala.binary.version}</spark.jar.dir>
<spark.jar.basename>spark-assembly-${project.version}-hadoop${hadoop.version}.jar</spark.jar.basename>
<spark.jar>${project.build.directory}/${spark.jar.dir}/${spark.jar.basename}</spark.jar>
<deb.pkg.name>spark</deb.pkg.name>
<deb.install.path>/usr/share/spark</deb.install.path>
<deb.user>root</deb.user>
<deb.bin.filemode>755</deb.bin.filemode>
</properties>

<dependencies>
@@ -118,6 +114,16 @@
<exclude>META-INF/*.RSA</exclude>
</excludes>
</filter>
<filter>
<!-- Exclude libgfortran, libgcc for license issues -->
<artifact>org.jblas:jblas</artifact>
<excludes>
<!-- Linux amd64 is OK; not statically linked -->
<exclude>lib/static/Linux/i386/**</exclude>
<exclude>lib/static/Mac OS X/**</exclude>
<exclude>lib/static/Windows/**</exclude>
</excludes>
</filter>
</filters>
</configuration>
<executions>
@@ -217,113 +223,6 @@
</plugins>
</build>
</profile>
<profile>
<id>deb</id>
<build>
<plugins>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>buildnumber-maven-plugin</artifactId>
<version>1.2</version>
<executions>
<execution>
<phase>validate</phase>
<goals>
<goal>create</goal>
</goals>
<configuration>
<shortRevisionLength>8</shortRevisionLength>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.vafer</groupId>
<artifactId>jdeb</artifactId>
<version>0.11</version>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>jdeb</goal>
</goals>
<configuration>
<deb>${project.build.directory}/${deb.pkg.name}_${project.version}-${buildNumber}_all.deb</deb>
<attach>false</attach>
<compression>gzip</compression>
<dataSet>
<data>
<src>${spark.jar}</src>
<type>file</type>
<mapper>
<type>perm</type>
<user>${deb.user}</user>
<group>${deb.user}</group>
<prefix>${deb.install.path}/jars</prefix>
</mapper>
</data>
<data>
<src>${basedir}/src/deb/RELEASE</src>
<type>file</type>
<mapper>
<type>perm</type>
<user>${deb.user}</user>
<group>${deb.user}</group>
<prefix>${deb.install.path}</prefix>
</mapper>
</data>
<data>
<src>${basedir}/../conf</src>
<type>directory</type>
<mapper>
<type>perm</type>
<user>${deb.user}</user>
<group>${deb.user}</group>
<prefix>${deb.install.path}/conf</prefix>
<filemode>${deb.bin.filemode}</filemode>
</mapper>
</data>
<data>
<src>${basedir}/../bin</src>
<type>directory</type>
<mapper>
<type>perm</type>
<user>${deb.user}</user>
<group>${deb.user}</group>
<prefix>${deb.install.path}/bin</prefix>
<filemode>${deb.bin.filemode}</filemode>
</mapper>
</data>
<data>
<src>${basedir}/../sbin</src>
<type>directory</type>
<mapper>
<type>perm</type>
<user>${deb.user}</user>
<group>${deb.user}</group>
<prefix>${deb.install.path}/sbin</prefix>
<filemode>${deb.bin.filemode}</filemode>
</mapper>
</data>
<data>
<src>${basedir}/../python</src>
<type>directory</type>
<mapper>
<type>perm</type>
<user>${deb.user}</user>
<group>${deb.user}</group>
<prefix>${deb.install.path}/python</prefix>
<filemode>${deb.bin.filemode}</filemode>
</mapper>
</data>
</dataSet>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</profile>
<profile>
<id>kinesis-asl</id>
<dependencies>
2 changes: 0 additions & 2 deletions assembly/src/deb/RELEASE

This file was deleted.

8 changes: 0 additions & 8 deletions assembly/src/deb/control/control

This file was deleted.

4 changes: 2 additions & 2 deletions bin/run-example
@@ -42,7 +42,7 @@ fi

JAR_COUNT=0

for f in ${JAR_PATH}/spark-examples-*hadoop*.jar; do
for f in "${JAR_PATH}"/spark-examples-*hadoop*.jar; do
if [[ ! -e "$f" ]]; then
echo "Failed to find Spark examples assembly in $FWDIR/lib or $FWDIR/examples/target" 1>&2
echo "You need to build Spark before running this program" 1>&2
@@ -54,7 +54,7 @@ done

if [ "$JAR_COUNT" -gt "1" ]; then
echo "Found multiple Spark examples assembly jars in ${JAR_PATH}" 1>&2
ls ${JAR_PATH}/spark-examples-*hadoop*.jar 1>&2
ls "${JAR_PATH}"/spark-examples-*hadoop*.jar 1>&2
echo "Please remove all but one jar." 1>&2
exit 1
fi
5 changes: 4 additions & 1 deletion build/mvn
@@ -21,6 +21,8 @@
_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
# Preserve the calling directory
_CALLING_DIR="$(pwd)"
# Options used during compilation
_COMPILE_JVM_OPTS="-Xmx2g -XX:MaxPermSize=512M -XX:ReservedCodeCacheSize=512m"

# Installs any application tarball given a URL, the expected tarball name,
# and, optionally, a checkable binary path to determine if the binary has
@@ -136,14 +138,15 @@ cd "${_CALLING_DIR}"
# Now that zinc is installed, check its status and, if it's
# not running or was just installed, start it
if [ -n "${ZINC_INSTALL_FLAG}" -o -z "`${ZINC_BIN} -status`" ]; then
export ZINC_OPTS=${ZINC_OPTS:-"$_COMPILE_JVM_OPTS"}
${ZINC_BIN} -shutdown
${ZINC_BIN} -start -port ${ZINC_PORT} \
-scala-compiler "${SCALA_COMPILER}" \
-scala-library "${SCALA_LIBRARY}" &>/dev/null
fi

# Set any `mvn` options if not already present
export MAVEN_OPTS=${MAVEN_OPTS:-"-Xmx2g -XX:MaxPermSize=512M -XX:ReservedCodeCacheSize=512m"}
export MAVEN_OPTS=${MAVEN_OPTS:-"$_COMPILE_JVM_OPTS"}

# Last, call the `mvn` command as usual
${MVN_BIN} "$@"
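
The script now shares a single set of compile-time JVM options between zinc and mvn, applying it only when the caller has not already exported ZINC_OPTS or MAVEN_OPTS. A rough Scala analogue of that `${VAR:-default}` precedence, purely illustrative and not part of the diff:

    // Caller-supplied environment value wins; otherwise fall back to the
    // shared compile options (mirrors the shell defaulting above).
    val compileJvmOpts = "-Xmx2g -XX:MaxPermSize=512M -XX:ReservedCodeCacheSize=512m"
    val zincOpts = sys.env.getOrElse("ZINC_OPTS", compileJvmOpts)
    val mavenOpts = sys.env.getOrElse("MAVEN_OPTS", compileJvmOpts)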
10 changes: 0 additions & 10 deletions core/pom.xml
@@ -334,16 +334,6 @@
<artifactId>scalacheck_${scala.binary.version}</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.easymock</groupId>
<artifactId>easymockclassextension</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>asm</groupId>
<artifactId>asm</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
4 changes: 2 additions & 2 deletions core/src/main/resources/org/apache/spark/ui/static/webui.css
@@ -196,7 +196,7 @@ span.additional-metric-title {

/* Hide all additional metrics by default. This is done here rather than using JavaScript to
* avoid slow page loads for stage pages with large numbers (e.g., thousands) of tasks. */
.scheduler_delay, .deserialization_time, .fetch_wait_time, .serialization_time,
.getting_result_time {
.scheduler_delay, .deserialization_time, .fetch_wait_time, .shuffle_read_remote,
.serialization_time, .getting_result_time {
display: none;
}
9 changes: 9 additions & 0 deletions core/src/main/scala/org/apache/spark/HttpFileServer.scala
@@ -50,6 +50,15 @@ private[spark] class HttpFileServer(

def stop() {
httpServer.stop()

// If we only stop the SparkContext but the driver process keeps running as a service,
// we need to delete the temp dir here; otherwise repeated stops would accumulate temp dirs
try {
Utils.deleteRecursively(baseDir)
} catch {
case e: Exception =>
logWarning(s"Exception while deleting Spark temp dir: ${baseDir.getAbsolutePath}", e)
}
}

def addFile(file: File) : String = {
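
This cleanup matters most for long-lived driver processes that create and stop many contexts. A minimal sketch of the scenario the comment describes, assuming a local master (the batch loop is hypothetical):

    import org.apache.spark.{SparkConf, SparkContext}

    // A long-running service that repeatedly creates and stops contexts.
    // Before this change each HttpFileServer left its temp dir behind;
    // with the cleanup in stop(), the dir is deleted as each context stops.
    for (batch <- 1 to 3) {
      val sc = new SparkContext(
        new SparkConf().setAppName(s"batch-$batch").setMaster("local[2]"))
      try {
        sc.parallelize(1 to 100).count()
      } finally {
        sc.stop() // now also removes the HTTP file server's base directory
      }
    }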
3 changes: 2 additions & 1 deletion core/src/main/scala/org/apache/spark/SecurityManager.scala
@@ -27,6 +27,7 @@ import org.apache.hadoop.io.Text

import org.apache.spark.deploy.SparkHadoopUtil
import org.apache.spark.network.sasl.SecretKeyHolder
import org.apache.spark.util.Utils

/**
* Spark class responsible for security.
@@ -203,7 +204,7 @@

// always add the current user and SPARK_USER to the viewAcls
private val defaultAclUsers = Set[String](System.getProperty("user.name", ""),
Option(System.getenv("SPARK_USER")).getOrElse("")).filter(!_.isEmpty)
Utils.getCurrentUserName())

setViewAcls(defaultAclUsers, sparkConf.get("spark.ui.view.acls", ""))
setModifyAcls(defaultAclUsers, sparkConf.get("spark.modify.acls", ""))
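
Both this hunk and the SparkContext change below centralize user lookup in Utils.getCurrentUserName(), whose body is not part of this diff. A plausible sketch, assuming the helper prefers the SPARK_USER environment variable and falls back to the Hadoop UGI:

    import org.apache.hadoop.security.UserGroupInformation

    object UserNameSketch {
      // Assumed shape of the helper; the real implementation lives in
      // org.apache.spark.util.Utils and is not shown in this diff.
      def getCurrentUserName(): String =
        Option(System.getenv("SPARK_USER"))
          .getOrElse(UserGroupInformation.getCurrentUser().getShortUserName())
    }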
39 changes: 25 additions & 14 deletions core/src/main/scala/org/apache/spark/SparkContext.scala
@@ -191,7 +191,7 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationClient

// log out Spark Version in Spark driver log
logInfo(s"Running Spark version $SPARK_VERSION")

private[spark] val conf = config.clone()
conf.validateSettings()

@@ -249,7 +249,16 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationClient
conf.set("spark.executor.id", SparkContext.DRIVER_IDENTIFIER)

// Create the Spark execution environment (cache, map output tracker, etc)
private[spark] val env = SparkEnv.createDriverEnv(conf, isLocal, listenerBus)

// This function allows components created by SparkEnv to be mocked in unit tests:
private[spark] def createSparkEnv(
conf: SparkConf,
isLocal: Boolean,
listenerBus: LiveListenerBus): SparkEnv = {
SparkEnv.createDriverEnv(conf, isLocal, listenerBus)
}

private[spark] val env = createSparkEnv(conf, isLocal, listenerBus)
SparkEnv.set(env)

// Used to store a URL for each static file/jar together with the file's local timestamp
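
The new createSparkEnv hook exists so tests can substitute a stubbed SparkEnv. A hedged sketch of how a test might use it, assuming the test code lives in package org.apache.spark so it can see the private[spark] member (stubbedEnv and the helper name are hypothetical):

    package org.apache.spark // needed to access the private[spark] hook

    import org.apache.spark.scheduler.LiveListenerBus

    object EnvStubSketch {
      // Hypothetical test helper: build a context whose SparkEnv is a stub.
      def withStubbedEnv(conf: SparkConf, stubbedEnv: SparkEnv): SparkContext =
        new SparkContext(conf) {
          override private[spark] def createSparkEnv(
              conf: SparkConf,
              isLocal: Boolean,
              listenerBus: LiveListenerBus): SparkEnv = stubbedEnv
        }
    }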
@@ -335,11 +344,7 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationClient
executorEnvs ++= conf.getExecutorEnv

// Set SPARK_USER for user who is running SparkContext.
val sparkUser = Option {
Option(System.getenv("SPARK_USER")).getOrElse(System.getProperty("user.name"))
}.getOrElse {
SparkContext.SPARK_UNKNOWN_USER
}
val sparkUser = Utils.getCurrentUserName()
executorEnvs("SPARK_USER") = sparkUser

// Create and start the scheduler
@@ -826,7 +831,7 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationClient
vClass: Class[V],
conf: Configuration = hadoopConfiguration): RDD[(K, V)] = {
assertNotStopped()
// The call to new NewHadoopJob automatically adds security credentials to conf,
// The call to new NewHadoopJob automatically adds security credentials to conf,
// so we don't need to explicitly add them ourselves
val job = new NewHadoopJob(conf)
NewFileInputFormat.addInputPath(job, new Path(path))
@@ -956,11 +961,18 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationClient
}

/** Build the union of a list of RDDs. */
def union[T: ClassTag](rdds: Seq[RDD[T]]): RDD[T] = new UnionRDD(this, rdds)
def union[T: ClassTag](rdds: Seq[RDD[T]]): RDD[T] = {
val partitioners = rdds.flatMap(_.partitioner).toSet
if (partitioners.size == 1) {
new PartitionerAwareUnionRDD(this, rdds)
} else {
new UnionRDD(this, rdds)
}
}

/** Build the union of a list of RDDs passed as variable-length arguments. */
def union[T: ClassTag](first: RDD[T], rest: RDD[T]*): RDD[T] =
new UnionRDD(this, Seq(first) ++ rest)
union(Seq(first) ++ rest)

/** Get an RDD that has no partitions or elements. */
def emptyRDD[T: ClassTag] = new EmptyRDD[T](this)
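
With this change, union preserves partitioning when every input RDD shares the same partitioner, returning a PartitionerAwareUnionRDD instead of a plain UnionRDD. A small usage sketch of what that means for callers, assuming an existing SparkContext sc:

    import org.apache.spark.HashPartitioner
    import org.apache.spark.SparkContext._

    val part = new HashPartitioner(4)
    val a = sc.parallelize(Seq(1 -> "a", 2 -> "b")).partitionBy(part)
    val b = sc.parallelize(Seq(3 -> "c", 4 -> "d")).partitionBy(part)

    // Both inputs share `part`, so the union keeps it and a later
    // key-based operation can avoid re-shuffling the data.
    val u = sc.union(Seq(a, b))
    assert(u.partitioner == Some(part))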
@@ -1358,6 +1370,7 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationClient
cleaner.foreach(_.stop())
dagScheduler.stop()
dagScheduler = null
progressBar.foreach(_.stop())
taskScheduler = null
// TODO: Cache.stop()?
env.stop()
@@ -1626,8 +1639,8 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationClient
@deprecated("use defaultMinPartitions", "1.0.0")
def defaultMinSplits: Int = math.min(defaultParallelism, 2)

/**
* Default min number of partitions for Hadoop RDDs when not given by user
/**
* Default min number of partitions for Hadoop RDDs when not given by user
* Notice that we use math.min so the "defaultMinPartitions" cannot be higher than 2.
* The reasons for this are discussed in https://github.com/mesos/spark/pull/718
*/
@@ -1844,8 +1857,6 @@ object SparkContext extends Logging {

private[spark] val SPARK_JOB_INTERRUPT_ON_CANCEL = "spark.job.interruptOnCancel"

private[spark] val SPARK_UNKNOWN_USER = "<unknown>"

private[spark] val DRIVER_IDENTIFIER = "<driver>"

// The following deprecated objects have already been copied to `object AccumulatorParam` to
(Diff truncated: the remaining changed files are not shown.)
