diff --git a/assembly/pom.xml b/assembly/pom.xml index d3bb4bde0c412..f1f8b0d3682e2 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT ../pom.xml diff --git a/bagel/pom.xml b/bagel/pom.xml index 1fe61062b4606..1f3dec91314f2 100644 --- a/bagel/pom.xml +++ b/bagel/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT ../pom.xml diff --git a/bin/pyspark b/bin/pyspark index e7f6a1a072c2a..776b28dc41099 100755 --- a/bin/pyspark +++ b/bin/pyspark @@ -89,7 +89,6 @@ export PYTHONSTARTUP="$SPARK_HOME/python/pyspark/shell.py" if [[ -n "$SPARK_TESTING" ]]; then unset YARN_CONF_DIR unset HADOOP_CONF_DIR - export PYSPARK_SUBMIT_ARGS=pyspark-shell if [[ -n "$PYSPARK_DOC_TEST" ]]; then exec "$PYSPARK_DRIVER_PYTHON" -m doctest $1 else diff --git a/core/pom.xml b/core/pom.xml index 4164a3a7208d4..868834dd505ef 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT ../pom.xml @@ -275,7 +275,7 @@ org.tachyonproject tachyon-client - 0.5.0 + 0.6.1 org.apache.hadoop @@ -414,7 +414,7 @@ true true - guava,jetty-io,jetty-servlet,jetty-continuation,jetty-http,jetty-plus,jetty-util,jetty-server + guava,jetty-io,jetty-servlet,jetty-continuation,jetty-http,jetty-plus,jetty-util,jetty-server,jetty-security true diff --git a/core/src/main/scala/org/apache/spark/ContextCleaner.scala b/core/src/main/scala/org/apache/spark/ContextCleaner.scala index 0c59a61e81393..9b05c9623b704 100644 --- a/core/src/main/scala/org/apache/spark/ContextCleaner.scala +++ b/core/src/main/scala/org/apache/spark/ContextCleaner.scala @@ -145,7 +145,7 @@ private[spark] class ContextCleaner(sc: SparkContext) extends Logging { } /** Keep cleaning RDD, shuffle, and broadcast state. */ - private def keepCleaning(): Unit = Utils.logUncaughtExceptions { + private def keepCleaning(): Unit = Utils.tryOrStopSparkContext(sc) { while (!stopped) { try { val reference = Option(referenceQueue.remove(ContextCleaner.REF_QUEUE_POLL_TIMEOUT)) diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index 8121aab3b0b34..228ff715fe7cb 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -1093,7 +1093,7 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli def addFile(path: String, recursive: Boolean): Unit = { val uri = new URI(path) val schemeCorrectedPath = uri.getScheme match { - case null | "local" => "file:" + uri.getPath + case null | "local" => new File(path).getCanonicalFile.toURI.toString case _ => path } @@ -1736,7 +1736,7 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli } } - listenerBus.start() + listenerBus.start(this) } /** Post the application start event */ diff --git a/core/src/main/scala/org/apache/spark/TaskEndReason.scala b/core/src/main/scala/org/apache/spark/TaskEndReason.scala index 29a5cd5fdac76..48fd3e7e23d52 100644 --- a/core/src/main/scala/org/apache/spark/TaskEndReason.scala +++ b/core/src/main/scala/org/apache/spark/TaskEndReason.scala @@ -151,11 +151,7 @@ case object TaskKilled extends TaskFailedReason { * Task requested the driver to commit, but was denied. 
*/ @DeveloperApi -case class TaskCommitDenied( - jobID: Int, - partitionID: Int, - attemptID: Int) - extends TaskFailedReason { +case class TaskCommitDenied(jobID: Int, partitionID: Int, attemptID: Int) extends TaskFailedReason { override def toErrorString: String = s"TaskCommitDenied (Driver denied task commit)" + s" for job: $jobID, partition: $partitionID, attempt: $attemptID" } diff --git a/core/src/main/scala/org/apache/spark/TaskState.scala b/core/src/main/scala/org/apache/spark/TaskState.scala index 0bf1e4a5e2ccd..c415fe99b105e 100644 --- a/core/src/main/scala/org/apache/spark/TaskState.scala +++ b/core/src/main/scala/org/apache/spark/TaskState.scala @@ -27,6 +27,8 @@ private[spark] object TaskState extends Enumeration { type TaskState = Value + def isFailed(state: TaskState) = (LOST == state) || (FAILED == state) + def isFinished(state: TaskState) = FINISHED_STATES.contains(state) def toMesos(state: TaskState): MesosTaskState = state match { @@ -46,5 +48,6 @@ private[spark] object TaskState extends Enumeration { case MesosTaskState.TASK_FAILED => FAILED case MesosTaskState.TASK_KILLED => KILLED case MesosTaskState.TASK_LOST => LOST + case MesosTaskState.TASK_ERROR => LOST } } diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaDoubleRDD.scala b/core/src/main/scala/org/apache/spark/api/java/JavaDoubleRDD.scala index 8e8f7f6c4fda2..79e4ebf2db578 100644 --- a/core/src/main/scala/org/apache/spark/api/java/JavaDoubleRDD.scala +++ b/core/src/main/scala/org/apache/spark/api/java/JavaDoubleRDD.scala @@ -32,7 +32,8 @@ import org.apache.spark.storage.StorageLevel import org.apache.spark.util.StatCounter import org.apache.spark.util.Utils -class JavaDoubleRDD(val srdd: RDD[scala.Double]) extends JavaRDDLike[JDouble, JavaDoubleRDD] { +class JavaDoubleRDD(val srdd: RDD[scala.Double]) + extends AbstractJavaRDDLike[JDouble, JavaDoubleRDD] { override val classTag: ClassTag[JDouble] = implicitly[ClassTag[JDouble]] diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala b/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala index 7af3538262fd6..a023712be1166 100644 --- a/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala +++ b/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala @@ -39,12 +39,13 @@ import org.apache.spark.api.java.function.{Function => JFunction, Function2 => J import org.apache.spark.partial.{BoundedDouble, PartialResult} import org.apache.spark.rdd.{OrderedRDDFunctions, RDD} import org.apache.spark.rdd.RDD.rddToPairRDDFunctions +import org.apache.spark.serializer.Serializer import org.apache.spark.storage.StorageLevel import org.apache.spark.util.Utils class JavaPairRDD[K, V](val rdd: RDD[(K, V)]) (implicit val kClassTag: ClassTag[K], implicit val vClassTag: ClassTag[V]) - extends JavaRDDLike[(K, V), JavaPairRDD[K, V]] { + extends AbstractJavaRDDLike[(K, V), JavaPairRDD[K, V]] { override def wrapRDD(rdd: RDD[(K, V)]): JavaPairRDD[K, V] = JavaPairRDD.fromRDD(rdd) @@ -227,24 +228,51 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)]) * - `mergeValue`, to merge a V into a C (e.g., adds it to the end of a list) * - `mergeCombiners`, to combine two C's into a single one. * - * In addition, users can control the partitioning of the output RDD, and whether to perform - * map-side aggregation (if a mapper can produce multiple items with the same key). 
+ * In addition, users can control the partitioning of the output RDD, the serializer that is used + * for the shuffle, and whether to perform map-side aggregation (if a mapper can produce multiple + * items with the same key). */ def combineByKey[C](createCombiner: JFunction[V, C], - mergeValue: JFunction2[C, V, C], - mergeCombiners: JFunction2[C, C, C], - partitioner: Partitioner): JavaPairRDD[K, C] = { - implicit val ctag: ClassTag[C] = fakeClassTag + mergeValue: JFunction2[C, V, C], + mergeCombiners: JFunction2[C, C, C], + partitioner: Partitioner, + mapSideCombine: Boolean, + serializer: Serializer): JavaPairRDD[K, C] = { + implicit val ctag: ClassTag[C] = fakeClassTag fromRDD(rdd.combineByKey( createCombiner, mergeValue, mergeCombiners, - partitioner + partitioner, + mapSideCombine, + serializer )) } /** - * Simplified version of combineByKey that hash-partitions the output RDD. + * Generic function to combine the elements for each key using a custom set of aggregation + * functions. Turns a JavaPairRDD[(K, V)] into a result of type JavaPairRDD[(K, C)], for a + * "combined type" C * Note that V and C can be different -- for example, one might group an + * RDD of type (Int, Int) into an RDD of type (Int, List[Int]). Users provide three + * functions: + * + * - `createCombiner`, which turns a V into a C (e.g., creates a one-element list) + * - `mergeValue`, to merge a V into a C (e.g., adds it to the end of a list) + * - `mergeCombiners`, to combine two C's into a single one. + * + * In addition, users can control the partitioning of the output RDD. This method automatically + * uses map-side aggregation in shuffling the RDD. + */ + def combineByKey[C](createCombiner: JFunction[V, C], + mergeValue: JFunction2[C, V, C], + mergeCombiners: JFunction2[C, C, C], + partitioner: Partitioner): JavaPairRDD[K, C] = { + combineByKey(createCombiner, mergeValue, mergeCombiners, partitioner, true, null) + } + + /** + * Simplified version of combineByKey that hash-partitions the output RDD and uses map-side + * aggregation. */ def combineByKey[C](createCombiner: JFunction[V, C], mergeValue: JFunction2[C, V, C], @@ -488,7 +516,7 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)]) /** * Simplified version of combineByKey that hash-partitions the resulting RDD using the existing - * partitioner/parallelism level. + * partitioner/parallelism level and using map-side aggregation. */ def combineByKey[C](createCombiner: JFunction[V, C], mergeValue: JFunction2[C, V, C], diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaRDD.scala b/core/src/main/scala/org/apache/spark/api/java/JavaRDD.scala index 86fb374bef1e3..3e9beb670f7ad 100644 --- a/core/src/main/scala/org/apache/spark/api/java/JavaRDD.scala +++ b/core/src/main/scala/org/apache/spark/api/java/JavaRDD.scala @@ -30,7 +30,7 @@ import org.apache.spark.storage.StorageLevel import org.apache.spark.util.Utils class JavaRDD[T](val rdd: RDD[T])(implicit val classTag: ClassTag[T]) - extends JavaRDDLike[T, JavaRDD[T]] { + extends AbstractJavaRDDLike[T, JavaRDD[T]] { override def wrapRDD(rdd: RDD[T]): JavaRDD[T] = JavaRDD.fromRDD(rdd) @@ -101,12 +101,23 @@ class JavaRDD[T](val rdd: RDD[T])(implicit val classTag: ClassTag[T]) /** * Return a sampled subset of this RDD.
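For context on the new `combineByKey` overload above: the aggregation itself is driven entirely by the three user-supplied functions, while `partitioner`, `mapSideCombine`, and `serializer` only control how the shuffle is carried out. A minimal Scala sketch of that three-function contract, using the plain Scala pair-RDD API (illustration only, not part of this patch; the local master, app name, and sample data are arbitrary):

```scala
import org.apache.spark.{SparkConf, SparkContext}

object CombineByKeySketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("combineByKey-sketch").setMaster("local[2]"))
    val pairs = sc.parallelize(Seq(("a", 1), ("b", 2), ("a", 3)))

    // createCombiner turns a single V into a C, mergeValue folds another V into an
    // existing C, and mergeCombiners merges two partial C's built on different partitions.
    val grouped = pairs.combineByKey[List[Int]](
      (v: Int) => List(v),                          // createCombiner
      (c: List[Int], v: Int) => v :: c,             // mergeValue
      (c1: List[Int], c2: List[Int]) => c1 ::: c2)  // mergeCombiners

    grouped.collect().foreach(println)  // e.g. (a,List(3, 1)), (b,List(2))
    sc.stop()
  }
}
```

Each key ends up with a single combined value no matter how many partitions contributed partial results, which is exactly what `mergeCombiners` is responsible for.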
+ * + * @param withReplacement can elements be sampled multiple times (replaced when sampled out) + * @param fraction expected size of the sample as a fraction of this RDD's size + * without replacement: probability that each element is chosen; fraction must be [0, 1] + * with replacement: expected number of times each element is chosen; fraction must be >= 0 */ def sample(withReplacement: Boolean, fraction: Double): JavaRDD[T] = sample(withReplacement, fraction, Utils.random.nextLong) /** * Return a sampled subset of this RDD. + * + * @param withReplacement can elements be sampled multiple times (replaced when sampled out) + * @param fraction expected size of the sample as a fraction of this RDD's size + * without replacement: probability that each element is chosen; fraction must be [0, 1] + * with replacement: expected number of times each element is chosen; fraction must be >= 0 + * @param seed seed for the random number generator */ def sample(withReplacement: Boolean, fraction: Double, seed: Long): JavaRDD[T] = wrapRDD(rdd.sample(withReplacement, fraction, seed)) diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala b/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala index 0f91c942ecd50..8da42934a7d96 100644 --- a/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala +++ b/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala @@ -38,6 +38,14 @@ import org.apache.spark.rdd.RDD import org.apache.spark.storage.StorageLevel import org.apache.spark.util.Utils +/** + * As a workaround for https://issues.scala-lang.org/browse/SI-8905, implementations + * of JavaRDDLike should extend this dummy abstract class instead of directly inheriting + * from the trait. See SPARK-3266 for additional details. + */ +private[spark] abstract class AbstractJavaRDDLike[T, This <: JavaRDDLike[T, This]] + extends JavaRDDLike[T, This] + /** * Defines operations common to several Java RDD implementations. * Note that this trait is not intended to be implemented by user code. diff --git a/core/src/main/scala/org/apache/spark/deploy/ClientArguments.scala b/core/src/main/scala/org/apache/spark/deploy/ClientArguments.scala index 415bd50591692..53bc62aff7395 100644 --- a/core/src/main/scala/org/apache/spark/deploy/ClientArguments.scala +++ b/core/src/main/scala/org/apache/spark/deploy/ClientArguments.scala @@ -28,7 +28,7 @@ import org.apache.spark.util.{IntParam, MemoryParam} /** * Command-line parser for the driver client. */ -private[spark] class ClientArguments(args: Array[String]) { +private[deploy] class ClientArguments(args: Array[String]) { import ClientArguments._ var cmd: String = "" // 'launch' or 'kill' @@ -96,7 +96,7 @@ private[spark] class ClientArguments(args: Array[String]) { /** * Print usage and exit JVM with the given exit code. */ - def printUsageAndExit(exitCode: Int) { + private def printUsageAndExit(exitCode: Int) { // TODO: It wouldn't be too hard to allow users to submit their app and dependency jars // separately similar to in the YARN client. 
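Regarding the `fraction` parameter documented in the `sample` scaladoc above: its meaning depends on `withReplacement`. It is a per-element inclusion probability when sampling without replacement, and the expected number of draws per element when sampling with replacement, so in the latter case it may exceed 1.0. A short sketch against the Scala `RDD.sample` API (illustration only, not part of this patch; master, seed, and data set are arbitrary):

```scala
import org.apache.spark.{SparkConf, SparkContext}

object SampleSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("sample-sketch").setMaster("local[2]"))
    val data = sc.parallelize(1 to 1000)

    // Without replacement: each element is kept with probability 0.1,
    // so the sample size is only approximately 100.
    val sparse = data.sample(withReplacement = false, fraction = 0.1, seed = 42L)

    // With replacement: each element is drawn ~2 times on average,
    // so duplicates are expected and the sample is roughly twice the input size.
    val dense = data.sample(withReplacement = true, fraction = 2.0, seed = 42L)

    println(s"without replacement: ${sparse.count()} elements")
    println(s"with replacement:    ${dense.count()} elements")
    sc.stop()
  }
}
```

Because sampling is randomized, the counts only approximate `fraction * data.count()`; the `seed` merely makes a given run reproducible.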
val usage = @@ -116,10 +116,10 @@ private[spark] class ClientArguments(args: Array[String]) { } } -object ClientArguments { - private[spark] val DEFAULT_CORES = 1 - private[spark] val DEFAULT_MEMORY = 512 // MB - private[spark] val DEFAULT_SUPERVISE = false +private[deploy] object ClientArguments { + val DEFAULT_CORES = 1 + val DEFAULT_MEMORY = 512 // MB + val DEFAULT_SUPERVISE = false def isValidJarUrl(s: String): Boolean = { try { diff --git a/core/src/main/scala/org/apache/spark/deploy/DriverDescription.scala b/core/src/main/scala/org/apache/spark/deploy/DriverDescription.scala index b056a19ce6598..659fb434a80f5 100644 --- a/core/src/main/scala/org/apache/spark/deploy/DriverDescription.scala +++ b/core/src/main/scala/org/apache/spark/deploy/DriverDescription.scala @@ -17,7 +17,7 @@ package org.apache.spark.deploy -private[spark] class DriverDescription( +private[deploy] class DriverDescription( val jarUrl: String, val mem: Int, val cores: Int, diff --git a/core/src/main/scala/org/apache/spark/deploy/ExecutorDescription.scala b/core/src/main/scala/org/apache/spark/deploy/ExecutorDescription.scala index 2abf0b69dddb3..ec23371b52f93 100644 --- a/core/src/main/scala/org/apache/spark/deploy/ExecutorDescription.scala +++ b/core/src/main/scala/org/apache/spark/deploy/ExecutorDescription.scala @@ -22,7 +22,7 @@ package org.apache.spark.deploy * This state is sufficient for the Master to reconstruct its internal data structures during * failover. */ -private[spark] class ExecutorDescription( +private[deploy] class ExecutorDescription( val appId: String, val execId: Int, val cores: Int, diff --git a/core/src/main/scala/org/apache/spark/deploy/ExecutorState.scala b/core/src/main/scala/org/apache/spark/deploy/ExecutorState.scala index 9f34d01e6db48..efa88c62e1f5d 100644 --- a/core/src/main/scala/org/apache/spark/deploy/ExecutorState.scala +++ b/core/src/main/scala/org/apache/spark/deploy/ExecutorState.scala @@ -17,7 +17,7 @@ package org.apache.spark.deploy -private[spark] object ExecutorState extends Enumeration { +private[deploy] object ExecutorState extends Enumeration { val LAUNCHING, LOADING, RUNNING, KILLED, FAILED, LOST, EXITED = Value diff --git a/core/src/main/scala/org/apache/spark/deploy/FaultToleranceTest.scala b/core/src/main/scala/org/apache/spark/deploy/FaultToleranceTest.scala index 47dbcd87c35b5..5668b53fc6f4f 100644 --- a/core/src/main/scala/org/apache/spark/deploy/FaultToleranceTest.scala +++ b/core/src/main/scala/org/apache/spark/deploy/FaultToleranceTest.scala @@ -33,6 +33,7 @@ import org.json4s.jackson.JsonMethods import org.apache.spark.{Logging, SparkConf, SparkContext} import org.apache.spark.deploy.master.{RecoveryState, SparkCuratorUtil} +import org.apache.spark.util.Utils /** * This suite tests the fault tolerance of the Spark standalone scheduler, mainly the Master. @@ -55,29 +56,29 @@ import org.apache.spark.deploy.master.{RecoveryState, SparkCuratorUtil} * - The docker images tagged spark-test-master and spark-test-worker are built from the * docker/ directory. Run 'docker/spark-test/build' to generate these. 
*/ -private[spark] object FaultToleranceTest extends App with Logging { +private object FaultToleranceTest extends App with Logging { - val conf = new SparkConf() - val ZK_DIR = conf.get("spark.deploy.zookeeper.dir", "/spark") + private val conf = new SparkConf() + private val ZK_DIR = conf.get("spark.deploy.zookeeper.dir", "/spark") - val masters = ListBuffer[TestMasterInfo]() - val workers = ListBuffer[TestWorkerInfo]() - var sc: SparkContext = _ + private val masters = ListBuffer[TestMasterInfo]() + private val workers = ListBuffer[TestWorkerInfo]() + private var sc: SparkContext = _ - val zk = SparkCuratorUtil.newClient(conf) + private val zk = SparkCuratorUtil.newClient(conf) - var numPassed = 0 - var numFailed = 0 + private var numPassed = 0 + private var numFailed = 0 - val sparkHome = System.getenv("SPARK_HOME") + private val sparkHome = System.getenv("SPARK_HOME") assertTrue(sparkHome != null, "Run with a valid SPARK_HOME") - val containerSparkHome = "/opt/spark" - val dockerMountDir = "%s:%s".format(sparkHome, containerSparkHome) + private val containerSparkHome = "/opt/spark" + private val dockerMountDir = "%s:%s".format(sparkHome, containerSparkHome) System.setProperty("spark.driver.host", "172.17.42.1") // default docker host ip - def afterEach() { + private def afterEach() { if (sc != null) { sc.stop() sc = null @@ -179,7 +180,7 @@ private[spark] object FaultToleranceTest extends App with Logging { } } - def test(name: String)(fn: => Unit) { + private def test(name: String)(fn: => Unit) { try { fn numPassed += 1 @@ -197,19 +198,19 @@ private[spark] object FaultToleranceTest extends App with Logging { afterEach() } - def addMasters(num: Int) { + private def addMasters(num: Int) { logInfo(s">>>>> ADD MASTERS $num <<<<<") (1 to num).foreach { _ => masters += SparkDocker.startMaster(dockerMountDir) } } - def addWorkers(num: Int) { + private def addWorkers(num: Int) { logInfo(s">>>>> ADD WORKERS $num <<<<<") val masterUrls = getMasterUrls(masters) (1 to num).foreach { _ => workers += SparkDocker.startWorker(dockerMountDir, masterUrls) } } /** Creates a SparkContext, which constructs a Client to interact with our cluster. 
*/ - def createClient() = { + private def createClient() = { logInfo(">>>>> CREATE CLIENT <<<<<") if (sc != null) { sc.stop() } // Counter-hack: Because of a hack in SparkEnv#create() that changes this @@ -218,17 +219,17 @@ private[spark] object FaultToleranceTest extends App with Logging { sc = new SparkContext(getMasterUrls(masters), "fault-tolerance", containerSparkHome) } - def getMasterUrls(masters: Seq[TestMasterInfo]): String = { + private def getMasterUrls(masters: Seq[TestMasterInfo]): String = { "spark://" + masters.map(master => master.ip + ":7077").mkString(",") } - def getLeader: TestMasterInfo = { + private def getLeader: TestMasterInfo = { val leaders = masters.filter(_.state == RecoveryState.ALIVE) assertTrue(leaders.size == 1) leaders(0) } - def killLeader(): Unit = { + private def killLeader(): Unit = { logInfo(">>>>> KILL LEADER <<<<<") masters.foreach(_.readState()) val leader = getLeader @@ -236,9 +237,9 @@ private[spark] object FaultToleranceTest extends App with Logging { leader.kill() } - def delay(secs: Duration = 5.seconds) = Thread.sleep(secs.toMillis) + private def delay(secs: Duration = 5.seconds) = Thread.sleep(secs.toMillis) - def terminateCluster() { + private def terminateCluster() { logInfo(">>>>> TERMINATE CLUSTER <<<<<") masters.foreach(_.kill()) workers.foreach(_.kill()) @@ -247,7 +248,7 @@ private[spark] object FaultToleranceTest extends App with Logging { } /** This includes Client retry logic, so it may take a while if the cluster is recovering. */ - def assertUsable() = { + private def assertUsable() = { val f = future { try { val res = sc.parallelize(0 until 10).collect() @@ -269,7 +270,7 @@ private[spark] object FaultToleranceTest extends App with Logging { * Asserts that the cluster is usable and that the expected masters and workers * are all alive in a proper configuration (e.g., only one leader). 
*/ - def assertValidClusterState() = { + private def assertValidClusterState() = { logInfo(">>>>> ASSERT VALID CLUSTER STATE <<<<<") assertUsable() var numAlive = 0 @@ -325,7 +326,7 @@ private[spark] object FaultToleranceTest extends App with Logging { } } - def assertTrue(bool: Boolean, message: String = "") { + private def assertTrue(bool: Boolean, message: String = "") { if (!bool) { throw new IllegalStateException("Assertion failed: " + message) } @@ -335,7 +336,7 @@ private[spark] object FaultToleranceTest extends App with Logging { numFailed)) } -private[spark] class TestMasterInfo(val ip: String, val dockerId: DockerId, val logFile: File) +private class TestMasterInfo(val ip: String, val dockerId: DockerId, val logFile: File) extends Logging { implicit val formats = org.json4s.DefaultFormats @@ -377,7 +378,7 @@ private[spark] class TestMasterInfo(val ip: String, val dockerId: DockerId, val format(ip, dockerId.id, logFile.getAbsolutePath, state) } -private[spark] class TestWorkerInfo(val ip: String, val dockerId: DockerId, val logFile: File) +private class TestWorkerInfo(val ip: String, val dockerId: DockerId, val logFile: File) extends Logging { implicit val formats = org.json4s.DefaultFormats @@ -390,7 +391,7 @@ private[spark] class TestWorkerInfo(val ip: String, val dockerId: DockerId, val "[ip=%s, id=%s, logFile=%s]".format(ip, dockerId, logFile.getAbsolutePath) } -private[spark] object SparkDocker { +private object SparkDocker { def startMaster(mountDir: String): TestMasterInfo = { val cmd = Docker.makeRunCmd("spark-test-master", mountDir = mountDir) val (ip, id, outFile) = startNode(cmd) @@ -405,8 +406,7 @@ private[spark] object SparkDocker { private def startNode(dockerCmd: ProcessBuilder) : (String, DockerId, File) = { val ipPromise = promise[String]() - val outFile = File.createTempFile("fault-tolerance-test", "") - outFile.deleteOnExit() + val outFile = File.createTempFile("fault-tolerance-test", "", Utils.createTempDir()) val outStream: FileWriter = new FileWriter(outFile) def findIpAndLog(line: String): Unit = { if (line.startsWith("CONTAINER_IP=")) { @@ -425,11 +425,11 @@ private[spark] object SparkDocker { } } -private[spark] class DockerId(val id: String) { +private class DockerId(val id: String) { override def toString = id } -private[spark] object Docker extends Logging { +private object Docker extends Logging { def makeRunCmd(imageTag: String, args: String = "", mountDir: String = ""): ProcessBuilder = { val mountCmd = if (mountDir != "") { " -v " + mountDir } else "" diff --git a/core/src/main/scala/org/apache/spark/deploy/JsonProtocol.scala b/core/src/main/scala/org/apache/spark/deploy/JsonProtocol.scala index 696f32a6f5730..458a7c3a455de 100644 --- a/core/src/main/scala/org/apache/spark/deploy/JsonProtocol.scala +++ b/core/src/main/scala/org/apache/spark/deploy/JsonProtocol.scala @@ -23,7 +23,7 @@ import org.apache.spark.deploy.DeployMessages.{MasterStateResponse, WorkerStateR import org.apache.spark.deploy.master.{ApplicationInfo, DriverInfo, WorkerInfo} import org.apache.spark.deploy.worker.ExecutorRunner -private[spark] object JsonProtocol { +private[deploy] object JsonProtocol { def writeWorkerInfo(obj: WorkerInfo) = { ("id" -> obj.id) ~ ("host" -> obj.host) ~ diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index 4a74641f4e1fa..4f506be63fe59 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ 
b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -45,7 +45,7 @@ import org.apache.spark.util.{ChildFirstURLClassLoader, MutableURLClassLoader, U * Whether to submit, kill, or request the status of an application. * The latter two operations are currently supported only for standalone cluster mode. */ -private[spark] object SparkSubmitAction extends Enumeration { +private[deploy] object SparkSubmitAction extends Enumeration { type SparkSubmitAction = Value val SUBMIT, KILL, REQUEST_STATUS = Value } @@ -137,7 +137,7 @@ object SparkSubmit { * Second, we use this launch environment to invoke the main method of the child * main class. */ - private[spark] def submit(args: SparkSubmitArguments): Unit = { + private def submit(args: SparkSubmitArguments): Unit = { val (childArgs, childClasspath, sysProps, childMainClass) = prepareSubmitEnvironment(args) def doRunMain(): Unit = { @@ -199,7 +199,7 @@ object SparkSubmit { * (4) the main class for the child * Exposed for testing. */ - private[spark] def prepareSubmitEnvironment(args: SparkSubmitArguments) + private[deploy] def prepareSubmitEnvironment(args: SparkSubmitArguments) : (Seq[String], Seq[String], Map[String, String], String) = { // Return values val childArgs = new ArrayBuffer[String]() @@ -598,32 +598,32 @@ object SparkSubmit { /** * Return whether the given primary resource represents a shell. */ - private[spark] def isShell(primaryResource: String): Boolean = { + private[deploy] def isShell(primaryResource: String): Boolean = { primaryResource == SPARK_SHELL || primaryResource == PYSPARK_SHELL } /** * Return whether the given main class represents a sql shell. */ - private[spark] def isSqlShell(mainClass: String): Boolean = { + private def isSqlShell(mainClass: String): Boolean = { mainClass == "org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver" } /** * Return whether the given main class represents a thrift server. */ - private[spark] def isThriftServer(mainClass: String): Boolean = { + private def isThriftServer(mainClass: String): Boolean = { mainClass == "org.apache.spark.sql.hive.thriftserver.HiveThriftServer2" } /** * Return whether the given primary resource requires running python. */ - private[spark] def isPython(primaryResource: String): Boolean = { + private[deploy] def isPython(primaryResource: String): Boolean = { primaryResource.endsWith(".py") || primaryResource == PYSPARK_SHELL } - private[spark] def isInternal(primaryResource: String): Boolean = { + private[deploy] def isInternal(primaryResource: String): Boolean = { primaryResource == SPARK_INTERNAL } @@ -631,7 +631,7 @@ object SparkSubmit { * Merge a sequence of comma-separated file lists, some of which may be null to indicate * no files, into a single comma-separated string. */ - private[spark] def mergeFileLists(lists: String*): String = { + private def mergeFileLists(lists: String*): String = { val merged = lists.filter(_ != null) .flatMap(_.split(",")) .mkString(",") @@ -640,10 +640,10 @@ object SparkSubmit { } /** Provides utility functions to be used inside SparkSubmit. 
*/ -private[spark] object SparkSubmitUtils { +private[deploy] object SparkSubmitUtils { // Exposed for testing - private[spark] var printStream = SparkSubmit.printStream + var printStream = SparkSubmit.printStream /** * Represents a Maven Coordinate @@ -651,7 +651,7 @@ private[spark] object SparkSubmitUtils { * @param artifactId the artifactId of the coordinate * @param version the version of the coordinate */ - private[spark] case class MavenCoordinate(groupId: String, artifactId: String, version: String) + private[deploy] case class MavenCoordinate(groupId: String, artifactId: String, version: String) /** * Extracts maven coordinates from a comma-delimited string. Coordinates should be provided @@ -659,7 +659,7 @@ private[spark] object SparkSubmitUtils { * @param coordinates Comma-delimited string of maven coordinates * @return Sequence of Maven coordinates */ - private[spark] def extractMavenCoordinates(coordinates: String): Seq[MavenCoordinate] = { + def extractMavenCoordinates(coordinates: String): Seq[MavenCoordinate] = { coordinates.split(",").map { p => val splits = p.replace("/", ":").split(":") require(splits.length == 3, s"Provided Maven Coordinates must be in the form " + @@ -679,7 +679,7 @@ private[spark] object SparkSubmitUtils { * @param remoteRepos Comma-delimited string of remote repositories * @return A ChainResolver used by Ivy to search for and resolve dependencies. */ - private[spark] def createRepoResolvers(remoteRepos: Option[String]): ChainResolver = { + def createRepoResolvers(remoteRepos: Option[String]): ChainResolver = { // We need a chain resolver if we want to check multiple repositories val cr = new ChainResolver cr.setName("list") @@ -722,7 +722,7 @@ private[spark] object SparkSubmitUtils { * @param cacheDirectory directory where jars are cached * @return a comma-delimited list of paths for the dependencies */ - private[spark] def resolveDependencyPaths( + def resolveDependencyPaths( artifacts: Array[AnyRef], cacheDirectory: File): String = { artifacts.map { artifactInfo => @@ -734,7 +734,7 @@ private[spark] object SparkSubmitUtils { } /** Adds the given maven coordinates to Ivy's module descriptor. */ - private[spark] def addDependenciesToIvy( + def addDependenciesToIvy( md: DefaultModuleDescriptor, artifacts: Seq[MavenCoordinate], ivyConfName: String): Unit = { @@ -748,7 +748,7 @@ private[spark] object SparkSubmitUtils { } /** Add exclusion rules for dependencies already included in the spark-assembly */ - private[spark] def addExclusionRules( + def addExclusionRules( ivySettings: IvySettings, ivyConfName: String, md: DefaultModuleDescriptor): Unit = { @@ -777,7 +777,7 @@ private[spark] object SparkSubmitUtils { } /** A nice function to use in tests as well. Values are dummy strings. 
*/ - private[spark] def getModuleDescriptor = DefaultModuleDescriptor.newDefaultInstance( + def getModuleDescriptor = DefaultModuleDescriptor.newDefaultInstance( ModuleRevisionId.newInstance("org.apache.spark", "spark-submit-parent", "1.0")) /** @@ -788,7 +788,7 @@ private[spark] object SparkSubmitUtils { * @return The comma-delimited path to the jars of the given maven artifacts including their * transitive dependencies */ - private[spark] def resolveMavenCoordinates( + def resolveMavenCoordinates( coordinates: String, remoteRepos: Option[String], ivyPath: Option[String], diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala index 94e4bdbfb7d7b..2250d5a28e4ef 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala @@ -32,7 +32,7 @@ import org.apache.spark.util.Utils * Parses and encapsulates arguments from the spark-submit script. * The env argument is used for testing. */ -private[spark] class SparkSubmitArguments(args: Seq[String], env: Map[String, String] = sys.env) +private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, String] = sys.env) extends SparkSubmitArgumentsParser { var master: String = null var deployMode: String = null diff --git a/core/src/main/scala/org/apache/spark/deploy/client/AppClient.scala b/core/src/main/scala/org/apache/spark/deploy/client/AppClient.scala index ffe940fbda2fb..2d24083a77b73 100644 --- a/core/src/main/scala/org/apache/spark/deploy/client/AppClient.scala +++ b/core/src/main/scala/org/apache/spark/deploy/client/AppClient.scala @@ -47,18 +47,18 @@ private[spark] class AppClient( conf: SparkConf) extends Logging { - val masterAkkaUrls = masterUrls.map(Master.toAkkaUrl(_, AkkaUtils.protocol(actorSystem))) + private val masterAkkaUrls = masterUrls.map(Master.toAkkaUrl(_, AkkaUtils.protocol(actorSystem))) - val REGISTRATION_TIMEOUT = 20.seconds - val REGISTRATION_RETRIES = 3 + private val REGISTRATION_TIMEOUT = 20.seconds + private val REGISTRATION_RETRIES = 3 - var masterAddress: Address = null - var actor: ActorRef = null - var appId: String = null - var registered = false - var activeMasterUrl: String = null + private var masterAddress: Address = null + private var actor: ActorRef = null + private var appId: String = null + private var registered = false + private var activeMasterUrl: String = null - class ClientActor extends Actor with ActorLogReceive with Logging { + private class ClientActor extends Actor with ActorLogReceive with Logging { var master: ActorSelection = null var alreadyDisconnected = false // To avoid calling listener.disconnected() multiple times var alreadyDead = false // To avoid calling listener.dead() multiple times diff --git a/core/src/main/scala/org/apache/spark/deploy/client/TestClient.scala b/core/src/main/scala/org/apache/spark/deploy/client/TestClient.scala index 88a0862b96afe..c1c4812f17fbe 100644 --- a/core/src/main/scala/org/apache/spark/deploy/client/TestClient.scala +++ b/core/src/main/scala/org/apache/spark/deploy/client/TestClient.scala @@ -23,7 +23,7 @@ import org.apache.spark.util.{AkkaUtils, Utils} private[spark] object TestClient { - class TestListener extends AppClientListener with Logging { + private class TestListener extends AppClientListener with Logging { def connected(id: String) { logInfo("Connected to master, got app ID " + id) } diff --git 
a/core/src/main/scala/org/apache/spark/deploy/history/ApplicationHistoryProvider.scala b/core/src/main/scala/org/apache/spark/deploy/history/ApplicationHistoryProvider.scala index 553bf3cb945ab..ea6c85ee511d5 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/ApplicationHistoryProvider.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/ApplicationHistoryProvider.scala @@ -19,7 +19,7 @@ package org.apache.spark.deploy.history import org.apache.spark.ui.SparkUI -private[spark] case class ApplicationHistoryInfo( +private[history] case class ApplicationHistoryInfo( id: String, name: String, startTime: Long, @@ -28,7 +28,7 @@ private[spark] case class ApplicationHistoryInfo( sparkUser: String, completed: Boolean = false) -private[spark] abstract class ApplicationHistoryProvider { +private[history] abstract class ApplicationHistoryProvider { /** * Returns a list of applications available for the history server to show. diff --git a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala index 16d88c17d1a76..db7c499661319 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala @@ -93,7 +93,7 @@ private[history] class FsHistoryProvider(conf: SparkConf) extends ApplicationHis */ private def getRunner(operateFun: () => Unit): Runnable = { new Runnable() { - override def run() = Utils.logUncaughtExceptions { + override def run() = Utils.tryOrExit { operateFun() } } @@ -233,7 +233,8 @@ private[history] class FsHistoryProvider(conf: SparkConf) extends ApplicationHis } catch { case e: Exception => logError( - s"Exception encountered when attempting to load application log ${fileStatus.getPath}") + s"Exception encountered when attempting to load application log ${fileStatus.getPath}", + e) None } }.toSeq.sortWith(compareAppInfo) diff --git a/core/src/main/scala/org/apache/spark/deploy/history/HistoryPage.scala b/core/src/main/scala/org/apache/spark/deploy/history/HistoryPage.scala index 26ebc75971c66..6e432d63c6b5a 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/HistoryPage.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/HistoryPage.scala @@ -23,7 +23,7 @@ import scala.xml.Node import org.apache.spark.ui.{WebUIPage, UIUtils} -private[spark] class HistoryPage(parent: HistoryServer) extends WebUIPage("") { +private[history] class HistoryPage(parent: HistoryServer) extends WebUIPage("") { private val pageSize = 20 private val plusOrMinus = 2 diff --git a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServerArguments.scala b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServerArguments.scala index b1270ade9f750..a2a97a7877ce7 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServerArguments.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServerArguments.scala @@ -23,7 +23,8 @@ import org.apache.spark.util.Utils /** * Command-line parser for the master. 
*/ -private[spark] class HistoryServerArguments(conf: SparkConf, args: Array[String]) extends Logging { +private[history] class HistoryServerArguments(conf: SparkConf, args: Array[String]) + extends Logging { private var propertiesFile: String = null parse(args.toList) diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ApplicationInfo.scala b/core/src/main/scala/org/apache/spark/deploy/master/ApplicationInfo.scala index a962dc4af2f6c..536aedb6f9fe9 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/ApplicationInfo.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/ApplicationInfo.scala @@ -28,7 +28,7 @@ import org.apache.spark.annotation.DeveloperApi import org.apache.spark.deploy.ApplicationDescription import org.apache.spark.util.Utils -private[spark] class ApplicationInfo( +private[deploy] class ApplicationInfo( val startTime: Long, val id: String, val desc: ApplicationDescription, @@ -75,14 +75,15 @@ private[spark] class ApplicationInfo( } } - def addExecutor(worker: WorkerInfo, cores: Int, useID: Option[Int] = None): ExecutorDesc = { + private[master] def addExecutor(worker: WorkerInfo, cores: Int, useID: Option[Int] = None): + ExecutorDesc = { val exec = new ExecutorDesc(newExecutorId(useID), this, worker, cores, desc.memoryPerSlave) executors(exec.id) = exec coresGranted += cores exec } - def removeExecutor(exec: ExecutorDesc) { + private[master] def removeExecutor(exec: ExecutorDesc) { if (executors.contains(exec.id)) { removedExecutors += executors(exec.id) executors -= exec.id @@ -90,22 +91,22 @@ private[spark] class ApplicationInfo( } } - val requestedCores = desc.maxCores.getOrElse(defaultCores) + private[master] val requestedCores = desc.maxCores.getOrElse(defaultCores) - def coresLeft: Int = requestedCores - coresGranted + private[master] def coresLeft: Int = requestedCores - coresGranted private var _retryCount = 0 - def retryCount = _retryCount + private[master] def retryCount = _retryCount - def incrementRetryCount() = { + private[master] def incrementRetryCount() = { _retryCount += 1 _retryCount } - def resetRetryCount() = _retryCount = 0 + private[master] def resetRetryCount() = _retryCount = 0 - def markFinished(endState: ApplicationState.Value) { + private[master] def markFinished(endState: ApplicationState.Value) { state = endState endTime = System.currentTimeMillis() } diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ApplicationSource.scala b/core/src/main/scala/org/apache/spark/deploy/master/ApplicationSource.scala index 38db02cd2421b..017e8b55cbe7f 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/ApplicationSource.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/ApplicationSource.scala @@ -21,7 +21,7 @@ import com.codahale.metrics.{Gauge, MetricRegistry} import org.apache.spark.metrics.source.Source -class ApplicationSource(val application: ApplicationInfo) extends Source { +private[master] class ApplicationSource(val application: ApplicationInfo) extends Source { override val metricRegistry = new MetricRegistry() override val sourceName = "%s.%s.%s".format("application", application.desc.name, System.currentTimeMillis()) diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ApplicationState.scala b/core/src/main/scala/org/apache/spark/deploy/master/ApplicationState.scala index f5b946329ae9b..37bfcdfdf4777 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/ApplicationState.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/ApplicationState.scala @@ 
-17,7 +17,7 @@ package org.apache.spark.deploy.master -private[spark] object ApplicationState extends Enumeration { +private[master] object ApplicationState extends Enumeration { type ApplicationState = Value diff --git a/core/src/main/scala/org/apache/spark/deploy/master/DriverInfo.scala b/core/src/main/scala/org/apache/spark/deploy/master/DriverInfo.scala index 9d3d7938c6ccb..b197dbcbfe294 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/DriverInfo.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/DriverInfo.scala @@ -23,7 +23,7 @@ import org.apache.spark.annotation.DeveloperApi import org.apache.spark.deploy.DriverDescription import org.apache.spark.util.Utils -private[spark] class DriverInfo( +private[deploy] class DriverInfo( val startTime: Long, val id: String, val desc: DriverDescription, diff --git a/core/src/main/scala/org/apache/spark/deploy/master/DriverState.scala b/core/src/main/scala/org/apache/spark/deploy/master/DriverState.scala index 26a68bade3c60..35ff33a61653c 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/DriverState.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/DriverState.scala @@ -17,7 +17,7 @@ package org.apache.spark.deploy.master -private[spark] object DriverState extends Enumeration { +private[deploy] object DriverState extends Enumeration { type DriverState = Value diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ExecutorDesc.scala b/core/src/main/scala/org/apache/spark/deploy/master/ExecutorDesc.scala index 5d620dfcabad5..fc62b094def67 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/ExecutorDesc.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/ExecutorDesc.scala @@ -19,7 +19,7 @@ package org.apache.spark.deploy.master import org.apache.spark.deploy.{ExecutorDescription, ExecutorState} -private[spark] class ExecutorDesc( +private[master] class ExecutorDesc( val id: Int, val application: ApplicationInfo, val worker: WorkerInfo, diff --git a/core/src/main/scala/org/apache/spark/deploy/master/FileSystemPersistenceEngine.scala b/core/src/main/scala/org/apache/spark/deploy/master/FileSystemPersistenceEngine.scala index 36a2e2c6a6349..d2d30bfd7fcba 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/FileSystemPersistenceEngine.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/FileSystemPersistenceEngine.scala @@ -33,7 +33,7 @@ import org.apache.spark.Logging * @param dir Directory to store files. Created if non-existent (but not recursively). * @param serialization Used to serialize our objects. 
*/ -private[spark] class FileSystemPersistenceEngine( +private[master] class FileSystemPersistenceEngine( val dir: String, val serialization: Serialization) extends PersistenceEngine with Logging { diff --git a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala index 22935c9b1d394..1b42121c8db05 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala @@ -49,7 +49,7 @@ import org.apache.spark.scheduler.{EventLoggingListener, ReplayListenerBus} import org.apache.spark.ui.SparkUI import org.apache.spark.util.{ActorLogReceive, AkkaUtils, SignalLogger, Utils} -private[spark] class Master( +private[master] class Master( host: String, port: Int, webUiPort: Int, @@ -59,65 +59,68 @@ private[spark] class Master( import context.dispatcher // to use Akka's scheduler.schedule() - val hadoopConf = SparkHadoopUtil.get.newConfiguration(conf) + private val hadoopConf = SparkHadoopUtil.get.newConfiguration(conf) - def createDateFormat = new SimpleDateFormat("yyyyMMddHHmmss") // For application IDs - val WORKER_TIMEOUT = conf.getLong("spark.worker.timeout", 60) * 1000 - val RETAINED_APPLICATIONS = conf.getInt("spark.deploy.retainedApplications", 200) - val RETAINED_DRIVERS = conf.getInt("spark.deploy.retainedDrivers", 200) - val REAPER_ITERATIONS = conf.getInt("spark.dead.worker.persistence", 15) - val RECOVERY_MODE = conf.get("spark.deploy.recoveryMode", "NONE") + private def createDateFormat = new SimpleDateFormat("yyyyMMddHHmmss") // For application IDs + + private val WORKER_TIMEOUT = conf.getLong("spark.worker.timeout", 60) * 1000 + private val RETAINED_APPLICATIONS = conf.getInt("spark.deploy.retainedApplications", 200) + private val RETAINED_DRIVERS = conf.getInt("spark.deploy.retainedDrivers", 200) + private val REAPER_ITERATIONS = conf.getInt("spark.dead.worker.persistence", 15) + private val RECOVERY_MODE = conf.get("spark.deploy.recoveryMode", "NONE") val workers = new HashSet[WorkerInfo] - val idToWorker = new HashMap[String, WorkerInfo] - val addressToWorker = new HashMap[Address, WorkerInfo] - - val apps = new HashSet[ApplicationInfo] val idToApp = new HashMap[String, ApplicationInfo] - val actorToApp = new HashMap[ActorRef, ApplicationInfo] - val addressToApp = new HashMap[Address, ApplicationInfo] val waitingApps = new ArrayBuffer[ApplicationInfo] - val completedApps = new ArrayBuffer[ApplicationInfo] - var nextAppNumber = 0 - val appIdToUI = new HashMap[String, SparkUI] + val apps = new HashSet[ApplicationInfo] + + private val idToWorker = new HashMap[String, WorkerInfo] + private val addressToWorker = new HashMap[Address, WorkerInfo] + + private val actorToApp = new HashMap[ActorRef, ApplicationInfo] + private val addressToApp = new HashMap[Address, ApplicationInfo] + private val completedApps = new ArrayBuffer[ApplicationInfo] + private var nextAppNumber = 0 + private val appIdToUI = new HashMap[String, SparkUI] - val drivers = new HashSet[DriverInfo] - val completedDrivers = new ArrayBuffer[DriverInfo] - val waitingDrivers = new ArrayBuffer[DriverInfo] // Drivers currently spooled for scheduling - var nextDriverNumber = 0 + private val drivers = new HashSet[DriverInfo] + private val completedDrivers = new ArrayBuffer[DriverInfo] + // Drivers currently spooled for scheduling + private val waitingDrivers = new ArrayBuffer[DriverInfo] + private var nextDriverNumber = 0 Utils.checkHost(host, "Expected hostname") - val 
masterMetricsSystem = MetricsSystem.createMetricsSystem("master", conf, securityMgr) - val applicationMetricsSystem = MetricsSystem.createMetricsSystem("applications", conf, + private val masterMetricsSystem = MetricsSystem.createMetricsSystem("master", conf, securityMgr) + private val applicationMetricsSystem = MetricsSystem.createMetricsSystem("applications", conf, securityMgr) - val masterSource = new MasterSource(this) + private val masterSource = new MasterSource(this) - val webUi = new MasterWebUI(this, webUiPort) + private val webUi = new MasterWebUI(this, webUiPort) - val masterPublicAddress = { + private val masterPublicAddress = { val envVar = conf.getenv("SPARK_PUBLIC_DNS") if (envVar != null) envVar else host } - val masterUrl = "spark://" + host + ":" + port - var masterWebUiUrl: String = _ + private val masterUrl = "spark://" + host + ":" + port + private var masterWebUiUrl: String = _ - var state = RecoveryState.STANDBY + private var state = RecoveryState.STANDBY - var persistenceEngine: PersistenceEngine = _ + private var persistenceEngine: PersistenceEngine = _ - var leaderElectionAgent: LeaderElectionAgent = _ + private var leaderElectionAgent: LeaderElectionAgent = _ private var recoveryCompletionTask: Cancellable = _ // As a temporary workaround before better ways of configuring memory, we allow users to set // a flag that will perform round-robin scheduling across the nodes (spreading out each app // among all the nodes) instead of trying to consolidate each app onto a small # of nodes. - val spreadOutApps = conf.getBoolean("spark.deploy.spreadOut", true) + private val spreadOutApps = conf.getBoolean("spark.deploy.spreadOut", true) // Default maxCores for applications that don't specify it (i.e. pass Int.MaxValue) - val defaultCores = conf.getInt("spark.deploy.defaultCores", Int.MaxValue) + private val defaultCores = conf.getInt("spark.deploy.defaultCores", Int.MaxValue) if (defaultCores < 1) { throw new SparkException("spark.deploy.defaultCores must be positive") } @@ -449,11 +452,11 @@ private[spark] class Master( } } - def canCompleteRecovery = + private def canCompleteRecovery = workers.count(_.state == WorkerState.UNKNOWN) == 0 && apps.count(_.state == ApplicationState.UNKNOWN) == 0 - def beginRecovery(storedApps: Seq[ApplicationInfo], storedDrivers: Seq[DriverInfo], + private def beginRecovery(storedApps: Seq[ApplicationInfo], storedDrivers: Seq[DriverInfo], storedWorkers: Seq[WorkerInfo]) { for (app <- storedApps) { logInfo("Trying to recover app: " + app.id) @@ -484,7 +487,7 @@ private[spark] class Master( } } - def completeRecovery() { + private def completeRecovery() { // Ensure "only-once" recovery semantics using a short synchronization period. synchronized { if (state != RecoveryState.RECOVERING) { return } @@ -517,7 +520,7 @@ private[spark] class Master( * launched an executor for the app on it (right now the standalone backend doesn't like having * two executors on the same worker). */ - def canUse(app: ApplicationInfo, worker: WorkerInfo): Boolean = { + private def canUse(app: ApplicationInfo, worker: WorkerInfo): Boolean = { worker.memoryFree >= app.desc.memoryPerSlave && !worker.hasExecutor(app) } @@ -596,7 +599,7 @@ private[spark] class Master( } } - def launchExecutor(worker: WorkerInfo, exec: ExecutorDesc) { + private def launchExecutor(worker: WorkerInfo, exec: ExecutorDesc) { logInfo("Launching executor " + exec.fullId + " on worker " + worker.id) worker.addExecutor(exec) worker.actor ! 
LaunchExecutor(masterUrl, @@ -605,7 +608,7 @@ private[spark] class Master( exec.id, worker.id, worker.hostPort, exec.cores, exec.memory) } - def registerWorker(worker: WorkerInfo): Boolean = { + private def registerWorker(worker: WorkerInfo): Boolean = { // There may be one or more refs to dead workers on this same node (w/ different ID's), // remove them. workers.filter { w => @@ -633,7 +636,7 @@ private[spark] class Master( true } - def removeWorker(worker: WorkerInfo) { + private def removeWorker(worker: WorkerInfo) { logInfo("Removing worker " + worker.id + " on " + worker.host + ":" + worker.port) worker.setState(WorkerState.DEAD) idToWorker -= worker.id @@ -656,20 +659,20 @@ private[spark] class Master( persistenceEngine.removeWorker(worker) } - def relaunchDriver(driver: DriverInfo) { + private def relaunchDriver(driver: DriverInfo) { driver.worker = None driver.state = DriverState.RELAUNCHING waitingDrivers += driver schedule() } - def createApplication(desc: ApplicationDescription, driver: ActorRef): ApplicationInfo = { + private def createApplication(desc: ApplicationDescription, driver: ActorRef): ApplicationInfo = { val now = System.currentTimeMillis() val date = new Date(now) new ApplicationInfo(now, newApplicationId(date), desc, date, driver, defaultCores) } - def registerApplication(app: ApplicationInfo): Unit = { + private def registerApplication(app: ApplicationInfo): Unit = { val appAddress = app.driver.path.address if (addressToApp.contains(appAddress)) { logInfo("Attempted to re-register application at same address: " + appAddress) @@ -684,7 +687,7 @@ private[spark] class Master( waitingApps += app } - def finishApplication(app: ApplicationInfo) { + private def finishApplication(app: ApplicationInfo) { removeApplication(app, ApplicationState.FINISHED) } @@ -732,7 +735,7 @@ private[spark] class Master( * Rebuild a new SparkUI from the given application's event logs. * Return whether this is successful. 
*/ - def rebuildSparkUI(app: ApplicationInfo): Boolean = { + private def rebuildSparkUI(app: ApplicationInfo): Boolean = { val appName = app.desc.name val notFoundBasePath = HistoryServer.UI_PATH_PREFIX + "/not-found" try { @@ -798,14 +801,14 @@ private[spark] class Master( } /** Generate a new app ID given a app's submission date */ - def newApplicationId(submitDate: Date): String = { + private def newApplicationId(submitDate: Date): String = { val appId = "app-%s-%04d".format(createDateFormat.format(submitDate), nextAppNumber) nextAppNumber += 1 appId } /** Check for, and remove, any timed-out workers */ - def timeOutDeadWorkers() { + private def timeOutDeadWorkers() { // Copy the workers into an array so we don't modify the hashset while iterating through it val currentTime = System.currentTimeMillis() val toRemove = workers.filter(_.lastHeartbeat < currentTime - WORKER_TIMEOUT).toArray @@ -822,19 +825,19 @@ private[spark] class Master( } } - def newDriverId(submitDate: Date): String = { + private def newDriverId(submitDate: Date): String = { val appId = "driver-%s-%04d".format(createDateFormat.format(submitDate), nextDriverNumber) nextDriverNumber += 1 appId } - def createDriver(desc: DriverDescription): DriverInfo = { + private def createDriver(desc: DriverDescription): DriverInfo = { val now = System.currentTimeMillis() val date = new Date(now) new DriverInfo(now, newDriverId(date), desc, date) } - def launchDriver(worker: WorkerInfo, driver: DriverInfo) { + private def launchDriver(worker: WorkerInfo, driver: DriverInfo) { logInfo("Launching driver " + driver.id + " on worker " + worker.id) worker.addDriver(driver) driver.worker = Some(worker) @@ -842,7 +845,10 @@ private[spark] class Master( driver.state = DriverState.RUNNING } - def removeDriver(driverId: String, finalState: DriverState, exception: Option[Exception]) { + private def removeDriver( + driverId: String, + finalState: DriverState, + exception: Option[Exception]) { drivers.find(d => d.id == driverId) match { case Some(driver) => logInfo(s"Removing driver: $driverId") @@ -863,7 +869,7 @@ private[spark] class Master( } } -private[spark] object Master extends Logging { +private[deploy] object Master extends Logging { val systemName = "sparkMaster" private val actorName = "Master" diff --git a/core/src/main/scala/org/apache/spark/deploy/master/MasterArguments.scala b/core/src/main/scala/org/apache/spark/deploy/master/MasterArguments.scala index e34bee7854292..435b9b12f83b8 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/MasterArguments.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/MasterArguments.scala @@ -23,7 +23,7 @@ import org.apache.spark.util.{IntParam, Utils} /** * Command-line parser for the master. */ -private[spark] class MasterArguments(args: Array[String], conf: SparkConf) { +private[master] class MasterArguments(args: Array[String], conf: SparkConf) { var host = Utils.localHostName() var port = 7077 var webUiPort = 8080 @@ -49,7 +49,7 @@ private[spark] class MasterArguments(args: Array[String], conf: SparkConf) { webUiPort = conf.get("spark.master.ui.port").toInt } - def parse(args: List[String]): Unit = args match { + private def parse(args: List[String]): Unit = args match { case ("--ip" | "-i") :: value :: tail => Utils.checkHost(value, "ip no longer supported, please use hostname " + value) host = value @@ -84,7 +84,7 @@ private[spark] class MasterArguments(args: Array[String], conf: SparkConf) { /** * Print usage and exit JVM with the given exit code. 
*/ - def printUsageAndExit(exitCode: Int) { + private def printUsageAndExit(exitCode: Int) { System.err.println( "Usage: Master [options]\n" + "\n" + diff --git a/core/src/main/scala/org/apache/spark/deploy/master/PersistenceEngine.scala b/core/src/main/scala/org/apache/spark/deploy/master/PersistenceEngine.scala index 2e0e1e7036ac8..da5060778edeb 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/PersistenceEngine.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/PersistenceEngine.scala @@ -87,7 +87,7 @@ trait PersistenceEngine { def close() {} } -private[spark] class BlackHolePersistenceEngine extends PersistenceEngine { +private[master] class BlackHolePersistenceEngine extends PersistenceEngine { override def persist(name: String, obj: Object): Unit = {} diff --git a/core/src/main/scala/org/apache/spark/deploy/master/RecoveryModeFactory.scala b/core/src/main/scala/org/apache/spark/deploy/master/RecoveryModeFactory.scala index 1096eb0368357..1583bf1f60032 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/RecoveryModeFactory.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/RecoveryModeFactory.scala @@ -49,7 +49,7 @@ abstract class StandaloneRecoveryModeFactory(conf: SparkConf, serializer: Serial * LeaderAgent in this case is a no-op. Since leader is forever leader as the actual * recovery is made by restoring from filesystem. */ -private[spark] class FileSystemRecoveryModeFactory(conf: SparkConf, serializer: Serialization) +private[master] class FileSystemRecoveryModeFactory(conf: SparkConf, serializer: Serialization) extends StandaloneRecoveryModeFactory(conf, serializer) with Logging { val RECOVERY_DIR = conf.get("spark.deploy.recoveryDirectory", "") @@ -61,7 +61,7 @@ private[spark] class FileSystemRecoveryModeFactory(conf: SparkConf, serializer: def createLeaderElectionAgent(master: LeaderElectable) = new MonarchyLeaderAgent(master) } -private[spark] class ZooKeeperRecoveryModeFactory(conf: SparkConf, serializer: Serialization) +private[master] class ZooKeeperRecoveryModeFactory(conf: SparkConf, serializer: Serialization) extends StandaloneRecoveryModeFactory(conf, serializer) { def createPersistenceEngine() = new ZooKeeperPersistenceEngine(conf, serializer) diff --git a/core/src/main/scala/org/apache/spark/deploy/master/RecoveryState.scala b/core/src/main/scala/org/apache/spark/deploy/master/RecoveryState.scala index 256a5a7c28e47..aa0f02fa625cc 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/RecoveryState.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/RecoveryState.scala @@ -17,7 +17,7 @@ package org.apache.spark.deploy.master -private[spark] object RecoveryState extends Enumeration { +private[deploy] object RecoveryState extends Enumeration { type MasterState = Value val STANDBY, ALIVE, RECOVERING, COMPLETING_RECOVERY = Value diff --git a/core/src/main/scala/org/apache/spark/deploy/master/SparkCuratorUtil.scala b/core/src/main/scala/org/apache/spark/deploy/master/SparkCuratorUtil.scala index 4781a80d470e1..5b22481ea8c5f 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/SparkCuratorUtil.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/SparkCuratorUtil.scala @@ -25,12 +25,12 @@ import org.apache.zookeeper.KeeperException import org.apache.spark.{Logging, SparkConf} -object SparkCuratorUtil extends Logging { +private[deploy] object SparkCuratorUtil extends Logging { - val ZK_CONNECTION_TIMEOUT_MILLIS = 15000 - val ZK_SESSION_TIMEOUT_MILLIS = 60000 - val RETRY_WAIT_MILLIS = 
5000 - val MAX_RECONNECT_ATTEMPTS = 3 + private val ZK_CONNECTION_TIMEOUT_MILLIS = 15000 + private val ZK_SESSION_TIMEOUT_MILLIS = 60000 + private val RETRY_WAIT_MILLIS = 5000 + private val MAX_RECONNECT_ATTEMPTS = 3 def newClient(conf: SparkConf): CuratorFramework = { val ZK_URL = conf.get("spark.deploy.zookeeper.url") diff --git a/core/src/main/scala/org/apache/spark/deploy/master/WorkerState.scala b/core/src/main/scala/org/apache/spark/deploy/master/WorkerState.scala index 0b36ef60051fc..b60baaadfb4bc 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/WorkerState.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/WorkerState.scala @@ -17,7 +17,7 @@ package org.apache.spark.deploy.master -private[spark] object WorkerState extends Enumeration { +private[master] object WorkerState extends Enumeration { type WorkerState = Value val ALIVE, DEAD, DECOMMISSIONED, UNKNOWN = Value diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ZooKeeperLeaderElectionAgent.scala b/core/src/main/scala/org/apache/spark/deploy/master/ZooKeeperLeaderElectionAgent.scala index 8eaa0ad948519..4823fd7cac0cb 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/ZooKeeperLeaderElectionAgent.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/ZooKeeperLeaderElectionAgent.scala @@ -24,7 +24,7 @@ import org.apache.spark.deploy.master.MasterMessages._ import org.apache.curator.framework.CuratorFramework import org.apache.curator.framework.recipes.leader.{LeaderLatchListener, LeaderLatch} -private[spark] class ZooKeeperLeaderElectionAgent(val masterActor: LeaderElectable, +private[master] class ZooKeeperLeaderElectionAgent(val masterActor: LeaderElectable, conf: SparkConf) extends LeaderLatchListener with LeaderElectionAgent with Logging { val WORKING_DIR = conf.get("spark.deploy.zookeeper.dir", "/spark") + "/leader_election" @@ -35,7 +35,7 @@ private[spark] class ZooKeeperLeaderElectionAgent(val masterActor: LeaderElectab start() - def start() { + private def start() { logInfo("Starting ZooKeeper LeaderElection agent") zk = SparkCuratorUtil.newClient(conf) leaderLatch = new LeaderLatch(zk, WORKING_DIR) @@ -72,7 +72,7 @@ private[spark] class ZooKeeperLeaderElectionAgent(val masterActor: LeaderElectab } } - def updateLeadershipStatus(isLeader: Boolean) { + private def updateLeadershipStatus(isLeader: Boolean) { if (isLeader && status == LeadershipStatus.NOT_LEADER) { status = LeadershipStatus.LEADER masterActor.electedLeader() diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ZooKeeperPersistenceEngine.scala b/core/src/main/scala/org/apache/spark/deploy/master/ZooKeeperPersistenceEngine.scala index e11ac031fb9c6..1ac6677ad2b6d 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/ZooKeeperPersistenceEngine.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/ZooKeeperPersistenceEngine.scala @@ -28,12 +28,12 @@ import org.apache.zookeeper.CreateMode import org.apache.spark.{Logging, SparkConf} -private[spark] class ZooKeeperPersistenceEngine(conf: SparkConf, val serialization: Serialization) +private[master] class ZooKeeperPersistenceEngine(conf: SparkConf, val serialization: Serialization) extends PersistenceEngine - with Logging -{ - val WORKING_DIR = conf.get("spark.deploy.zookeeper.dir", "/spark") + "/master_status" - val zk: CuratorFramework = SparkCuratorUtil.newClient(conf) + with Logging { + + private val WORKING_DIR = conf.get("spark.deploy.zookeeper.dir", "/spark") + "/master_status" + private val zk: CuratorFramework = 
SparkCuratorUtil.newClient(conf) SparkCuratorUtil.mkdir(zk, WORKING_DIR) @@ -61,7 +61,7 @@ private[spark] class ZooKeeperPersistenceEngine(conf: SparkConf, val serializati zk.create().withMode(CreateMode.PERSISTENT).forPath(path, serialized) } - def deserializeFromFile[T](filename: String)(implicit m: ClassTag[T]): Option[T] = { + private def deserializeFromFile[T](filename: String)(implicit m: ClassTag[T]): Option[T] = { val fileData = zk.getData().forPath(WORKING_DIR + "/" + filename) val clazz = m.runtimeClass.asInstanceOf[Class[T]] val serializer = serialization.serializerFor(clazz) diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala b/core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala index 76fc40e17d9a8..761aa8f7b1ef6 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala @@ -32,7 +32,7 @@ import org.apache.spark.deploy.master.ExecutorDesc import org.apache.spark.ui.{UIUtils, WebUIPage} import org.apache.spark.util.Utils -private[spark] class ApplicationPage(parent: MasterWebUI) extends WebUIPage("app") { +private[ui] class ApplicationPage(parent: MasterWebUI) extends WebUIPage("app") { private val master = parent.masterActorRef private val timeout = parent.timeout diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ui/HistoryNotFoundPage.scala b/core/src/main/scala/org/apache/spark/deploy/master/ui/HistoryNotFoundPage.scala index d8daff3e7fb9c..e021f1eef794f 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/ui/HistoryNotFoundPage.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/ui/HistoryNotFoundPage.scala @@ -24,7 +24,7 @@ import scala.xml.Node import org.apache.spark.ui.{UIUtils, WebUIPage} -private[spark] class HistoryNotFoundPage(parent: MasterWebUI) +private[ui] class HistoryNotFoundPage(parent: MasterWebUI) extends WebUIPage("history/not-found") { /** diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterPage.scala b/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterPage.scala index c086cadca2c7d..dee2e4a447c6e 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterPage.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterPage.scala @@ -31,7 +31,7 @@ import org.apache.spark.deploy.master._ import org.apache.spark.ui.{WebUIPage, UIUtils} import org.apache.spark.util.Utils -private[spark] class MasterPage(parent: MasterWebUI) extends WebUIPage("") { +private[ui] class MasterPage(parent: MasterWebUI) extends WebUIPage("") { private val master = parent.masterActorRef private val timeout = parent.timeout diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterWebUI.scala b/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterWebUI.scala index 170f90a00ad2a..1b670418ab1ff 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterWebUI.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterWebUI.scala @@ -26,7 +26,7 @@ import org.apache.spark.util.AkkaUtils /** * Web UI server for the standalone master. 
*/ -private[spark] +private[master] class MasterWebUI(val master: Master, requestedPort: Int) extends WebUI(master.securityMgr, requestedPort, master.conf, name = "MasterUI") with Logging { @@ -62,6 +62,6 @@ class MasterWebUI(val master: Master, requestedPort: Int) } } -private[spark] object MasterWebUI { - val STATIC_RESOURCE_DIR = SparkUI.STATIC_RESOURCE_DIR +private[master] object MasterWebUI { + private val STATIC_RESOURCE_DIR = SparkUI.STATIC_RESOURCE_DIR } diff --git a/core/src/main/scala/org/apache/spark/deploy/rest/StandaloneRestClient.scala b/core/src/main/scala/org/apache/spark/deploy/rest/StandaloneRestClient.scala index c4be1f19e8e9f..420442f7564cc 100644 --- a/core/src/main/scala/org/apache/spark/deploy/rest/StandaloneRestClient.scala +++ b/core/src/main/scala/org/apache/spark/deploy/rest/StandaloneRestClient.scala @@ -52,7 +52,7 @@ import org.apache.spark.{Logging, SparkConf, SPARK_VERSION => sparkVersion} * implementation of this client can use that information to retry using the version specified * by the server. */ -private[spark] class StandaloneRestClient extends Logging { +private[deploy] class StandaloneRestClient extends Logging { import StandaloneRestClient._ /** @@ -61,7 +61,7 @@ private[spark] class StandaloneRestClient extends Logging { * If the submission was successful, poll the status of the submission and report * it to the user. Otherwise, report the error message provided by the server. */ - def createSubmission( + private[rest] def createSubmission( master: String, request: CreateSubmissionRequest): SubmitRestProtocolResponse = { logInfo(s"Submitting a request to launch an application in $master.") @@ -106,7 +106,7 @@ private[spark] class StandaloneRestClient extends Logging { } /** Construct a message that captures the specified parameters for submitting an application. */ - def constructSubmitRequest( + private[rest] def constructSubmitRequest( appResource: String, mainClass: String, appArgs: Array[String], @@ -291,16 +291,16 @@ private[spark] class StandaloneRestClient extends Logging { } } -private[spark] object StandaloneRestClient { - val REPORT_DRIVER_STATUS_INTERVAL = 1000 - val REPORT_DRIVER_STATUS_MAX_TRIES = 10 +private[rest] object StandaloneRestClient { + private val REPORT_DRIVER_STATUS_INTERVAL = 1000 + private val REPORT_DRIVER_STATUS_MAX_TRIES = 10 val PROTOCOL_VERSION = "v1" /** * Submit an application, assuming Spark parameters are specified through the given config. * This is abstracted to its own method for testing purposes. 
*/ - private[rest] def run( + def run( appResource: String, mainClass: String, appArgs: Array[String], diff --git a/core/src/main/scala/org/apache/spark/deploy/rest/StandaloneRestServer.scala b/core/src/main/scala/org/apache/spark/deploy/rest/StandaloneRestServer.scala index f9e0478e4f874..4f19af59f409f 100644 --- a/core/src/main/scala/org/apache/spark/deploy/rest/StandaloneRestServer.scala +++ b/core/src/main/scala/org/apache/spark/deploy/rest/StandaloneRestServer.scala @@ -58,7 +58,7 @@ import org.apache.spark.deploy.ClientArguments._ * @param masterUrl the URL of the Master new drivers will attempt to connect to * @param masterConf the conf used by the Master */ -private[spark] class StandaloneRestServer( +private[deploy] class StandaloneRestServer( host: String, requestedPort: Int, masterActor: ActorRef, diff --git a/core/src/main/scala/org/apache/spark/deploy/rest/SubmitRestProtocolException.scala b/core/src/main/scala/org/apache/spark/deploy/rest/SubmitRestProtocolException.scala index d7a0bdbe10778..b97921ec934a0 100644 --- a/core/src/main/scala/org/apache/spark/deploy/rest/SubmitRestProtocolException.scala +++ b/core/src/main/scala/org/apache/spark/deploy/rest/SubmitRestProtocolException.scala @@ -20,17 +20,17 @@ package org.apache.spark.deploy.rest /** * An exception thrown in the REST application submission protocol. */ -private[spark] class SubmitRestProtocolException(message: String, cause: Throwable = null) +private[rest] class SubmitRestProtocolException(message: String, cause: Throwable = null) extends Exception(message, cause) /** * An exception thrown if a field is missing from a [[SubmitRestProtocolMessage]]. */ -private[spark] class SubmitRestMissingFieldException(message: String) +private[rest] class SubmitRestMissingFieldException(message: String) extends SubmitRestProtocolException(message) /** * An exception thrown if the REST client cannot reach the REST server. */ -private[spark] class SubmitRestConnectionException(message: String, cause: Throwable) +private[deploy] class SubmitRestConnectionException(message: String, cause: Throwable) extends SubmitRestProtocolException(message, cause) diff --git a/core/src/main/scala/org/apache/spark/deploy/rest/SubmitRestProtocolMessage.scala b/core/src/main/scala/org/apache/spark/deploy/rest/SubmitRestProtocolMessage.scala index 8f36635674a28..e6615a3174ce1 100644 --- a/core/src/main/scala/org/apache/spark/deploy/rest/SubmitRestProtocolMessage.scala +++ b/core/src/main/scala/org/apache/spark/deploy/rest/SubmitRestProtocolMessage.scala @@ -39,7 +39,7 @@ import org.apache.spark.util.Utils @JsonInclude(Include.NON_NULL) @JsonAutoDetect(getterVisibility = Visibility.ANY, setterVisibility = Visibility.ANY) @JsonPropertyOrder(alphabetic = true) -private[spark] abstract class SubmitRestProtocolMessage { +private[rest] abstract class SubmitRestProtocolMessage { @JsonIgnore val messageType = Utils.getFormattedClassName(this) diff --git a/core/src/main/scala/org/apache/spark/deploy/rest/SubmitRestProtocolRequest.scala b/core/src/main/scala/org/apache/spark/deploy/rest/SubmitRestProtocolRequest.scala index 9e1fd8c40cabd..d80abdf15fb34 100644 --- a/core/src/main/scala/org/apache/spark/deploy/rest/SubmitRestProtocolRequest.scala +++ b/core/src/main/scala/org/apache/spark/deploy/rest/SubmitRestProtocolRequest.scala @@ -24,7 +24,7 @@ import org.apache.spark.util.Utils /** * An abstract request sent from the client in the REST application submission protocol. 
*/ -private[spark] abstract class SubmitRestProtocolRequest extends SubmitRestProtocolMessage { +private[rest] abstract class SubmitRestProtocolRequest extends SubmitRestProtocolMessage { var clientSparkVersion: String = null protected override def doValidate(): Unit = { super.doValidate() @@ -35,7 +35,7 @@ private[spark] abstract class SubmitRestProtocolRequest extends SubmitRestProtoc /** * A request to launch a new application in the REST application submission protocol. */ -private[spark] class CreateSubmissionRequest extends SubmitRestProtocolRequest { +private[rest] class CreateSubmissionRequest extends SubmitRestProtocolRequest { var appResource: String = null var mainClass: String = null var appArgs: Array[String] = null diff --git a/core/src/main/scala/org/apache/spark/deploy/rest/SubmitRestProtocolResponse.scala b/core/src/main/scala/org/apache/spark/deploy/rest/SubmitRestProtocolResponse.scala index 16dfe041d4bea..8fde8c142a4c1 100644 --- a/core/src/main/scala/org/apache/spark/deploy/rest/SubmitRestProtocolResponse.scala +++ b/core/src/main/scala/org/apache/spark/deploy/rest/SubmitRestProtocolResponse.scala @@ -22,7 +22,7 @@ import java.lang.Boolean /** * An abstract response sent from the server in the REST application submission protocol. */ -private[spark] abstract class SubmitRestProtocolResponse extends SubmitRestProtocolMessage { +private[rest] abstract class SubmitRestProtocolResponse extends SubmitRestProtocolMessage { var serverSparkVersion: String = null var success: Boolean = null var unknownFields: Array[String] = null @@ -35,7 +35,7 @@ private[spark] abstract class SubmitRestProtocolResponse extends SubmitRestProto /** * A response to a [[CreateSubmissionRequest]] in the REST application submission protocol. */ -private[spark] class CreateSubmissionResponse extends SubmitRestProtocolResponse { +private[rest] class CreateSubmissionResponse extends SubmitRestProtocolResponse { var submissionId: String = null protected override def doValidate(): Unit = { super.doValidate() @@ -46,7 +46,7 @@ private[spark] class CreateSubmissionResponse extends SubmitRestProtocolResponse /** * A response to a kill request in the REST application submission protocol. */ -private[spark] class KillSubmissionResponse extends SubmitRestProtocolResponse { +private[rest] class KillSubmissionResponse extends SubmitRestProtocolResponse { var submissionId: String = null protected override def doValidate(): Unit = { super.doValidate() @@ -58,7 +58,7 @@ private[spark] class KillSubmissionResponse extends SubmitRestProtocolResponse { /** * A response to a status request in the REST application submission protocol. */ -private[spark] class SubmissionStatusResponse extends SubmitRestProtocolResponse { +private[rest] class SubmissionStatusResponse extends SubmitRestProtocolResponse { var submissionId: String = null var driverState: String = null var workerId: String = null @@ -74,7 +74,7 @@ private[spark] class SubmissionStatusResponse extends SubmitRestProtocolResponse /** * An error response message used in the REST application submission protocol. 
*/ -private[spark] class ErrorResponse extends SubmitRestProtocolResponse { +private[rest] class ErrorResponse extends SubmitRestProtocolResponse { // The highest protocol version that the server knows about // This is set when the client specifies an unknown version var highestProtocolVersion: String = null diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/CommandUtils.scala b/core/src/main/scala/org/apache/spark/deploy/worker/CommandUtils.scala index 83f78cf47306c..0a1d60f58bc58 100644 --- a/core/src/main/scala/org/apache/spark/deploy/worker/CommandUtils.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/CommandUtils.scala @@ -31,7 +31,7 @@ import org.apache.spark.util.Utils /** ** Utilities for running commands with the spark classpath. */ -private[spark] +private[deploy] object CommandUtils extends Logging { /** diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/DriverRunner.scala b/core/src/main/scala/org/apache/spark/deploy/worker/DriverRunner.scala index e16bccb24d2c4..27a9eabb1ede7 100644 --- a/core/src/main/scala/org/apache/spark/deploy/worker/DriverRunner.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/DriverRunner.scala @@ -37,8 +37,8 @@ import org.apache.spark.util.{Clock, SystemClock} * Manages the execution of one driver, including automatically restarting the driver on failure. * This is currently only used in standalone cluster deploy mode. */ -private[spark] class DriverRunner( - val conf: SparkConf, +private[deploy] class DriverRunner( + conf: SparkConf, val driverId: String, val workDir: File, val sparkHome: File, @@ -47,24 +47,24 @@ private[spark] class DriverRunner( val workerUrl: String) extends Logging { - @volatile var process: Option[Process] = None - @volatile var killed = false + @volatile private var process: Option[Process] = None + @volatile private var killed = false // Populated once finished - var finalState: Option[DriverState] = None - var finalException: Option[Exception] = None - var finalExitCode: Option[Int] = None + private[worker] var finalState: Option[DriverState] = None + private[worker] var finalException: Option[Exception] = None + private var finalExitCode: Option[Int] = None // Decoupled for testing - private[deploy] def setClock(_clock: Clock) = clock = _clock - private[deploy] def setSleeper(_sleeper: Sleeper) = sleeper = _sleeper + def setClock(_clock: Clock) = clock = _clock + def setSleeper(_sleeper: Sleeper) = sleeper = _sleeper private var clock: Clock = new SystemClock() private var sleeper = new Sleeper { def sleep(seconds: Int): Unit = (0 until seconds).takeWhile(f => {Thread.sleep(1000); !killed}) } /** Starts a thread to run and manage the driver. */ - def start() = { + private[worker] def start() = { new Thread("DriverRunner for " + driverId) { override def run() { try { @@ -106,7 +106,7 @@ private[spark] class DriverRunner( } /** Terminate this driver (or prevent it from ever starting if not yet started) */ - def kill() { + private[worker] def kill() { synchronized { process.foreach(p => p.destroy()) killed = true @@ -169,7 +169,7 @@ private[spark] class DriverRunner( runCommandWithRetry(ProcessBuilderLike(builder), initialize, supervise) } - private[deploy] def runCommandWithRetry(command: ProcessBuilderLike, initialize: Process => Unit, + def runCommandWithRetry(command: ProcessBuilderLike, initialize: Process => Unit, supervise: Boolean) { // Time to wait between submission retries. 
var waitSeconds = 1 diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala b/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala index 023f3c6269062..83e24a7a1f80c 100644 --- a/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala @@ -34,7 +34,7 @@ import org.apache.spark.util.logging.FileAppender * Manages the execution of one executor process. * This is currently only used in standalone mode. */ -private[spark] class ExecutorRunner( +private[deploy] class ExecutorRunner( val appId: String, val execId: Int, val appDesc: ApplicationDescription, @@ -48,22 +48,22 @@ private[spark] class ExecutorRunner( val sparkHome: File, val executorDir: File, val workerUrl: String, - val conf: SparkConf, + conf: SparkConf, val appLocalDirs: Seq[String], var state: ExecutorState.Value) extends Logging { - val fullId = appId + "/" + execId - var workerThread: Thread = null - var process: Process = null - var stdoutAppender: FileAppender = null - var stderrAppender: FileAppender = null + private val fullId = appId + "/" + execId + private var workerThread: Thread = null + private var process: Process = null + private var stdoutAppender: FileAppender = null + private var stderrAppender: FileAppender = null // NOTE: This is now redundant with the automated shut-down enforced by the Executor. It might // make sense to remove this in the future. - var shutdownHook: Thread = null + private var shutdownHook: Thread = null - def start() { + private[worker] def start() { workerThread = new Thread("ExecutorRunner for " + fullId) { override def run() { fetchAndRunExecutor() } } @@ -99,7 +99,7 @@ private[spark] class ExecutorRunner( } /** Stop this executor runner, including killing the process it launched */ - def kill() { + private[worker] def kill() { if (workerThread != null) { // the workerThread will kill the child process when interrupted workerThread.interrupt() @@ -114,7 +114,7 @@ private[spark] class ExecutorRunner( } /** Replace variables such as {{EXECUTOR_ID}} and {{CORES}} in a command argument passed to us */ - def substituteVariables(argument: String): String = argument match { + private[worker] def substituteVariables(argument: String): String = argument match { case "{{WORKER_URL}}" => workerUrl case "{{EXECUTOR_ID}}" => execId.toString case "{{HOSTNAME}}" => host @@ -126,7 +126,7 @@ private[spark] class ExecutorRunner( /** * Download and run the executor described in our ApplicationDescription */ - def fetchAndRunExecutor() { + private def fetchAndRunExecutor() { try { // Launch the process val builder = CommandUtils.buildProcessBuilder(appDesc.command, memory, diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala b/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala index f2e7418f4bf15..c1b0a295f9f74 100755 --- a/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala @@ -42,7 +42,7 @@ import org.apache.spark.util.{ActorLogReceive, AkkaUtils, SignalLogger, Utils} /** * @param masterAkkaUrls Each url should be a valid akka url. 
*/ -private[spark] class Worker( +private[worker] class Worker( host: String, port: Int, webUiPort: Int, @@ -60,85 +60,90 @@ private[spark] class Worker( Utils.checkHost(host, "Expected hostname") assert (port > 0) - def createDateFormat = new SimpleDateFormat("yyyyMMddHHmmss") // For worker and executor IDs + // For worker and executor IDs + private def createDateFormat = new SimpleDateFormat("yyyyMMddHHmmss") // Send a heartbeat every (heartbeat timeout) / 4 milliseconds - val HEARTBEAT_MILLIS = conf.getLong("spark.worker.timeout", 60) * 1000 / 4 + private val HEARTBEAT_MILLIS = conf.getLong("spark.worker.timeout", 60) * 1000 / 4 // Model retries to connect to the master, after Hadoop's model. // The first six attempts to reconnect are in shorter intervals (between 5 and 15 seconds) // Afterwards, the next 10 attempts are between 30 and 90 seconds. // A bit of randomness is introduced so that not all of the workers attempt to reconnect at // the same time. - val INITIAL_REGISTRATION_RETRIES = 6 - val TOTAL_REGISTRATION_RETRIES = INITIAL_REGISTRATION_RETRIES + 10 - val FUZZ_MULTIPLIER_INTERVAL_LOWER_BOUND = 0.500 - val REGISTRATION_RETRY_FUZZ_MULTIPLIER = { + private val INITIAL_REGISTRATION_RETRIES = 6 + private val TOTAL_REGISTRATION_RETRIES = INITIAL_REGISTRATION_RETRIES + 10 + private val FUZZ_MULTIPLIER_INTERVAL_LOWER_BOUND = 0.500 + private val REGISTRATION_RETRY_FUZZ_MULTIPLIER = { val randomNumberGenerator = new Random(UUID.randomUUID.getMostSignificantBits) randomNumberGenerator.nextDouble + FUZZ_MULTIPLIER_INTERVAL_LOWER_BOUND } - val INITIAL_REGISTRATION_RETRY_INTERVAL = (math.round(10 * + private val INITIAL_REGISTRATION_RETRY_INTERVAL = (math.round(10 * REGISTRATION_RETRY_FUZZ_MULTIPLIER)).seconds - val PROLONGED_REGISTRATION_RETRY_INTERVAL = (math.round(60 + private val PROLONGED_REGISTRATION_RETRY_INTERVAL = (math.round(60 * REGISTRATION_RETRY_FUZZ_MULTIPLIER)).seconds - val CLEANUP_ENABLED = conf.getBoolean("spark.worker.cleanup.enabled", false) + private val CLEANUP_ENABLED = conf.getBoolean("spark.worker.cleanup.enabled", false) // How often worker will clean up old app folders - val CLEANUP_INTERVAL_MILLIS = conf.getLong("spark.worker.cleanup.interval", 60 * 30) * 1000 + private val CLEANUP_INTERVAL_MILLIS = + conf.getLong("spark.worker.cleanup.interval", 60 * 30) * 1000 // TTL for app folders/data; after TTL expires it will be cleaned up - val APP_DATA_RETENTION_SECS = conf.getLong("spark.worker.cleanup.appDataTtl", 7 * 24 * 3600) - - val testing: Boolean = sys.props.contains("spark.testing") - var master: ActorSelection = null - var masterAddress: Address = null - var activeMasterUrl: String = "" - var activeMasterWebUiUrl : String = "" - val akkaUrl = AkkaUtils.address( + private val APP_DATA_RETENTION_SECS = + conf.getLong("spark.worker.cleanup.appDataTtl", 7 * 24 * 3600) + + private val testing: Boolean = sys.props.contains("spark.testing") + private var master: ActorSelection = null + private var masterAddress: Address = null + private var activeMasterUrl: String = "" + private[worker] var activeMasterWebUiUrl : String = "" + private val akkaUrl = AkkaUtils.address( AkkaUtils.protocol(context.system), actorSystemName, host, port, actorName) - @volatile var registered = false - @volatile var connected = false - val workerId = generateWorkerId() - val sparkHome = + @volatile private var registered = false + @volatile private var connected = false + private val workerId = generateWorkerId() + private val sparkHome = if (testing) { 
assert(sys.props.contains("spark.test.home"), "spark.test.home is not set!") new File(sys.props("spark.test.home")) } else { new File(sys.env.get("SPARK_HOME").getOrElse(".")) } + var workDir: File = null - val executors = new HashMap[String, ExecutorRunner] val finishedExecutors = new HashMap[String, ExecutorRunner] val drivers = new HashMap[String, DriverRunner] + val executors = new HashMap[String, ExecutorRunner] val finishedDrivers = new HashMap[String, DriverRunner] val appDirectories = new HashMap[String, Seq[String]] val finishedApps = new HashSet[String] // The shuffle service is not actually started unless configured. - val shuffleService = new StandaloneWorkerShuffleService(conf, securityMgr) + private val shuffleService = new StandaloneWorkerShuffleService(conf, securityMgr) - val publicAddress = { + private val publicAddress = { val envVar = conf.getenv("SPARK_PUBLIC_DNS") if (envVar != null) envVar else host } - var webUi: WorkerWebUI = null + private var webUi: WorkerWebUI = null - var coresUsed = 0 - var memoryUsed = 0 - var connectionAttemptCount = 0 + private var connectionAttemptCount = 0 - val metricsSystem = MetricsSystem.createMetricsSystem("worker", conf, securityMgr) - val workerSource = new WorkerSource(this) + private val metricsSystem = MetricsSystem.createMetricsSystem("worker", conf, securityMgr) + private val workerSource = new WorkerSource(this) + + private var registrationRetryTimer: Option[Cancellable] = None - var registrationRetryTimer: Option[Cancellable] = None + var coresUsed = 0 + var memoryUsed = 0 def coresFree: Int = cores - coresUsed def memoryFree: Int = memory - memoryUsed - def createWorkDir() { + private def createWorkDir() { workDir = Option(workDirPath).map(new File(_)).getOrElse(new File(sparkHome, "work")) try { // This sporadically fails - not sure why ... !workDir.exists() && !workDir.mkdirs() @@ -175,7 +180,7 @@ private[spark] class Worker( metricsSystem.getServletHandlers.foreach(webUi.attachHandler) } - def changeMaster(url: String, uiUrl: String) { + private def changeMaster(url: String, uiUrl: String) { // activeMasterUrl it's a valid Spark url since we receive it from master. activeMasterUrl = url activeMasterWebUiUrl = uiUrl @@ -252,7 +257,7 @@ private[spark] class Worker( } } - def registerWithMaster() { + private def registerWithMaster() { // DisassociatedEvent may be triggered multiple times, so don't attempt registration // if there are outstanding registration attempts scheduled. 
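Aside: the fuzzed registration retry intervals defined near the top of this Worker diff can be sanity-checked with a small standalone sketch. This is only an illustration: the object and method names below are invented, and just the constants mirror the patch. Since the multiplier is uniform in [0.5, 1.5), the initial interval lands between 5 and 15 seconds and the prolonged interval between 30 and 90 seconds.

    import scala.util.Random

    // Minimal sketch of the fuzzed retry intervals; names are illustrative, constants match the patch.
    object RegistrationRetrySketch {
      def intervalsSeconds(rng: Random = new Random()): (Long, Long) = {
        val fuzz = rng.nextDouble() + 0.5      // uniform in [0.5, 1.5)
        val initial = math.round(10 * fuzz)    // 5..15 s, used for the first 6 attempts
        val prolonged = math.round(60 * fuzz)  // 30..90 s, used for the next 10 attempts
        (initial, prolonged)
      }
    }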
registrationRetryTimer match { @@ -506,7 +511,7 @@ private[spark] class Worker( } } - def generateWorkerId(): String = { + private def generateWorkerId(): String = { "worker-%s-%s-%d".format(createDateFormat.format(new Date), host, port) } @@ -521,7 +526,7 @@ private[spark] class Worker( } } -private[spark] object Worker extends Logging { +private[deploy] object Worker extends Logging { def main(argStrings: Array[String]) { SignalLogger.register(log) val conf = new SparkConf @@ -554,7 +559,7 @@ private[spark] object Worker extends Logging { (actorSystem, boundPort) } - private[spark] def isUseLocalNodeSSLConfig(cmd: Command): Boolean = { + def isUseLocalNodeSSLConfig(cmd: Command): Boolean = { val pattern = """\-Dspark\.ssl\.useNodeLocalConf\=(.+)""".r val result = cmd.javaOpts.collectFirst { case pattern(_result) => _result.toBoolean @@ -562,7 +567,7 @@ private[spark] object Worker extends Logging { result.getOrElse(false) } - private[spark] def maybeUpdateSSLSettings(cmd: Command, conf: SparkConf): Command = { + def maybeUpdateSSLSettings(cmd: Command, conf: SparkConf): Command = { val prefix = "spark.ssl." val useNLC = "spark.ssl.useNodeLocalConf" if (isUseLocalNodeSSLConfig(cmd)) { diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/WorkerArguments.scala b/core/src/main/scala/org/apache/spark/deploy/worker/WorkerArguments.scala index 019cd70f2a229..88f9d880ac209 100644 --- a/core/src/main/scala/org/apache/spark/deploy/worker/WorkerArguments.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/WorkerArguments.scala @@ -25,7 +25,7 @@ import org.apache.spark.SparkConf /** * Command-line parser for the worker. */ -private[spark] class WorkerArguments(args: Array[String], conf: SparkConf) { +private[worker] class WorkerArguments(args: Array[String], conf: SparkConf) { var host = Utils.localHostName() var port = 0 var webUiPort = 8081 @@ -63,7 +63,7 @@ private[spark] class WorkerArguments(args: Array[String], conf: SparkConf) { checkWorkerMemory() - def parse(args: List[String]): Unit = args match { + private def parse(args: List[String]): Unit = args match { case ("--ip" | "-i") :: value :: tail => Utils.checkHost(value, "ip no longer supported, please use hostname " + value) host = value diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/WorkerSource.scala b/core/src/main/scala/org/apache/spark/deploy/worker/WorkerSource.scala index df1e01b23b932..b36023bc40c3d 100644 --- a/core/src/main/scala/org/apache/spark/deploy/worker/WorkerSource.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/WorkerSource.scala @@ -21,7 +21,7 @@ import com.codahale.metrics.{Gauge, MetricRegistry} import org.apache.spark.metrics.source.Source -private[spark] class WorkerSource(val worker: Worker) extends Source { +private[worker] class WorkerSource(val worker: Worker) extends Source { override val sourceName = "worker" override val metricRegistry = new MetricRegistry() diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/WorkerWatcher.scala b/core/src/main/scala/org/apache/spark/deploy/worker/WorkerWatcher.scala index 63a8ac817b618..09d866fb0cd90 100644 --- a/core/src/main/scala/org/apache/spark/deploy/worker/WorkerWatcher.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/WorkerWatcher.scala @@ -48,7 +48,7 @@ private[spark] class WorkerWatcher(workerUrl: String) private val expectedHostPort = AddressFromURIString(workerUrl).hostPort private def isWorker(address: Address) = address.hostPort == expectedHostPort - def exitNonZero() = if 
(isTesting) isShutDown = true else System.exit(-1) + private def exitNonZero() = if (isTesting) isShutDown = true else System.exit(-1) override def receiveWithLogging = { case AssociatedEvent(localAddress, remoteAddress, inbound) if isWorker(remoteAddress) => diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/ui/LogPage.scala b/core/src/main/scala/org/apache/spark/deploy/worker/ui/LogPage.scala index ecb358c399819..88170d4df3053 100644 --- a/core/src/main/scala/org/apache/spark/deploy/worker/ui/LogPage.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/ui/LogPage.scala @@ -26,7 +26,7 @@ import org.apache.spark.util.Utils import org.apache.spark.Logging import org.apache.spark.util.logging.RollingFileAppender -private[spark] class LogPage(parent: WorkerWebUI) extends WebUIPage("logPage") with Logging { +private[ui] class LogPage(parent: WorkerWebUI) extends WebUIPage("logPage") with Logging { private val worker = parent.worker private val workDir = parent.workDir diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/ui/WorkerPage.scala b/core/src/main/scala/org/apache/spark/deploy/worker/ui/WorkerPage.scala index 720f13bfa829b..9f9f27d71e1ae 100644 --- a/core/src/main/scala/org/apache/spark/deploy/worker/ui/WorkerPage.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/ui/WorkerPage.scala @@ -31,10 +31,9 @@ import org.apache.spark.deploy.worker.{DriverRunner, ExecutorRunner} import org.apache.spark.ui.{WebUIPage, UIUtils} import org.apache.spark.util.Utils -private[spark] class WorkerPage(parent: WorkerWebUI) extends WebUIPage("") { - val workerActor = parent.worker.self - val worker = parent.worker - val timeout = parent.timeout +private[ui] class WorkerPage(parent: WorkerWebUI) extends WebUIPage("") { + private val workerActor = parent.worker.self + private val timeout = parent.timeout override def renderJson(request: HttpServletRequest): JValue = { val stateFuture = (workerActor ? RequestWorkerState)(timeout).mapTo[WorkerStateResponse] diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/ui/WorkerWebUI.scala b/core/src/main/scala/org/apache/spark/deploy/worker/ui/WorkerWebUI.scala index 7ac81a2d87efd..de6423beb543e 100644 --- a/core/src/main/scala/org/apache/spark/deploy/worker/ui/WorkerWebUI.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/ui/WorkerWebUI.scala @@ -30,7 +30,7 @@ import org.apache.spark.util.AkkaUtils /** * Web UI server for the standalone worker. */ -private[spark] +private[worker] class WorkerWebUI( val worker: Worker, val workDir: File, @@ -38,7 +38,7 @@ class WorkerWebUI( extends WebUI(worker.securityMgr, requestedPort, worker.conf, name = "WorkerUI") with Logging { - val timeout = AkkaUtils.askTimeout(worker.conf) + private[ui] val timeout = AkkaUtils.askTimeout(worker.conf) initialize() @@ -53,6 +53,6 @@ class WorkerWebUI( } } -private[spark] object WorkerWebUI { +private[ui] object WorkerWebUI { val STATIC_RESOURCE_BASE = SparkUI.STATIC_RESOURCE_DIR } diff --git a/core/src/main/scala/org/apache/spark/executor/CommitDeniedException.scala b/core/src/main/scala/org/apache/spark/executor/CommitDeniedException.scala index f7604a321f007..f47d7ef511da1 100644 --- a/core/src/main/scala/org/apache/spark/executor/CommitDeniedException.scala +++ b/core/src/main/scala/org/apache/spark/executor/CommitDeniedException.scala @@ -22,14 +22,12 @@ import org.apache.spark.{TaskCommitDenied, TaskEndReason} /** * Exception thrown when a task attempts to commit output to HDFS but is denied by the driver. 
*/ -class CommitDeniedException( +private[spark] class CommitDeniedException( msg: String, jobID: Int, splitID: Int, attemptID: Int) extends Exception(msg) { - def toTaskEndReason: TaskEndReason = new TaskCommitDenied(jobID, splitID, attemptID) - + def toTaskEndReason: TaskEndReason = TaskCommitDenied(jobID, splitID, attemptID) } - diff --git a/core/src/main/scala/org/apache/spark/executor/Executor.scala b/core/src/main/scala/org/apache/spark/executor/Executor.scala index a897e532184ac..bf3135ef081c1 100644 --- a/core/src/main/scala/org/apache/spark/executor/Executor.scala +++ b/core/src/main/scala/org/apache/spark/executor/Executor.scala @@ -21,7 +21,7 @@ import java.io.File import java.lang.management.ManagementFactory import java.net.URL import java.nio.ByteBuffer -import java.util.concurrent._ +import java.util.concurrent.ConcurrentHashMap import scala.collection.JavaConversions._ import scala.collection.mutable.{ArrayBuffer, HashMap} @@ -31,15 +31,17 @@ import akka.actor.Props import org.apache.spark._ import org.apache.spark.deploy.SparkHadoopUtil -import org.apache.spark.scheduler._ +import org.apache.spark.scheduler.{DirectTaskResult, IndirectTaskResult, Task} import org.apache.spark.shuffle.FetchFailedException import org.apache.spark.storage.{StorageLevel, TaskResultBlockId} -import org.apache.spark.util.{ChildFirstURLClassLoader, MutableURLClassLoader, - SparkUncaughtExceptionHandler, AkkaUtils, Utils} +import org.apache.spark.util._ /** - * Spark executor used with Mesos, YARN, and the standalone scheduler. - * In coarse-grained mode, an existing actor system is provided. + * Spark executor, backed by a threadpool to run tasks. + * + * This can be used with Mesos, YARN, and the standalone scheduler. + * An internal RPC interface (at the moment Akka) is used for communication with the driver, + * except in the case of Mesos fine-grained mode. */ private[spark] class Executor( executorId: String, @@ -47,8 +49,8 @@ private[spark] class Executor( env: SparkEnv, userClassPath: Seq[URL] = Nil, isLocal: Boolean = false) - extends Logging -{ + extends Logging { + logInfo(s"Starting executor ID $executorId on host $executorHostname") // Application dependencies (added through SparkContext) that we've fetched so far on this node. @@ -78,9 +80,8 @@ private[spark] class Executor( } // Start worker thread pool - val threadPool = Utils.newDaemonCachedThreadPool("Executor task launch worker") - - val executorSource = new ExecutorSource(this, executorId) + private val threadPool = Utils.newDaemonCachedThreadPool("Executor task launch worker") + private val executorSource = new ExecutorSource(threadPool, executorId) if (!isLocal) { env.metricsSystem.registerSource(executorSource) @@ -103,7 +104,7 @@ private[spark] class Executor( private val replClassLoader = addReplClassLoaderIfNeeded(urlClassLoader) // Set the classloader for serializer - env.serializer.setDefaultClassLoader(urlClassLoader) + env.serializer.setDefaultClassLoader(replClassLoader) // Akka's message frame size. If task result is bigger than this, we use the block manager // to send the result back. 
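Before the next Executor hunk, it may help to see the result-routing rule that the surrounding code implements, in condensed form. This is a minimal sketch under stated assumptions: the function and parameter names are invented here, the frame-size comparison is simplified, and the real code returns serialized DirectTaskResult/IndirectTaskResult values rather than strings.

    // Hedged sketch: how a finished task's serialized result travels back to the driver.
    def resultRoute(resultSize: Long, maxResultSize: Long, akkaFrameSize: Long): String = {
      if (maxResultSize > 0 && resultSize > maxResultSize) {
        "dropped"       // over the configured max result size: only an indirect marker is reported
      } else if (resultSize >= akkaFrameSize) {
        "blockManager"  // too large for an Akka message: stored as a block, block id sent instead
      } else {
        "direct"        // small enough to embed directly in the status update
      }
    }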
@@ -122,21 +123,21 @@ private[spark] class Executor( taskId: Long, attemptNumber: Int, taskName: String, - serializedTask: ByteBuffer) { + serializedTask: ByteBuffer): Unit = { val tr = new TaskRunner(context, taskId = taskId, attemptNumber = attemptNumber, taskName, serializedTask) runningTasks.put(taskId, tr) threadPool.execute(tr) } - def killTask(taskId: Long, interruptThread: Boolean) { + def killTask(taskId: Long, interruptThread: Boolean): Unit = { val tr = runningTasks.get(taskId) if (tr != null) { tr.kill(interruptThread) } } - def stop() { + def stop(): Unit = { env.metricsSystem.report() env.actorSystem.stop(executorActor) isStopped = true @@ -146,7 +147,10 @@ private[spark] class Executor( } } - private def gcTime = ManagementFactory.getGarbageCollectorMXBeans.map(_.getCollectionTime).sum + /** Returns the total amount of time this JVM process has spent in garbage collection. */ + private def computeTotalGcTime(): Long = { + ManagementFactory.getGarbageCollectorMXBeans.map(_.getCollectionTime).sum + } class TaskRunner( execBackend: ExecutorBackend, @@ -156,12 +160,19 @@ private[spark] class Executor( serializedTask: ByteBuffer) extends Runnable { + /** Whether this task has been killed. */ @volatile private var killed = false - @volatile var task: Task[Any] = _ - @volatile var attemptedTask: Option[Task[Any]] = None + + /** How much the JVM process has spent in GC when the task starts to run. */ @volatile var startGCTime: Long = _ - def kill(interruptThread: Boolean) { + /** + * The task to run. This will be set in run() by deserializing the task binary coming + * from the driver. Once it is set, it will never be changed. + */ + @volatile var task: Task[Any] = _ + + def kill(interruptThread: Boolean): Unit = { logInfo(s"Executor is trying to kill $taskName (TID $taskId)") killed = true if (task != null) { @@ -169,14 +180,14 @@ private[spark] class Executor( } } - override def run() { + override def run(): Unit = { val deserializeStartTime = System.currentTimeMillis() Thread.currentThread.setContextClassLoader(replClassLoader) val ser = env.closureSerializer.newInstance() logInfo(s"Running $taskName (TID $taskId)") execBackend.statusUpdate(taskId, TaskState.RUNNING, EMPTY_BYTE_BUFFER) var taskStart: Long = 0 - startGCTime = gcTime + startGCTime = computeTotalGcTime() try { val (taskFiles, taskJars, taskBytes) = Task.deserializeWithDependencies(serializedTask) @@ -193,7 +204,6 @@ private[spark] class Executor( throw new TaskKilledException } - attemptedTask = Some(task) logDebug("Task " + taskId + "'s epoch is " + task.epoch) env.mapOutputTracker.updateEpoch(task.epoch) @@ -215,18 +225,17 @@ private[spark] class Executor( for (m <- task.metrics) { m.setExecutorDeserializeTime(taskStart - deserializeStartTime) m.setExecutorRunTime(taskFinish - taskStart) - m.setJvmGCTime(gcTime - startGCTime) + m.setJvmGCTime(computeTotalGcTime() - startGCTime) m.setResultSerializationTime(afterSerialization - beforeSerialization) } val accumUpdates = Accumulators.values - val directResult = new DirectTaskResult(valueBytes, accumUpdates, task.metrics.orNull) val serializedDirectResult = ser.serialize(directResult) val resultSize = serializedDirectResult.limit // directSend = sending directly back to the driver - val serializedResult = { + val serializedResult: ByteBuffer = { if (maxResultSize > 0 && resultSize > maxResultSize) { logWarning(s"Finished $taskName (TID $taskId). 
Result is larger than maxResultSize " + s"(${Utils.bytesToString(resultSize)} > ${Utils.bytesToString(maxResultSize)}), " + @@ -248,42 +257,40 @@ private[spark] class Executor( execBackend.statusUpdate(taskId, TaskState.FINISHED, serializedResult) } catch { - case ffe: FetchFailedException => { + case ffe: FetchFailedException => val reason = ffe.toTaskEndReason execBackend.statusUpdate(taskId, TaskState.FAILED, ser.serialize(reason)) - } - case _: TaskKilledException | _: InterruptedException if task.killed => { + case _: TaskKilledException | _: InterruptedException if task.killed => logInfo(s"Executor killed $taskName (TID $taskId)") execBackend.statusUpdate(taskId, TaskState.KILLED, ser.serialize(TaskKilled)) - } - case cDE: CommitDeniedException => { + case cDE: CommitDeniedException => val reason = cDE.toTaskEndReason execBackend.statusUpdate(taskId, TaskState.FAILED, ser.serialize(reason)) - } - case t: Throwable => { + case t: Throwable => // Attempt to exit cleanly by informing the driver of our failure. // If anything goes wrong (or this was a fatal exception), we will delegate to // the default uncaught exception handler, which will terminate the Executor. logError(s"Exception in $taskName (TID $taskId)", t) - val serviceTime = System.currentTimeMillis() - taskStart - val metrics = attemptedTask.flatMap(t => t.metrics) - for (m <- metrics) { - m.setExecutorRunTime(serviceTime) - m.setJvmGCTime(gcTime - startGCTime) + val metrics: Option[TaskMetrics] = Option(task).flatMap { task => + task.metrics.map { m => + m.setExecutorRunTime(System.currentTimeMillis() - taskStart) + m.setJvmGCTime(computeTotalGcTime() - startGCTime) + m + } } - val reason = new ExceptionFailure(t, metrics) - execBackend.statusUpdate(taskId, TaskState.FAILED, ser.serialize(reason)) + val taskEndReason = new ExceptionFailure(t, metrics) + execBackend.statusUpdate(taskId, TaskState.FAILED, ser.serialize(taskEndReason)) // Don't forcibly exit unless the exception was inherently fatal, to avoid // stopping other tasks unnecessarily. if (Utils.isFatalError(t)) { SparkUncaughtExceptionHandler.uncaughtException(t) } - } + } finally { // Release memory used by this thread for shuffles env.shuffleMemoryManager.releaseMemoryForThisThread() @@ -358,7 +365,7 @@ private[spark] class Executor( for ((name, timestamp) <- newFiles if currentFiles.getOrElse(name, -1L) < timestamp) { logInfo("Fetching " + name + " with timestamp " + timestamp) // Fetch file with useCache mode, close cache for local mode. - Utils.fetchFile(name, new File(SparkFiles.getRootDirectory), conf, + Utils.fetchFile(name, new File(SparkFiles.getRootDirectory()), conf, env.securityManager, hadoopConf, timestamp, useCache = !isLocal) currentFiles(name) = timestamp } @@ -370,12 +377,12 @@ private[spark] class Executor( if (currentTimeStamp < timestamp) { logInfo("Fetching " + name + " with timestamp " + timestamp) // Fetch file with useCache mode, close cache for local mode. 
- Utils.fetchFile(name, new File(SparkFiles.getRootDirectory), conf, + Utils.fetchFile(name, new File(SparkFiles.getRootDirectory()), conf, env.securityManager, hadoopConf, timestamp, useCache = !isLocal) currentJars(name) = timestamp // Add it to our class loader - val url = new File(SparkFiles.getRootDirectory, localName).toURI.toURL - if (!urlClassLoader.getURLs.contains(url)) { + val url = new File(SparkFiles.getRootDirectory(), localName).toURI.toURL + if (!urlClassLoader.getURLs().contains(url)) { logInfo("Adding " + url + " to class loader") urlClassLoader.addURL(url) } @@ -384,61 +391,70 @@ private[spark] class Executor( } } - def startDriverHeartbeater() { - val interval = conf.getInt("spark.executor.heartbeatInterval", 10000) - val timeout = AkkaUtils.lookupTimeout(conf) - val retryAttempts = AkkaUtils.numRetries(conf) - val retryIntervalMs = AkkaUtils.retryWaitMs(conf) - val heartbeatReceiverRef = AkkaUtils.makeDriverRef("HeartbeatReceiver", conf, env.actorSystem) + private val timeout = AkkaUtils.lookupTimeout(conf) + private val retryAttempts = AkkaUtils.numRetries(conf) + private val retryIntervalMs = AkkaUtils.retryWaitMs(conf) + private val heartbeatReceiverRef = + AkkaUtils.makeDriverRef("HeartbeatReceiver", conf, env.actorSystem) + + /** Reports heartbeat and metrics for active tasks to the driver. */ + private def reportHeartBeat(): Unit = { + // list of (task id, metrics) to send back to the driver + val tasksMetrics = new ArrayBuffer[(Long, TaskMetrics)]() + val curGCTime = computeTotalGcTime() + + for (taskRunner <- runningTasks.values()) { + if (taskRunner.task != null) { + taskRunner.task.metrics.foreach { metrics => + metrics.updateShuffleReadMetrics() + metrics.updateInputMetrics() + metrics.setJvmGCTime(curGCTime - taskRunner.startGCTime) + + if (isLocal) { + // JobProgressListener will hold an reference of it during + // onExecutorMetricsUpdate(), then JobProgressListener can not see + // the changes of metrics any more, so make a deep copy of it + val copiedMetrics = Utils.deserialize[TaskMetrics](Utils.serialize(metrics)) + tasksMetrics += ((taskRunner.taskId, copiedMetrics)) + } else { + // It will be copied by serialization + tasksMetrics += ((taskRunner.taskId, metrics)) + } + } + } + } - val t = new Thread() { + val message = Heartbeat(executorId, tasksMetrics.toArray, env.blockManager.blockManagerId) + try { + val response = AkkaUtils.askWithReply[HeartbeatResponse](message, heartbeatReceiverRef, + retryAttempts, retryIntervalMs, timeout) + if (response.reregisterBlockManager) { + logWarning("Told to re-register on heartbeat") + env.blockManager.reregister() + } + } catch { + case NonFatal(e) => logWarning("Issue communicating with driver in heartbeater", e) + } + } + + /** + * Starts a thread to report heartbeat and partial metrics for active tasks to driver. + * This thread stops running when the executor is stopped. 
+ */ + private def startDriverHeartbeater(): Unit = { + val interval = conf.getInt("spark.executor.heartbeatInterval", 10000) + val thread = new Thread() { override def run() { // Sleep a random interval so the heartbeats don't end up in sync Thread.sleep(interval + (math.random * interval).asInstanceOf[Int]) - while (!isStopped) { - val tasksMetrics = new ArrayBuffer[(Long, TaskMetrics)]() - val curGCTime = gcTime - - for (taskRunner <- runningTasks.values()) { - if (taskRunner.attemptedTask.nonEmpty) { - Option(taskRunner.task).flatMap(_.metrics).foreach { metrics => - metrics.updateShuffleReadMetrics() - metrics.updateInputMetrics() - metrics.setJvmGCTime(curGCTime - taskRunner.startGCTime) - - if (isLocal) { - // JobProgressListener will hold an reference of it during - // onExecutorMetricsUpdate(), then JobProgressListener can not see - // the changes of metrics any more, so make a deep copy of it - val copiedMetrics = Utils.deserialize[TaskMetrics](Utils.serialize(metrics)) - tasksMetrics += ((taskRunner.taskId, copiedMetrics)) - } else { - // It will be copied by serialization - tasksMetrics += ((taskRunner.taskId, metrics)) - } - } - } - } - - val message = Heartbeat(executorId, tasksMetrics.toArray, env.blockManager.blockManagerId) - try { - val response = AkkaUtils.askWithReply[HeartbeatResponse](message, heartbeatReceiverRef, - retryAttempts, retryIntervalMs, timeout) - if (response.reregisterBlockManager) { - logWarning("Told to re-register on heartbeat") - env.blockManager.reregister() - } - } catch { - case NonFatal(t) => logWarning("Issue communicating with driver in heartbeater", t) - } - + reportHeartBeat() Thread.sleep(interval) } } } - t.setDaemon(true) - t.setName("Driver Heartbeater") - t.start() + thread.setDaemon(true) + thread.setName("driver-heartbeater") + thread.start() } } diff --git a/core/src/main/scala/org/apache/spark/executor/ExecutorSource.scala b/core/src/main/scala/org/apache/spark/executor/ExecutorSource.scala index c4d73622c4727..293c512f8b70c 100644 --- a/core/src/main/scala/org/apache/spark/executor/ExecutorSource.scala +++ b/core/src/main/scala/org/apache/spark/executor/ExecutorSource.scala @@ -17,6 +17,8 @@ package org.apache.spark.executor +import java.util.concurrent.ThreadPoolExecutor + import scala.collection.JavaConversions._ import com.codahale.metrics.{Gauge, MetricRegistry} @@ -24,9 +26,11 @@ import org.apache.hadoop.fs.FileSystem import org.apache.spark.metrics.source.Source -private[spark] class ExecutorSource(val executor: Executor, executorId: String) extends Source { +private[spark] +class ExecutorSource(threadPool: ThreadPoolExecutor, executorId: String) extends Source { + private def fileStats(scheme: String) : Option[FileSystem.Statistics] = - FileSystem.getAllStatistics().filter(s => s.getScheme.equals(scheme)).headOption + FileSystem.getAllStatistics().find(s => s.getScheme.equals(scheme)) private def registerFileSystemStat[T]( scheme: String, name: String, f: FileSystem.Statistics => T, defaultValue: T) = { @@ -41,23 +45,23 @@ private[spark] class ExecutorSource(val executor: Executor, executorId: String) // Gauge for executor thread pool's actively executing task counts metricRegistry.register(MetricRegistry.name("threadpool", "activeTasks"), new Gauge[Int] { - override def getValue: Int = executor.threadPool.getActiveCount() + override def getValue: Int = threadPool.getActiveCount() }) // Gauge for executor thread pool's approximate total number of tasks that have been completed 
metricRegistry.register(MetricRegistry.name("threadpool", "completeTasks"), new Gauge[Long] { - override def getValue: Long = executor.threadPool.getCompletedTaskCount() + override def getValue: Long = threadPool.getCompletedTaskCount() }) // Gauge for executor thread pool's current number of threads metricRegistry.register(MetricRegistry.name("threadpool", "currentPool_size"), new Gauge[Int] { - override def getValue: Int = executor.threadPool.getPoolSize() + override def getValue: Int = threadPool.getPoolSize() }) // Gauge got executor thread pool's largest number of threads that have ever simultaneously // been in th pool metricRegistry.register(MetricRegistry.name("threadpool", "maxPool_size"), new Gauge[Int] { - override def getValue: Int = executor.threadPool.getMaximumPoolSize() + override def getValue: Int = threadPool.getMaximumPoolSize() }) // Gauge for file system stats of this executor diff --git a/core/src/main/scala/org/apache/spark/package.scala b/core/src/main/scala/org/apache/spark/package.scala index b6249b492150a..2ab41ba488ff6 100644 --- a/core/src/main/scala/org/apache/spark/package.scala +++ b/core/src/main/scala/org/apache/spark/package.scala @@ -43,5 +43,5 @@ package org.apache package object spark { // For package docs only - val SPARK_VERSION = "1.3.0-SNAPSHOT" + val SPARK_VERSION = "1.4.0-SNAPSHOT" } diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala index cf0433010aa03..a4c74ed03e330 100644 --- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala @@ -377,6 +377,12 @@ abstract class RDD[T: ClassTag]( /** * Return a sampled subset of this RDD. + * + * @param withReplacement can elements be sampled multiple times (replaced when sampled out) + * @param fraction expected size of the sample as a fraction of this RDD's size + * without replacement: probability that each element is chosen; fraction must be [0, 1] + * with replacement: expected number of times each element is chosen; fraction must be >= 0 + * @param seed seed for the random number generator */ def sample(withReplacement: Boolean, fraction: Double, @@ -960,7 +966,7 @@ abstract class RDD[T: ClassTag]( */ def aggregate[U: ClassTag](zeroValue: U)(seqOp: (U, T) => U, combOp: (U, U) => U): U = { // Clone the zero value since we will also be serializing it as part of tasks - var jobResult = Utils.clone(zeroValue, sc.env.closureSerializer.newInstance()) + var jobResult = Utils.clone(zeroValue, sc.env.serializer.newInstance()) val cleanSeqOp = sc.clean(seqOp) val cleanCombOp = sc.clean(combOp) val aggregatePartition = (it: Iterator[T]) => it.aggregate(zeroValue)(cleanSeqOp, cleanCombOp) diff --git a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala index e4170a55b7981..8feac6cb6b7a1 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala @@ -104,7 +104,7 @@ class DAGScheduler( * * All accesses to this map should be guarded by synchronizing on it (see SPARK-4454). */ - private val cacheLocs = new HashMap[Int, Array[Seq[TaskLocation]]] + private val cacheLocs = new HashMap[Int, Seq[Seq[TaskLocation]]] // For tracking failed nodes, we use the MapOutputTracker's epoch number, which is sent with // every task. 
When we detect a node failing, we note the current epoch number and failed @@ -188,14 +188,15 @@ class DAGScheduler( eventProcessLoop.post(TaskSetFailed(taskSet, reason)) } - private def getCacheLocs(rdd: RDD[_]): Array[Seq[TaskLocation]] = cacheLocs.synchronized { + private[scheduler] + def getCacheLocs(rdd: RDD[_]): Seq[Seq[TaskLocation]] = cacheLocs.synchronized { // Note: this doesn't use `getOrElse()` because this method is called O(num tasks) times if (!cacheLocs.contains(rdd.id)) { val blockIds = rdd.partitions.indices.map(index => RDDBlockId(rdd.id, index)).toArray[BlockId] - val locs = BlockManager.blockIdsToBlockManagers(blockIds, env, blockManagerMaster) - cacheLocs(rdd.id) = blockIds.map { id => - locs.getOrElse(id, Nil).map(bm => TaskLocation(bm.host, bm.executorId)) + val locs: Seq[Seq[TaskLocation]] = blockManagerMaster.getLocations(blockIds).map { bms => + bms.map(bm => TaskLocation(bm.host, bm.executorId)) } + cacheLocs(rdd.id) = locs } cacheLocs(rdd.id) } @@ -1261,7 +1262,6 @@ class DAGScheduler( return true } val visitedRdds = new HashSet[RDD[_]] - val visitedStages = new HashSet[Stage] // We are manually maintaining a stack here to prevent StackOverflowError // caused by recursively visiting val waitingForVisit = new Stack[RDD[_]] @@ -1273,7 +1273,6 @@ class DAGScheduler( case shufDep: ShuffleDependency[_, _, _] => val mapStage = getShuffleMapStage(shufDep, stage.jobId) if (!mapStage.isAvailable) { - visitedStages += mapStage waitingForVisit.push(mapStage.rdd) } // Otherwise there's no need to follow the dependency back case narrowDep: NarrowDependency[_] => diff --git a/core/src/main/scala/org/apache/spark/scheduler/Task.scala b/core/src/main/scala/org/apache/spark/scheduler/Task.scala index 847a4912eec13..4d9f940813b8e 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/Task.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/Task.scala @@ -45,7 +45,7 @@ import org.apache.spark.util.Utils private[spark] abstract class Task[T](val stageId: Int, var partitionId: Int) extends Serializable { /** - * Called by Executor to run this task. + * Called by [[Executor]] to run this task. * * @param taskAttemptId an identifier for this task attempt that is unique within a SparkContext. 
* @param attemptNumber how many times this task has been attempted (0 for the first attempt) diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala index 7a9cf1c2e7f30..f33fd4450b2a6 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala @@ -145,7 +145,7 @@ private[spark] class TaskSchedulerImpl( import sc.env.actorSystem.dispatcher sc.env.actorSystem.scheduler.schedule(SPECULATION_INTERVAL milliseconds, SPECULATION_INTERVAL milliseconds) { - Utils.tryOrExit { checkSpeculatableTasks() } + Utils.tryOrStopSparkContext(sc) { checkSpeculatableTasks() } } } } diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala index 6f77fa32ce37b..87ebf31139ce9 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala @@ -211,6 +211,7 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val actorSyste // This must be synchronized because variables mutated // in this block are read when requesting executors CoarseGrainedSchedulerBackend.this.synchronized { + addressToExecutorId -= executorInfo.executorAddress executorDataMap -= executorId executorsPendingToRemove -= executorId } @@ -371,6 +372,12 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val actorSyste logWarning(s"Executor to kill $id does not exist!") } } + // Killing executors means effectively that we want less executors than before, so also update + // the target number of executors to avoid having the backend allocate new ones. 
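The code right after this comment lowers the backend's executor target whenever executors are explicitly killed. For orientation, a sketch of the driver-side developer API that exercises this path (illustrative only; it assumes a coarse-grained backend with executors "1" and "2" already registered):

    // Illustrative only; `sc` is an assumed SparkContext using a
    // coarse-grained backend with dynamic allocation available.
    sc.requestExecutors(2)           // raises the target number of executors by two
    sc.killExecutors(Seq("1", "2"))  // with this patch, also lowers the target total,
                                     // so the backend does not immediately allocate
                                     // replacements for the killed executors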
+ val newTotal = (numExistingExecutors + numPendingExecutors - executorsPendingToRemove.size + - filteredExecutorIds.size) + doRequestTotalExecutors(newTotal) + executorsPendingToRemove ++= filteredExecutorIds doKillExecutors(filteredExecutorIds) } diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/CoarseMesosSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/CoarseMesosSchedulerBackend.scala index 90dfe14352a8e..e13de0f46ef89 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/CoarseMesosSchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/CoarseMesosSchedulerBackend.scala @@ -28,7 +28,7 @@ import org.apache.mesos.{Scheduler => MScheduler} import org.apache.mesos._ import org.apache.mesos.Protos.{TaskInfo => MesosTaskInfo, TaskState => MesosTaskState, _} -import org.apache.spark.{Logging, SparkContext, SparkEnv, SparkException} +import org.apache.spark.{Logging, SparkContext, SparkEnv, SparkException, TaskState} import org.apache.spark.scheduler.TaskSchedulerImpl import org.apache.spark.scheduler.cluster.CoarseGrainedSchedulerBackend import org.apache.spark.util.{Utils, AkkaUtils} @@ -262,20 +262,12 @@ private[spark] class CoarseMesosSchedulerBackend( .build() } - /** Check whether a Mesos task state represents a finished task */ - private def isFinished(state: MesosTaskState) = { - state == MesosTaskState.TASK_FINISHED || - state == MesosTaskState.TASK_FAILED || - state == MesosTaskState.TASK_KILLED || - state == MesosTaskState.TASK_LOST - } - override def statusUpdate(d: SchedulerDriver, status: TaskStatus) { val taskId = status.getTaskId.getValue.toInt val state = status.getState logInfo("Mesos task " + taskId + " is now " + state) synchronized { - if (isFinished(state)) { + if (TaskState.isFinished(TaskState.fromMesos(state))) { val slaveId = taskIdToSlaveId(taskId) slaveIdsWithExecutors -= slaveId taskIdToSlaveId -= taskId @@ -285,7 +277,7 @@ private[spark] class CoarseMesosSchedulerBackend( coresByTaskId -= taskId } // If it was a failure, mark the slave as failed for blacklisting purposes - if (state == MesosTaskState.TASK_FAILED || state == MesosTaskState.TASK_LOST) { + if (TaskState.isFailed(TaskState.fromMesos(state))) { failuresBySlaveId(slaveId) = failuresBySlaveId.getOrElse(slaveId, 0) + 1 if (failuresBySlaveId(slaveId) >= MAX_SLAVE_FAILURES) { logInfo("Blacklisting Mesos slave " + slaveId + " due to too many failures; " + diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MemoryUtils.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MemoryUtils.scala index 705116cb13f54..aa3ec0f8cfb9c 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MemoryUtils.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MemoryUtils.scala @@ -21,15 +21,11 @@ import org.apache.spark.SparkContext private[spark] object MemoryUtils { // These defaults copied from YARN - val OVERHEAD_FRACTION = 1.10 + val OVERHEAD_FRACTION = 0.10 val OVERHEAD_MINIMUM = 384 def calculateTotalMemory(sc: SparkContext) = { - math.max( - sc.conf.getOption("spark.mesos.executor.memoryOverhead") - .getOrElse(OVERHEAD_MINIMUM.toString) - .toInt + sc.executorMemory, - OVERHEAD_FRACTION * sc.executorMemory - ) + sc.conf.getInt("spark.mesos.executor.memoryOverhead", + math.max(OVERHEAD_FRACTION * sc.executorMemory, OVERHEAD_MINIMUM).toInt) + sc.executorMemory } } diff --git 
a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackend.scala index cfb6592e14aa8..06bb527522141 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackend.scala @@ -313,24 +313,17 @@ private[spark] class MesosSchedulerBackend( .build() } - /** Check whether a Mesos task state represents a finished task */ - def isFinished(state: MesosTaskState) = { - state == MesosTaskState.TASK_FINISHED || - state == MesosTaskState.TASK_FAILED || - state == MesosTaskState.TASK_KILLED || - state == MesosTaskState.TASK_LOST - } - override def statusUpdate(d: SchedulerDriver, status: TaskStatus) { inClassLoader() { val tid = status.getTaskId.getValue.toLong val state = TaskState.fromMesos(status.getState) synchronized { - if (status.getState == MesosTaskState.TASK_LOST && taskIdToSlaveId.contains(tid)) { + if (TaskState.isFailed(TaskState.fromMesos(status.getState)) + && taskIdToSlaveId.contains(tid)) { // We lost the executor on this slave, so remember that it's gone removeExecutor(taskIdToSlaveId(tid), "Lost executor") } - if (isFinished(status.getState)) { + if (TaskState.isFinished(state)) { taskIdToSlaveId.remove(tid) } } diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala index c8b7763f03fb7..80d66e59132da 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala @@ -1245,10 +1245,10 @@ private[spark] object BlockManager extends Logging { } } - def blockIdsToBlockManagers( + def blockIdsToHosts( blockIds: Array[BlockId], env: SparkEnv, - blockManagerMaster: BlockManagerMaster = null): Map[BlockId, Seq[BlockManagerId]] = { + blockManagerMaster: BlockManagerMaster = null): Map[BlockId, Seq[String]] = { // blockManagerMaster != null is used in tests assert(env != null || blockManagerMaster != null) @@ -1258,24 +1258,10 @@ private[spark] object BlockManager extends Logging { blockManagerMaster.getLocations(blockIds) } - val blockManagers = new HashMap[BlockId, Seq[BlockManagerId]] + val blockManagers = new HashMap[BlockId, Seq[String]] for (i <- 0 until blockIds.length) { - blockManagers(blockIds(i)) = blockLocations(i) + blockManagers(blockIds(i)) = blockLocations(i).map(_.host) } blockManagers.toMap } - - def blockIdsToExecutorIds( - blockIds: Array[BlockId], - env: SparkEnv, - blockManagerMaster: BlockManagerMaster = null): Map[BlockId, Seq[String]] = { - blockIdsToBlockManagers(blockIds, env, blockManagerMaster).mapValues(s => s.map(_.executorId)) - } - - def blockIdsToHosts( - blockIds: Array[BlockId], - env: SparkEnv, - blockManagerMaster: BlockManagerMaster = null): Map[BlockId, Seq[String]] = { - blockIdsToBlockManagers(blockIds, env, blockManagerMaster).mapValues(s => s.map(_.host)) - } } diff --git a/core/src/main/scala/org/apache/spark/storage/TachyonBlockManager.scala b/core/src/main/scala/org/apache/spark/storage/TachyonBlockManager.scala index af873034215a9..2ab6a8f3ec1d4 100644 --- a/core/src/main/scala/org/apache/spark/storage/TachyonBlockManager.scala +++ b/core/src/main/scala/org/apache/spark/storage/TachyonBlockManager.scala @@ -20,8 +20,8 @@ package org.apache.spark.storage import java.text.SimpleDateFormat import java.util.{Date, Random} -import 
tachyon.client.TachyonFS -import tachyon.client.TachyonFile +import tachyon.TachyonURI +import tachyon.client.{TachyonFile, TachyonFS} import org.apache.spark.Logging import org.apache.spark.executor.ExecutorExitCode @@ -40,7 +40,7 @@ private[spark] class TachyonBlockManager( val master: String) extends Logging { - val client = if (master != null && master != "") TachyonFS.get(master) else null + val client = if (master != null && master != "") TachyonFS.get(new TachyonURI(master)) else null if (client == null) { logError("Failed to connect to the Tachyon as the master address is not configured") @@ -60,11 +60,11 @@ private[spark] class TachyonBlockManager( addShutdownHook() def removeFile(file: TachyonFile): Boolean = { - client.delete(file.getPath(), false) + client.delete(new TachyonURI(file.getPath()), false) } def fileExists(file: TachyonFile): Boolean = { - client.exist(file.getPath()) + client.exist(new TachyonURI(file.getPath())) } def getFile(filename: String): TachyonFile = { @@ -81,7 +81,7 @@ private[spark] class TachyonBlockManager( if (old != null) { old } else { - val path = tachyonDirs(dirId) + "/" + "%02x".format(subDirId) + val path = new TachyonURI(s"${tachyonDirs(dirId)}/${"%02x".format(subDirId)}") client.mkdir(path) val newDir = client.getFile(path) subDirs(dirId)(subDirId) = newDir @@ -89,7 +89,7 @@ private[spark] class TachyonBlockManager( } } } - val filePath = subDir + "/" + filename + val filePath = new TachyonURI(s"$subDir/$filename") if(!client.exist(filePath)) { client.createFile(filePath) } @@ -101,7 +101,7 @@ private[spark] class TachyonBlockManager( // TODO: Some of the logic here could be consolidated/de-duplicated with that in the DiskStore. private def createTachyonDirs(): Array[TachyonFile] = { - logDebug("Creating tachyon directories at root dirs '" + rootDirs + "'") + logDebug(s"Creating tachyon directories at root dirs '$rootDirs'") val dateFormat = new SimpleDateFormat("yyyyMMddHHmmss") rootDirs.split(",").map { rootDir => var foundLocalDir = false @@ -113,22 +113,21 @@ private[spark] class TachyonBlockManager( tries += 1 try { tachyonDirId = "%s-%04x".format(dateFormat.format(new Date), rand.nextInt(65536)) - val path = rootDir + "/" + "spark-tachyon-" + tachyonDirId + val path = new TachyonURI(s"$rootDir/spark-tachyon-$tachyonDirId") if (!client.exist(path)) { foundLocalDir = client.mkdir(path) tachyonDir = client.getFile(path) } } catch { case e: Exception => - logWarning("Attempt " + tries + " to create tachyon dir " + tachyonDir + " failed", e) + logWarning(s"Attempt $tries to create tachyon dir $tachyonDir failed", e) } } if (!foundLocalDir) { - logError("Failed " + MAX_DIR_CREATION_ATTEMPTS + " attempts to create tachyon dir in " + - rootDir) + logError(s"Failed $MAX_DIR_CREATION_ATTEMPTS attempts to create tachyon dir in $rootDir") System.exit(ExecutorExitCode.TACHYON_STORE_FAILED_TO_CREATE_DIR) } - logInfo("Created tachyon directory at " + tachyonDir) + logInfo(s"Created tachyon directory at $tachyonDir") tachyonDir } } @@ -145,7 +144,7 @@ private[spark] class TachyonBlockManager( } } catch { case e: Exception => - logError("Exception while deleting tachyon spark dir: " + tachyonDir, e) + logError(s"Exception while deleting tachyon spark dir: $tachyonDir", e) } } client.close() diff --git a/core/src/main/scala/org/apache/spark/ui/WebUI.scala b/core/src/main/scala/org/apache/spark/ui/WebUI.scala index ec68837a1516c..ea548f23120d9 100644 --- a/core/src/main/scala/org/apache/spark/ui/WebUI.scala +++ 
b/core/src/main/scala/org/apache/spark/ui/WebUI.scala @@ -20,14 +20,15 @@ package org.apache.spark.ui import javax.servlet.http.HttpServletRequest import scala.collection.mutable.ArrayBuffer +import scala.collection.mutable.HashMap import scala.xml.Node import org.eclipse.jetty.servlet.ServletContextHandler import org.json4s.JsonAST.{JNothing, JValue} -import org.apache.spark.{Logging, SecurityManager, SparkConf} import org.apache.spark.ui.JettyUtils._ import org.apache.spark.util.Utils +import org.apache.spark.{Logging, SecurityManager, SparkConf} /** * The top level component of the UI hierarchy that contains the server. @@ -45,6 +46,7 @@ private[spark] abstract class WebUI( protected val tabs = ArrayBuffer[WebUITab]() protected val handlers = ArrayBuffer[ServletContextHandler]() + protected val pageToHandlers = new HashMap[WebUIPage, ArrayBuffer[ServletContextHandler]] protected var serverInfo: Option[ServerInfo] = None protected val localHostName = Utils.localHostName() protected val publicHostName = Option(conf.getenv("SPARK_PUBLIC_DNS")).getOrElse(localHostName) @@ -60,14 +62,30 @@ private[spark] abstract class WebUI( tab.pages.foreach(attachPage) tabs += tab } + + def detachTab(tab: WebUITab) { + tab.pages.foreach(detachPage) + tabs -= tab + } + + def detachPage(page: WebUIPage) { + pageToHandlers.remove(page).foreach(_.foreach(detachHandler)) + } /** Attach a page to this UI. */ def attachPage(page: WebUIPage) { val pagePath = "/" + page.prefix - attachHandler(createServletHandler(pagePath, - (request: HttpServletRequest) => page.render(request), securityManager, basePath)) - attachHandler(createServletHandler(pagePath.stripSuffix("/") + "/json", - (request: HttpServletRequest) => page.renderJson(request), securityManager, basePath)) + val renderHandler = createServletHandler(pagePath, + (request: HttpServletRequest) => page.render(request), securityManager, basePath) + val renderJsonHandler = createServletHandler(pagePath.stripSuffix("/") + "/json", + (request: HttpServletRequest) => page.renderJson(request), securityManager, basePath) + attachHandler(renderHandler) + attachHandler(renderJsonHandler) + pageToHandlers.getOrElseUpdate(page, ArrayBuffer[ServletContextHandler]()) + .append(renderHandler) + pageToHandlers.getOrElseUpdate(page, ArrayBuffer[ServletContextHandler]()) + .append(renderJsonHandler) + } /** Attach a handler to this UI. */ diff --git a/core/src/main/scala/org/apache/spark/util/AsynchronousListenerBus.scala b/core/src/main/scala/org/apache/spark/util/AsynchronousListenerBus.scala index 18c627e8c7a15..ce7887b76ff96 100644 --- a/core/src/main/scala/org/apache/spark/util/AsynchronousListenerBus.scala +++ b/core/src/main/scala/org/apache/spark/util/AsynchronousListenerBus.scala @@ -21,6 +21,7 @@ import java.util.concurrent._ import java.util.concurrent.atomic.AtomicBoolean import com.google.common.annotations.VisibleForTesting +import org.apache.spark.SparkContext /** * Asynchronously passes events to registered listeners. @@ -38,6 +39,8 @@ private[spark] abstract class AsynchronousListenerBus[L <: AnyRef, E](name: Stri self => + private var sparkContext: SparkContext = null + /* Cap the capacity of the event queue so we get an explicit error (rather than * an OOM exception) if it's perpetually being added to more quickly than it's being drained. 
*/ private val EVENT_QUEUE_CAPACITY = 10000 @@ -57,7 +60,7 @@ private[spark] abstract class AsynchronousListenerBus[L <: AnyRef, E](name: Stri private val listenerThread = new Thread(name) { setDaemon(true) - override def run(): Unit = Utils.logUncaughtExceptions { + override def run(): Unit = Utils.tryOrStopSparkContext(sparkContext) { while (true) { eventLock.acquire() self.synchronized { @@ -89,9 +92,12 @@ private[spark] abstract class AsynchronousListenerBus[L <: AnyRef, E](name: Stri * This first sends out all buffered events posted before this listener bus has started, then * listens for any additional events asynchronously while the listener bus is still running. * This should only be called once. + * + * @param sc Used to stop the SparkContext in case the listener thread dies. */ - def start() { + def start(sc: SparkContext) { if (started.compareAndSet(false, true)) { + sparkContext = sc listenerThread.start() } else { throw new IllegalStateException(s"$name already started!") diff --git a/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala b/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala index bce3b3afe9aba..26ffbf9350388 100644 --- a/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala +++ b/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala @@ -18,18 +18,16 @@ package org.apache.spark.util import java.lang.management.ManagementFactory -import java.lang.reflect.{Array => JArray} -import java.lang.reflect.Field -import java.lang.reflect.Modifier -import java.util.IdentityHashMap -import java.util.Random +import java.lang.reflect.{Field, Modifier} +import java.util.{IdentityHashMap, Random} import java.util.concurrent.ConcurrentHashMap - import scala.collection.mutable.ArrayBuffer +import scala.runtime.ScalaRunTime import org.apache.spark.Logging import org.apache.spark.util.collection.OpenHashSet + /** * Estimates the sizes of Java objects (number of bytes of memory they occupy), for use in * memory-aware caches. @@ -184,9 +182,9 @@ private[spark] object SizeEstimator extends Logging { private val ARRAY_SIZE_FOR_SAMPLING = 200 private val ARRAY_SAMPLE_SIZE = 100 // should be lower than ARRAY_SIZE_FOR_SAMPLING - private def visitArray(array: AnyRef, cls: Class[_], state: SearchState) { - val length = JArray.getLength(array) - val elementClass = cls.getComponentType + private def visitArray(array: AnyRef, arrayClass: Class[_], state: SearchState) { + val length = ScalaRunTime.array_length(array) + val elementClass = arrayClass.getComponentType() // Arrays have object header and length field which is an integer var arrSize: Long = alignSize(objectSize + INT_SIZE) @@ -199,22 +197,26 @@ private[spark] object SizeEstimator extends Logging { state.size += arrSize if (length <= ARRAY_SIZE_FOR_SAMPLING) { - for (i <- 0 until length) { - state.enqueue(JArray.get(array, i)) + var arrayIndex = 0 + while (arrayIndex < length) { + state.enqueue(ScalaRunTime.array_apply(array, arrayIndex).asInstanceOf[AnyRef]) + arrayIndex += 1 } } else { // Estimate the size of a large array by sampling elements without replacement. 
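The listener thread above is now wrapped in the new Utils.tryOrStopSparkContext guard (its definition appears in the Utils.scala hunk further down). A minimal sketch of the intended pattern for driver-side background work; the thread name and doPeriodicWork() are hypothetical, and the helper is private[spark], so this only applies to Spark-internal code:

    // Illustrative only: a driver-side daemon thread in the style of the
    // listener bus; `sc` and `doPeriodicWork()` are assumed to exist.
    val worker = new Thread("driver-background-work") {
      setDaemon(true)
      override def run(): Unit = Utils.tryOrStopSparkContext(sc) {
        while (true) {
          // An uncaught non-fatal exception here stops the SparkContext
          // rather than killing the user-started JVM (contrast with tryOrExit).
          doPeriodicWork()
        }
      }
    }
    worker.start()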
var size = 0.0 val rand = new Random(42) val drawn = new OpenHashSet[Int](ARRAY_SAMPLE_SIZE) - for (i <- 0 until ARRAY_SAMPLE_SIZE) { + var numElementsDrawn = 0 + while (numElementsDrawn < ARRAY_SAMPLE_SIZE) { var index = 0 do { index = rand.nextInt(length) } while (drawn.contains(index)) drawn.add(index) - val elem = JArray.get(array, index) + val elem = ScalaRunTime.array_apply(array, index).asInstanceOf[AnyRef] size += SizeEstimator.estimate(elem, state.visited) + numElementsDrawn += 1 } state.size += ((length / (ARRAY_SAMPLE_SIZE * 1.0)) * size).toLong } diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index d3dc1d09cb7b4..91d833295e376 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -42,6 +42,8 @@ import org.apache.hadoop.security.UserGroupInformation import org.apache.log4j.PropertyConfigurator import org.eclipse.jetty.util.MultiException import org.json4s._ + +import tachyon.TachyonURI import tachyon.client.{TachyonFS, TachyonFile} import org.apache.spark._ @@ -288,7 +290,7 @@ private[spark] object Utils extends Logging { } catch { case e: SecurityException => dir = null; } } - dir + dir.getCanonicalFile } /** @@ -403,7 +405,8 @@ private[spark] object Utils extends Logging { useCache: Boolean) { val fileName = url.split("/").last val targetFile = new File(targetDir, fileName) - if (useCache) { + val fetchCacheEnabled = conf.getBoolean("spark.files.useFetchCache", defaultValue = true) + if (useCache && fetchCacheEnabled) { val cachedFileName = s"${url.hashCode}${timestamp}_cache" val lockFileName = s"${url.hashCode}${timestamp}_lock" val localDir = new File(getLocalDir(conf)) @@ -969,7 +972,7 @@ private[spark] object Utils extends Logging { * Delete a file or directory and its contents recursively. */ def deleteRecursively(dir: TachyonFile, client: TachyonFS) { - if (!client.delete(dir.getPath(), true)) { + if (!client.delete(new TachyonURI(dir.getPath()), true)) { throw new IOException("Failed to delete the tachyon dir: " + dir) } } @@ -1145,6 +1148,8 @@ private[spark] object Utils extends Logging { /** * Execute a block of code that evaluates to Unit, forwarding any uncaught exceptions to the * default UncaughtExceptionHandler + * + * NOTE: This method is to be called by the spark-started JVM process. */ def tryOrExit(block: => Unit) { try { @@ -1155,6 +1160,32 @@ private[spark] object Utils extends Logging { } } + /** + * Execute a block of code that evaluates to Unit, stop SparkContext is there is any uncaught + * exception + * + * NOTE: This method is to be called by the driver-side components to avoid stopping the + * user-started JVM process completely; in contrast, tryOrExit is to be called in the + * spark-started JVM process . + */ + def tryOrStopSparkContext(sc: SparkContext)(block: => Unit) { + try { + block + } catch { + case e: ControlThrowable => throw e + case t: Throwable => + val currentThreadName = Thread.currentThread().getName + if (sc != null) { + logError(s"uncaught error in thread $currentThreadName, stopping SparkContext", t) + sc.stop() + } + if (!NonFatal(t)) { + logError(s"throw uncaught fatal error in thread $currentThreadName", t) + throw t + } + } + } + /** * Execute a block of code that evaluates to Unit, re-throwing any non-fatal uncaught * exceptions as IOException. 
This is used when implementing Externalizable and Serializable's diff --git a/core/src/test/java/org/apache/spark/JavaAPISuite.java b/core/src/test/java/org/apache/spark/JavaAPISuite.java index 74e88c767ee07..d4b5bb519157c 100644 --- a/core/src/test/java/org/apache/spark/JavaAPISuite.java +++ b/core/src/test/java/org/apache/spark/JavaAPISuite.java @@ -24,11 +24,12 @@ import java.util.*; import java.util.concurrent.*; -import org.apache.spark.input.PortableDataStream; +import scala.collection.JavaConversions; import scala.Tuple2; import scala.Tuple3; import scala.Tuple4; +import com.google.common.collect.ImmutableMap; import com.google.common.collect.Iterables; import com.google.common.collect.Iterators; import com.google.common.collect.Lists; @@ -51,8 +52,11 @@ import org.apache.spark.api.java.*; import org.apache.spark.api.java.function.*; import org.apache.spark.executor.TaskMetrics; +import org.apache.spark.input.PortableDataStream; import org.apache.spark.partial.BoundedDouble; import org.apache.spark.partial.PartialResult; +import org.apache.spark.rdd.RDD; +import org.apache.spark.serializer.KryoSerializer; import org.apache.spark.storage.StorageLevel; import org.apache.spark.util.StatCounter; @@ -267,6 +271,22 @@ public void call(String s) throws IOException { Assert.assertEquals(2, accum.value().intValue()); } + @Test + public void foreachPartition() { + final Accumulator accum = sc.accumulator(0); + JavaRDD rdd = sc.parallelize(Arrays.asList("Hello", "World")); + rdd.foreachPartition(new VoidFunction>() { + @Override + public void call(Iterator iter) throws IOException { + while (iter.hasNext()) { + iter.next(); + accum.add(1); + } + } + }); + Assert.assertEquals(2, accum.value().intValue()); + } + @Test public void toLocalIterator() { List correct = Arrays.asList(1, 2, 3, 4); @@ -657,6 +677,13 @@ public Boolean call(Integer i) { }).isEmpty()); } + @Test + public void toArray() { + JavaRDD rdd = sc.parallelize(Arrays.asList(1, 2, 3)); + List list = rdd.toArray(); + Assert.assertEquals(Arrays.asList(1, 2, 3), list); + } + @Test public void cartesian() { JavaDoubleRDD doubleRDD = sc.parallelizeDoubles(Arrays.asList(1.0, 1.0, 2.0, 3.0, 5.0, 8.0)); @@ -703,8 +730,8 @@ public void javaDoubleRDDHistoGram() { Tuple2 results = rdd.histogram(2); double[] expected_buckets = {1.0, 2.5, 4.0}; long[] expected_counts = {2, 2}; - Assert.assertArrayEquals(expected_buckets, results._1, 0.1); - Assert.assertArrayEquals(expected_counts, results._2); + Assert.assertArrayEquals(expected_buckets, results._1(), 0.1); + Assert.assertArrayEquals(expected_counts, results._2()); // Test with provided buckets long[] histogram = rdd.histogram(expected_buckets); Assert.assertArrayEquals(expected_counts, histogram); @@ -714,6 +741,80 @@ public void javaDoubleRDDHistoGram() { sc.parallelizeDoubles(new ArrayList(0), 1).histogram(new double[]{0.0, 1.0})); } + private static class DoubleComparator implements Comparator, Serializable { + public int compare(Double o1, Double o2) { + return o1.compareTo(o2); + } + } + + @Test + public void max() { + JavaDoubleRDD rdd = sc.parallelizeDoubles(Arrays.asList(1.0, 2.0, 3.0, 4.0)); + double max = rdd.max(new DoubleComparator()); + Assert.assertEquals(4.0, max, 0.001); + } + + @Test + public void min() { + JavaDoubleRDD rdd = sc.parallelizeDoubles(Arrays.asList(1.0, 2.0, 3.0, 4.0)); + double max = rdd.min(new DoubleComparator()); + Assert.assertEquals(1.0, max, 0.001); + } + + @Test + public void takeOrdered() { + JavaDoubleRDD rdd = sc.parallelizeDoubles(Arrays.asList(1.0, 
2.0, 3.0, 4.0)); + Assert.assertEquals(Arrays.asList(1.0, 2.0), rdd.takeOrdered(2, new DoubleComparator())); + Assert.assertEquals(Arrays.asList(1.0, 2.0), rdd.takeOrdered(2)); + } + + @Test + public void top() { + JavaRDD rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4)); + List top2 = rdd.top(2); + Assert.assertEquals(Arrays.asList(4, 3), top2); + } + + private static class AddInts implements Function2 { + @Override + public Integer call(Integer a, Integer b) { + return a + b; + } + } + + @Test + public void reduce() { + JavaRDD rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4)); + int sum = rdd.reduce(new AddInts()); + Assert.assertEquals(10, sum); + } + + @Test + public void reduceOnJavaDoubleRDD() { + JavaDoubleRDD rdd = sc.parallelizeDoubles(Arrays.asList(1.0, 2.0, 3.0, 4.0)); + double sum = rdd.reduce(new Function2() { + @Override + public Double call(Double v1, Double v2) throws Exception { + return v1 + v2; + } + }); + Assert.assertEquals(10.0, sum, 0.001); + } + + @Test + public void fold() { + JavaRDD rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4)); + int sum = rdd.fold(0, new AddInts()); + Assert.assertEquals(10, sum); + } + + @Test + public void aggregate() { + JavaRDD rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4)); + int sum = rdd.aggregate(0, new AddInts(), new AddInts()); + Assert.assertEquals(10, sum); + } + @Test public void map() { JavaRDD rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5)); @@ -830,6 +931,25 @@ public Iterable call(Iterator iter) { Assert.assertEquals("[3, 7]", partitionSums.collect().toString()); } + + @Test + public void mapPartitionsWithIndex() { + JavaRDD rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4), 2); + JavaRDD partitionSums = rdd.mapPartitionsWithIndex( + new Function2, Iterator>() { + @Override + public Iterator call(Integer index, Iterator iter) throws Exception { + int sum = 0; + while (iter.hasNext()) { + sum += iter.next(); + } + return Collections.singletonList(sum).iterator(); + } + }, false); + Assert.assertEquals("[3, 7]", partitionSums.collect().toString()); + } + + @Test public void repartition() { // Shrinking number of partitions @@ -1308,6 +1428,49 @@ public void checkpointAndRestore() { Assert.assertEquals(Arrays.asList(1, 2, 3, 4, 5), recovered.collect()); } + @Test + public void combineByKey() { + JavaRDD originalRDD = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5, 6)); + Function keyFunction = new Function() { + @Override + public Integer call(Integer v1) throws Exception { + return v1 % 3; + } + }; + Function createCombinerFunction = new Function() { + @Override + public Integer call(Integer v1) throws Exception { + return v1; + } + }; + + Function2 mergeValueFunction = new Function2() { + @Override + public Integer call(Integer v1, Integer v2) throws Exception { + return v1 + v2; + } + }; + + JavaPairRDD combinedRDD = originalRDD.keyBy(keyFunction) + .combineByKey(createCombinerFunction, mergeValueFunction, mergeValueFunction); + Map results = combinedRDD.collectAsMap(); + ImmutableMap expected = ImmutableMap.of(0, 9, 1, 5, 2, 7); + Assert.assertEquals(expected, results); + + Partitioner defaultPartitioner = Partitioner.defaultPartitioner( + combinedRDD.rdd(), JavaConversions.asScalaBuffer(Lists.>newArrayList())); + combinedRDD = originalRDD.keyBy(keyFunction) + .combineByKey( + createCombinerFunction, + mergeValueFunction, + mergeValueFunction, + defaultPartitioner, + false, + new KryoSerializer(new SparkConf())); + results = combinedRDD.collectAsMap(); + Assert.assertEquals(expected, results); + } + @SuppressWarnings("unchecked") 
@Test public void mapOnPairRDD() { @@ -1516,6 +1679,19 @@ public void collectAsync() throws Exception { Assert.assertEquals(1, future.jobIds().size()); } + @Test + public void takeAsync() throws Exception { + List data = Arrays.asList(1, 2, 3, 4, 5); + JavaRDD rdd = sc.parallelize(data, 1); + JavaFutureAction> future = rdd.takeAsync(1); + List result = future.get(); + Assert.assertEquals(1, result.size()); + Assert.assertEquals((Integer) 1, result.get(0)); + Assert.assertFalse(future.isCancelled()); + Assert.assertTrue(future.isDone()); + Assert.assertEquals(1, future.jobIds().size()); + } + @Test public void foreachAsync() throws Exception { List data = Arrays.asList(1, 2, 3, 4, 5); diff --git a/core/src/test/scala/org/apache/spark/CheckpointSuite.scala b/core/src/test/scala/org/apache/spark/CheckpointSuite.scala index 3b10b3a042317..32abc65385267 100644 --- a/core/src/test/scala/org/apache/spark/CheckpointSuite.scala +++ b/core/src/test/scala/org/apache/spark/CheckpointSuite.scala @@ -33,8 +33,7 @@ class CheckpointSuite extends FunSuite with LocalSparkContext with Logging { override def beforeEach() { super.beforeEach() - checkpointDir = File.createTempFile("temp", "") - checkpointDir.deleteOnExit() + checkpointDir = File.createTempFile("temp", "", Utils.createTempDir()) checkpointDir.delete() sc = new SparkContext("local", "test") sc.setCheckpointDir(checkpointDir.toString) diff --git a/core/src/test/scala/org/apache/spark/SecurityManagerSuite.scala b/core/src/test/scala/org/apache/spark/SecurityManagerSuite.scala index 43fbd3ff3f756..62cb7649c0284 100644 --- a/core/src/test/scala/org/apache/spark/SecurityManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/SecurityManagerSuite.scala @@ -21,6 +21,8 @@ import java.io.File import org.scalatest.FunSuite +import org.apache.spark.util.Utils + class SecurityManagerSuite extends FunSuite { test("set security with conf") { @@ -160,8 +162,7 @@ class SecurityManagerSuite extends FunSuite { } test("ssl off setup") { - val file = File.createTempFile("SSLOptionsSuite", "conf") - file.deleteOnExit() + val file = File.createTempFile("SSLOptionsSuite", "conf", Utils.createTempDir()) System.setProperty("spark.ssl.configFile", file.getAbsolutePath) val conf = new SparkConf() diff --git a/core/src/test/scala/org/apache/spark/SparkContextSuite.scala b/core/src/test/scala/org/apache/spark/SparkContextSuite.scala index 50f347f1954de..b07c4d93db4e6 100644 --- a/core/src/test/scala/org/apache/spark/SparkContextSuite.scala +++ b/core/src/test/scala/org/apache/spark/SparkContextSuite.scala @@ -81,24 +81,48 @@ class SparkContextSuite extends FunSuite with LocalSparkContext { } test("addFile works") { - val file = File.createTempFile("someprefix", "somesuffix") - val absolutePath = file.getAbsolutePath + val dir = Utils.createTempDir() + + val file1 = File.createTempFile("someprefix1", "somesuffix1", dir) + val absolutePath1 = file1.getAbsolutePath + + val file2 = File.createTempFile("someprefix2", "somesuffix2", dir) + val relativePath = file2.getParent + "/../" + file2.getParentFile.getName + "/" + file2.getName + val absolutePath2 = file2.getAbsolutePath + try { - Files.write("somewords", file, UTF_8) - val length = file.length() + Files.write("somewords1", file1, UTF_8) + Files.write("somewords2", file2, UTF_8) + val length1 = file1.length() + val length2 = file2.length() + sc = new SparkContext(new SparkConf().setAppName("test").setMaster("local")) - sc.addFile(file.getAbsolutePath) + sc.addFile(file1.getAbsolutePath) + sc.addFile(relativePath) 
sc.parallelize(Array(1), 1).map(x => { - val gotten = new File(SparkFiles.get(file.getName)) - if (!gotten.exists()) { - throw new SparkException("file doesn't exist") + val gotten1 = new File(SparkFiles.get(file1.getName)) + val gotten2 = new File(SparkFiles.get(file2.getName)) + if (!gotten1.exists()) { + throw new SparkException("file doesn't exist : " + absolutePath1) + } + if (!gotten2.exists()) { + throw new SparkException("file doesn't exist : " + absolutePath2) } - if (length != gotten.length()) { + + if (length1 != gotten1.length()) { + throw new SparkException( + s"file has different length $length1 than added file ${gotten1.length()} : " + absolutePath1) + } + if (length2 != gotten2.length()) { throw new SparkException( - s"file has different length $length than added file ${gotten.length()}") + s"file has different length $length2 than added file ${gotten2.length()} : " + absolutePath2) + } + + if (absolutePath1 == gotten1.getAbsolutePath) { + throw new SparkException("file should have been copied :" + absolutePath1) } - if (absolutePath == gotten.getAbsolutePath) { - throw new SparkException("file should have been copied") + if (absolutePath2 == gotten2.getAbsolutePath) { + throw new SparkException("file should have been copied : " + absolutePath2) } x }).count() diff --git a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala index 46d745c4ecbfa..4561e5b8e9663 100644 --- a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala @@ -402,8 +402,10 @@ class SparkSubmitSuite extends FunSuite with Matchers with ResetSystemProperties val archives = "file:/archive1,archive2" // spark.yarn.dist.archives val pyFiles = "py-file1,py-file2" // spark.submit.pyFiles + val tmpDir = Utils.createTempDir() + // Test jars and files - val f1 = File.createTempFile("test-submit-jars-files", "") + val f1 = File.createTempFile("test-submit-jars-files", "", tmpDir) val writer1 = new PrintWriter(f1) writer1.println("spark.jars " + jars) writer1.println("spark.files " + files) @@ -420,7 +422,7 @@ class SparkSubmitSuite extends FunSuite with Matchers with ResetSystemProperties sysProps("spark.files") should be(Utils.resolveURIs(files)) // Test files and archives (Yarn) - val f2 = File.createTempFile("test-submit-files-archives", "") + val f2 = File.createTempFile("test-submit-files-archives", "", tmpDir) val writer2 = new PrintWriter(f2) writer2.println("spark.yarn.dist.files " + files) writer2.println("spark.yarn.dist.archives " + archives) @@ -437,7 +439,7 @@ class SparkSubmitSuite extends FunSuite with Matchers with ResetSystemProperties sysProps2("spark.yarn.dist.archives") should be(Utils.resolveURIs(archives)) // Test python files - val f3 = File.createTempFile("test-submit-python-files", "") + val f3 = File.createTempFile("test-submit-python-files", "", tmpDir) val writer3 = new PrintWriter(f3) writer3.println("spark.submit.pyFiles " + pyFiles) writer3.close() diff --git a/core/src/test/scala/org/apache/spark/rdd/PipedRDDSuite.scala b/core/src/test/scala/org/apache/spark/rdd/PipedRDDSuite.scala index 1a9a0e857e546..aea76c1adcc09 100644 --- a/core/src/test/scala/org/apache/spark/rdd/PipedRDDSuite.scala +++ b/core/src/test/scala/org/apache/spark/rdd/PipedRDDSuite.scala @@ -22,7 +22,6 @@ import java.io.File import org.apache.hadoop.fs.Path import org.apache.hadoop.io.{LongWritable, Text} import org.apache.hadoop.mapred.{FileSplit, 
JobConf, TextInputFormat} -import org.apache.spark._ import org.scalatest.FunSuite import scala.collection.Map @@ -30,6 +29,9 @@ import scala.language.postfixOps import scala.sys.process._ import scala.util.Try +import org.apache.spark._ +import org.apache.spark.util.Utils + class PipedRDDSuite extends FunSuite with SharedSparkContext { test("basic pipe") { @@ -141,7 +143,7 @@ class PipedRDDSuite extends FunSuite with SharedSparkContext { // make sure symlinks were created assert(pipedLs.length > 0) // clean up top level tasks directory - new File("tasks").delete() + Utils.deleteRecursively(new File("tasks")) } else { assert(true) } diff --git a/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala index 30119ce5d4eec..63360a0f189a3 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala @@ -322,6 +322,18 @@ class DAGSchedulerSuite extends FunSuiteLike with BeforeAndAfter with LocalSpar assertDataStructuresEmpty } + test("regression test for getCacheLocs") { + val rdd = new MyRDD(sc, 3, Nil) + cacheLocations(rdd.id -> 0) = + Seq(makeBlockManagerId("hostA"), makeBlockManagerId("hostB")) + cacheLocations(rdd.id -> 1) = + Seq(makeBlockManagerId("hostB"), makeBlockManagerId("hostC")) + cacheLocations(rdd.id -> 2) = + Seq(makeBlockManagerId("hostC"), makeBlockManagerId("hostD")) + val locs = scheduler.getCacheLocs(rdd).map(_.map(_.host)) + assert(locs === Seq(Seq("hostA", "hostB"), Seq("hostB", "hostC"), Seq("hostC", "hostD"))) + } + test("avoid exponential blowup when getting preferred locs list") { // Build up a complex dependency graph with repeated zip operations, without preferred locations. var rdd: RDD[_] = new MyRDD(sc, 1, Nil) diff --git a/core/src/test/scala/org/apache/spark/scheduler/EventLoggingListenerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/EventLoggingListenerSuite.scala index 992dde66f982f..448258a754153 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/EventLoggingListenerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/EventLoggingListenerSuite.scala @@ -25,9 +25,9 @@ import scala.io.Source import org.apache.hadoop.fs.Path import org.json4s.jackson.JsonMethods._ -import org.scalatest.{BeforeAndAfter, FunSuite} +import org.scalatest.{FunSuiteLike, BeforeAndAfter, FunSuite} -import org.apache.spark.{Logging, SparkConf, SparkContext, SPARK_VERSION} +import org.apache.spark._ import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.io._ import org.apache.spark.util.{JsonProtocol, Utils} @@ -39,7 +39,8 @@ import org.apache.spark.util.{JsonProtocol, Utils} * logging events, whether the parsing of the file names is correct, and whether the logged events * can be read and deserialized into actual SparkListenerEvents. 
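For context on how such SparkListenerEvents reach listeners at runtime, a sketch of registering a custom listener on a live SparkContext (illustrative only, not part of this patch; `sc` is assumed, and events are delivered asynchronously by the LiveListenerBus, which this patch now starts with start(sc)):

    // Illustrative only; `sc` is an assumed, existing SparkContext.
    import org.apache.spark.scheduler.{SparkListener, SparkListenerJobEnd}

    val listener = new SparkListener {
      override def onJobEnd(jobEnd: SparkListenerJobEnd): Unit =
        println(s"job ${jobEnd.jobId} finished with ${jobEnd.jobResult}")
    }
    sc.addSparkListener(listener)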
*/ -class EventLoggingListenerSuite extends FunSuite with BeforeAndAfter with Logging { +class EventLoggingListenerSuite extends FunSuite with LocalSparkContext with BeforeAndAfter + with Logging { import EventLoggingListenerSuite._ private val fileSystem = Utils.getHadoopFileSystem("/", @@ -144,7 +145,7 @@ class EventLoggingListenerSuite extends FunSuite with BeforeAndAfter with Loggin // A comprehensive test on JSON de/serialization of all events is in JsonProtocolSuite eventLogger.start() - listenerBus.start() + listenerBus.start(sc) listenerBus.addListener(eventLogger) listenerBus.postToAll(applicationStart) listenerBus.postToAll(applicationEnd) diff --git a/core/src/test/scala/org/apache/spark/scheduler/SparkListenerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/SparkListenerSuite.scala index 3a41ee8d4ae0c..627c9a4ddfffc 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/SparkListenerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/SparkListenerSuite.scala @@ -46,7 +46,7 @@ class SparkListenerSuite extends FunSuite with LocalSparkContext with Matchers assert(counter.count === 0) // Starting listener bus should flush all buffered events - bus.start() + bus.start(sc) assert(bus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS)) assert(counter.count === 5) @@ -58,8 +58,8 @@ class SparkListenerSuite extends FunSuite with LocalSparkContext with Matchers // Listener bus must not be started twice intercept[IllegalStateException] { val bus = new LiveListenerBus - bus.start() - bus.start() + bus.start(sc) + bus.start(sc) } // ... or stopped before starting @@ -96,7 +96,7 @@ class SparkListenerSuite extends FunSuite with LocalSparkContext with Matchers val blockingListener = new BlockingListener bus.addListener(blockingListener) - bus.start() + bus.start(sc) bus.post(SparkListenerJobEnd(0, jobCompletionTime, JobSucceeded)) listenerStarted.acquire() @@ -347,7 +347,7 @@ class SparkListenerSuite extends FunSuite with LocalSparkContext with Matchers bus.addListener(badListener) bus.addListener(jobCounter1) bus.addListener(jobCounter2) - bus.start() + bus.start(sc) // Post events to all listeners, and wait until the queue is drained (1 to 5).foreach { _ => bus.post(SparkListenerJobEnd(0, jobCompletionTime, JobSucceeded)) } diff --git a/core/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MemoryUtilsSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MemoryUtilsSuite.scala new file mode 100644 index 0000000000000..3fa0115e68259 --- /dev/null +++ b/core/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MemoryUtilsSuite.scala @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
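The suite introduced here pins down the revised Mesos sizing: unless spark.mesos.executor.memoryOverhead is set, the overhead is max(0.10 * executorMemory, 384) MB on top of executorMemory. A standalone sketch of the arithmetic mirroring the assertions below (the helper name is made up; only the formula comes from the patch):

    // Illustrative restatement of MemoryUtils.calculateTotalMemory's formula.
    def totalMemoryMB(executorMemoryMB: Int, overheadOverrideMB: Option[Int] = None): Int =
      overheadOverrideMB.getOrElse(
        math.max(0.10 * executorMemoryMB, 384).toInt) + executorMemoryMB

    totalMemoryMB(512)              // 384 + 512  = 896
    totalMemoryMB(4096)             // 409 + 4096 = 4505
    totalMemoryMB(4096, Some(400))  // 400 + 4096 = 4496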
+ */ + +package org.apache.spark.scheduler.cluster.mesos + +import org.mockito.Mockito._ +import org.scalatest.FunSuite +import org.scalatest.mock.MockitoSugar + +import org.apache.spark.{SparkConf, SparkContext} + +class MemoryUtilsSuite extends FunSuite with MockitoSugar { + test("MesosMemoryUtils should always override memoryOverhead when it's set") { + val sparkConf = new SparkConf + + val sc = mock[SparkContext] + when(sc.conf).thenReturn(sparkConf) + + // 384 > sc.executorMemory * 0.1 => 512 + 384 = 896 + when(sc.executorMemory).thenReturn(512) + assert(MemoryUtils.calculateTotalMemory(sc) === 896) + + // 384 < sc.executorMemory * 0.1 => 4096 + (4096 * 0.1) = 4505.6 + when(sc.executorMemory).thenReturn(4096) + assert(MemoryUtils.calculateTotalMemory(sc) === 4505) + + // set memoryOverhead + sparkConf.set("spark.mesos.executor.memoryOverhead", "100") + assert(MemoryUtils.calculateTotalMemory(sc) === 4196) + sparkConf.set("spark.mesos.executor.memoryOverhead", "400") + assert(MemoryUtils.calculateTotalMemory(sc) === 4496) + } +} diff --git a/core/src/test/scala/org/apache/spark/scheduler/mesos/MesosSchedulerBackendSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackendSuite.scala similarity index 98% rename from core/src/test/scala/org/apache/spark/scheduler/mesos/MesosSchedulerBackendSuite.scala rename to core/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackendSuite.scala index afbaa9ade811f..f1a4380d349b3 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/mesos/MesosSchedulerBackendSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackendSuite.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.spark.scheduler.mesos +package org.apache.spark.scheduler.cluster.mesos import java.nio.ByteBuffer import java.util @@ -24,21 +24,20 @@ import java.util.Collections import scala.collection.mutable import scala.collection.mutable.ArrayBuffer -import org.apache.mesos.SchedulerDriver -import org.apache.mesos.Protos._ import org.apache.mesos.Protos.Value.Scalar -import org.mockito.Mockito._ +import org.apache.mesos.Protos._ +import org.apache.mesos.SchedulerDriver import org.mockito.Matchers._ +import org.mockito.Mockito._ import org.mockito.{ArgumentCaptor, Matchers} import org.scalatest.FunSuite import org.scalatest.mock.MockitoSugar -import org.apache.spark.{LocalSparkContext, SparkConf, SparkContext} import org.apache.spark.executor.MesosExecutorBackend +import org.apache.spark.scheduler.cluster.ExecutorInfo import org.apache.spark.scheduler.{LiveListenerBus, SparkListenerExecutorAdded, TaskDescription, TaskSchedulerImpl, WorkerOffer} -import org.apache.spark.scheduler.cluster.ExecutorInfo -import org.apache.spark.scheduler.cluster.mesos.{MesosSchedulerBackend, MemoryUtils} +import org.apache.spark.{LocalSparkContext, SparkConf, SparkContext} class MesosSchedulerBackendSuite extends FunSuite with LocalSparkContext with MockitoSugar { diff --git a/core/src/test/scala/org/apache/spark/scheduler/mesos/MesosTaskLaunchDataSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosTaskLaunchDataSuite.scala similarity index 92% rename from core/src/test/scala/org/apache/spark/scheduler/mesos/MesosTaskLaunchDataSuite.scala rename to core/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosTaskLaunchDataSuite.scala index 86a42a7398e4d..eebcba40f8a1c 100644 --- 
a/core/src/test/scala/org/apache/spark/scheduler/mesos/MesosTaskLaunchDataSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosTaskLaunchDataSuite.scala @@ -15,14 +15,12 @@ * limitations under the License. */ -package org.apache.spark.scheduler.mesos +package org.apache.spark.scheduler.cluster.mesos import java.nio.ByteBuffer import org.scalatest.FunSuite -import org.apache.spark.scheduler.cluster.mesos.MesosTaskLaunchData - class MesosTaskLaunchDataSuite extends FunSuite { test("serialize and deserialize data must be same") { val serializedTask = ByteBuffer.allocate(40) diff --git a/core/src/test/scala/org/apache/spark/storage/BlockObjectWriterSuite.scala b/core/src/test/scala/org/apache/spark/storage/BlockObjectWriterSuite.scala index c21c92b63ad13..78bbc4ec2c620 100644 --- a/core/src/test/scala/org/apache/spark/storage/BlockObjectWriterSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/BlockObjectWriterSuite.scala @@ -16,16 +16,18 @@ */ package org.apache.spark.storage -import org.scalatest.FunSuite import java.io.File + +import org.scalatest.FunSuite + +import org.apache.spark.SparkConf import org.apache.spark.executor.ShuffleWriteMetrics import org.apache.spark.serializer.JavaSerializer -import org.apache.spark.SparkConf +import org.apache.spark.util.Utils class BlockObjectWriterSuite extends FunSuite { test("verify write metrics") { - val file = new File("somefile") - file.deleteOnExit() + val file = new File(Utils.createTempDir(), "somefile") val writeMetrics = new ShuffleWriteMetrics() val writer = new DiskBlockObjectWriter(new TestBlockId("0"), file, new JavaSerializer(new SparkConf()), 1024, os => os, true, writeMetrics) @@ -47,8 +49,7 @@ class BlockObjectWriterSuite extends FunSuite { } test("verify write metrics on revert") { - val file = new File("somefile") - file.deleteOnExit() + val file = new File(Utils.createTempDir(), "somefile") val writeMetrics = new ShuffleWriteMetrics() val writer = new DiskBlockObjectWriter(new TestBlockId("0"), file, new JavaSerializer(new SparkConf()), 1024, os => os, true, writeMetrics) @@ -71,8 +72,7 @@ class BlockObjectWriterSuite extends FunSuite { } test("Reopening a closed block writer") { - val file = new File("somefile") - file.deleteOnExit() + val file = new File(Utils.createTempDir(), "somefile") val writeMetrics = new ShuffleWriteMetrics() val writer = new DiskBlockObjectWriter(new TestBlockId("0"), file, new JavaSerializer(new SparkConf()), 1024, os => os, true, writeMetrics) diff --git a/core/src/test/scala/org/apache/spark/ui/UISeleniumSuite.scala b/core/src/test/scala/org/apache/spark/ui/UISeleniumSuite.scala index 6a972381faf14..0d155982a8c54 100644 --- a/core/src/test/scala/org/apache/spark/ui/UISeleniumSuite.scala +++ b/core/src/test/scala/org/apache/spark/ui/UISeleniumSuite.scala @@ -17,20 +17,24 @@ package org.apache.spark.ui +import javax.servlet.http.HttpServletRequest + import scala.collection.JavaConversions._ +import scala.xml.Node -import org.openqa.selenium.{By, WebDriver} import org.openqa.selenium.htmlunit.HtmlUnitDriver +import org.openqa.selenium.{By, WebDriver} import org.scalatest._ import org.scalatest.concurrent.Eventually._ import org.scalatest.selenium.WebBrowser import org.scalatest.time.SpanSugar._ -import org.apache.spark._ import org.apache.spark.LocalSparkContext._ +import org.apache.spark._ import org.apache.spark.api.java.StorageLevels import org.apache.spark.shuffle.FetchFailedException + /** * Selenium tests for the Spark Web UI. 
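The Selenium test added below exercises the new detachTab path from WebUI.scala earlier in this patch. Reduced to its essentials (WebUITab, WebUIPage, and sc.ui are private[spark], so this mirrors the test rather than a user-facing API; `sc` is assumed):

    // Illustrative only; compiles inside the org.apache.spark package.
    import javax.servlet.http.HttpServletRequest
    import scala.xml.Node
    import org.apache.spark.ui.{WebUIPage, WebUITab}

    val ui = sc.ui.get
    val fooTab = new WebUITab(ui, "foo") {
      attachPage(new WebUIPage("") {
        def render(request: HttpServletRequest): Seq[Node] = <b>html magic</b>
      })
    }
    ui.attachTab(fooTab)   // handlers for /foo are registered and served
    ui.detachTab(fooTab)   // the handlers registered for /foo are removed again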
*/ @@ -310,4 +314,46 @@ class UISeleniumSuite extends FunSuite with WebBrowser with Matchers with Before } } } + + test("attaching and detaching a new tab") { + withSpark(newSparkContext()) { sc => + val sparkUI = sc.ui.get + + val newTab = new WebUITab(sparkUI, "foo") { + attachPage(new WebUIPage("") { + def render(request: HttpServletRequest): Seq[Node] = { + "html magic" + } + }) + } + sparkUI.attachTab(newTab) + eventually(timeout(10 seconds), interval(50 milliseconds)) { + go to (sc.ui.get.appUIAddress.stripSuffix("/")) + find(cssSelector("""ul li a[href*="jobs"]""")) should not be(None) + find(cssSelector("""ul li a[href*="stages"]""")) should not be(None) + find(cssSelector("""ul li a[href*="storage"]""")) should not be(None) + find(cssSelector("""ul li a[href*="environment"]""")) should not be(None) + find(cssSelector("""ul li a[href*="foo"]""")) should not be(None) + } + eventually(timeout(10 seconds), interval(50 milliseconds)) { + // check whether new page exists + go to (sc.ui.get.appUIAddress.stripSuffix("/") + "/foo") + find(cssSelector("b")).get.text should include ("html magic") + } + sparkUI.detachTab(newTab) + eventually(timeout(10 seconds), interval(50 milliseconds)) { + go to (sc.ui.get.appUIAddress.stripSuffix("/")) + find(cssSelector("""ul li a[href*="jobs"]""")) should not be(None) + find(cssSelector("""ul li a[href*="stages"]""")) should not be(None) + find(cssSelector("""ul li a[href*="storage"]""")) should not be(None) + find(cssSelector("""ul li a[href*="environment"]""")) should not be(None) + find(cssSelector("""ul li a[href*="foo"]""")) should be(None) + } + eventually(timeout(10 seconds), interval(50 milliseconds)) { + // check new page not exist + go to (sc.ui.get.appUIAddress.stripSuffix("/") + "/foo") + find(cssSelector("b")) should be(None) + } + } + } } diff --git a/core/src/test/scala/org/apache/spark/ui/UISuite.scala b/core/src/test/scala/org/apache/spark/ui/UISuite.scala index 92a21f82f3c21..77a038dc1720d 100644 --- a/core/src/test/scala/org/apache/spark/ui/UISuite.scala +++ b/core/src/test/scala/org/apache/spark/ui/UISuite.scala @@ -18,7 +18,6 @@ package org.apache.spark.ui import java.net.ServerSocket -import javax.servlet.http.HttpServletRequest import scala.io.Source import scala.util.{Failure, Success, Try} @@ -28,9 +27,8 @@ import org.scalatest.FunSuite import org.scalatest.concurrent.Eventually._ import org.scalatest.time.SpanSugar._ -import org.apache.spark.{SparkContext, SparkConf} import org.apache.spark.LocalSparkContext._ -import scala.xml.Node +import org.apache.spark.{SparkConf, SparkContext} class UISuite extends FunSuite { @@ -72,40 +70,6 @@ class UISuite extends FunSuite { } } - ignore("attaching a new tab") { - withSpark(newSparkContext()) { sc => - val sparkUI = sc.ui.get - - val newTab = new WebUITab(sparkUI, "foo") { - attachPage(new WebUIPage("") { - def render(request: HttpServletRequest): Seq[Node] = { - "html magic" - } - }) - } - sparkUI.attachTab(newTab) - eventually(timeout(10 seconds), interval(50 milliseconds)) { - val html = Source.fromURL(sparkUI.appUIAddress).mkString - assert(!html.contains("random data that should not be present")) - - // check whether new page exists - assert(html.toLowerCase.contains("foo")) - - // check whether other pages still exist - assert(html.toLowerCase.contains("stages")) - assert(html.toLowerCase.contains("storage")) - assert(html.toLowerCase.contains("environment")) - assert(html.toLowerCase.contains("executors")) - } - - eventually(timeout(10 seconds), interval(50 milliseconds)) { - val 
html = Source.fromURL(sparkUI.appUIAddress.stripSuffix("/") + "/foo").mkString - // check whether new page exists - assert(html.contains("magic")) - } - } - } - test("jetty selects different port under contention") { val server = new ServerSocket(0) val startPort = server.getLocalPort diff --git a/core/src/test/scala/org/apache/spark/util/FileAppenderSuite.scala b/core/src/test/scala/org/apache/spark/util/FileAppenderSuite.scala index 4dc5b6103db74..43b6a405cb68c 100644 --- a/core/src/test/scala/org/apache/spark/util/FileAppenderSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/FileAppenderSuite.scala @@ -32,7 +32,7 @@ import org.apache.spark.util.logging.{RollingFileAppender, SizeBasedRollingPolic class FileAppenderSuite extends FunSuite with BeforeAndAfter with Logging { - val testFile = new File("FileAppenderSuite-test-" + System.currentTimeMillis).getAbsoluteFile + val testFile = new File(Utils.createTempDir(), "FileAppenderSuite-test").getAbsoluteFile before { cleanup() diff --git a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala index b91428efadfd0..5d93086082189 100644 --- a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala @@ -122,7 +122,6 @@ class UtilsSuite extends FunSuite with ResetSystemProperties { test("reading offset bytes of a file") { val tmpDir2 = Utils.createTempDir() - tmpDir2.deleteOnExit() val f1Path = tmpDir2 + "/f1" val f1 = new FileOutputStream(f1Path) f1.write("1\n2\n3\n4\n5\n6\n7\n8\n9\n".getBytes(UTF_8)) @@ -151,7 +150,6 @@ class UtilsSuite extends FunSuite with ResetSystemProperties { test("reading offset bytes across multiple files") { val tmpDir = Utils.createTempDir() - tmpDir.deleteOnExit() val files = (1 to 3).map(i => new File(tmpDir, i.toString)) Files.write("0123456789", files(0), UTF_8) Files.write("abcdefghij", files(1), UTF_8) @@ -357,7 +355,8 @@ class UtilsSuite extends FunSuite with ResetSystemProperties { } test("loading properties from file") { - val outFile = File.createTempFile("test-load-spark-properties", "test") + val tmpDir = Utils.createTempDir() + val outFile = File.createTempFile("test-load-spark-properties", "test", tmpDir) try { System.setProperty("spark.test.fileNameLoadB", "2") Files.write("spark.test.fileNameLoadA true\n" + @@ -370,7 +369,7 @@ class UtilsSuite extends FunSuite with ResetSystemProperties { assert(sparkConf.getBoolean("spark.test.fileNameLoadA", false) === true) assert(sparkConf.getInt("spark.test.fileNameLoadB", 1) === 2) } finally { - outFile.delete() + Utils.deleteRecursively(tmpDir) } } diff --git a/dev/lint-python b/dev/lint-python index 772f856154ae0..fded654893a7c 100755 --- a/dev/lint-python +++ b/dev/lint-python @@ -19,43 +19,53 @@ SCRIPT_DIR="$( cd "$( dirname "$0" )" && pwd )" SPARK_ROOT_DIR="$(dirname "$SCRIPT_DIR")" -PEP8_REPORT_PATH="$SPARK_ROOT_DIR/dev/pep8-report.txt" +PATHS_TO_CHECK="./python/pyspark/ ./ec2/spark_ec2.py ./examples/src/main/python/" +PYTHON_LINT_REPORT_PATH="$SPARK_ROOT_DIR/dev/python-lint-report.txt" cd "$SPARK_ROOT_DIR" +# compileall: https://docs.python.org/2/library/compileall.html +python -B -m compileall -q -l $PATHS_TO_CHECK > "$PYTHON_LINT_REPORT_PATH" +compile_status="${PIPESTATUS[0]}" + # Get pep8 at runtime so that we don't rely on it being installed on the build server. 
#+ See: https://github.com/apache/spark/pull/1744#issuecomment-50982162 #+ TODOs: -#+ - Dynamically determine latest release version of pep8 and use that. -#+ - Download this from a more reliable source. (GitHub raw can be flaky, apparently. (?)) +#+ - Download pep8 from PyPI. It's more "official". PEP8_SCRIPT_PATH="$SPARK_ROOT_DIR/dev/pep8.py" -PEP8_SCRIPT_REMOTE_PATH="https://raw.githubusercontent.com/jcrocholl/pep8/1.5.7/pep8.py" -PEP8_PATHS_TO_CHECK="./python/pyspark/ ./ec2/spark_ec2.py ./examples/src/main/python/" +PEP8_SCRIPT_REMOTE_PATH="https://raw.githubusercontent.com/jcrocholl/pep8/1.6.2/pep8.py" +# if [ ! -e "$PEP8_SCRIPT_PATH" ]; then curl --silent -o "$PEP8_SCRIPT_PATH" "$PEP8_SCRIPT_REMOTE_PATH" -curl_status=$? +curl_status="$?" -if [ $curl_status -ne 0 ]; then +if [ "$curl_status" -ne 0 ]; then echo "Failed to download pep8.py from \"$PEP8_SCRIPT_REMOTE_PATH\"." - exit $curl_status + exit "$curl_status" fi - +# fi # There is no need to write this output to a file #+ first, but we do so so that the check status can #+ be output before the report, like with the #+ scalastyle and RAT checks. -python "$PEP8_SCRIPT_PATH" $PEP8_PATHS_TO_CHECK > "$PEP8_REPORT_PATH" -pep8_status=${PIPESTATUS[0]} #$? +python "$PEP8_SCRIPT_PATH" --ignore=E402,E731,E241,W503,E226 $PATHS_TO_CHECK >> "$PYTHON_LINT_REPORT_PATH" +pep8_status="${PIPESTATUS[0]}" + +if [ "$compile_status" -eq 0 -a "$pep8_status" -eq 0 ]; then + lint_status=0 +else + lint_status=1 +fi -if [ $pep8_status -ne 0 ]; then - echo "PEP 8 checks failed." - cat "$PEP8_REPORT_PATH" +if [ "$lint_status" -ne 0 ]; then + echo "Python lint checks failed." + cat "$PYTHON_LINT_REPORT_PATH" else - echo "PEP 8 checks passed." + echo "Python lint checks passed." fi -rm "$PEP8_REPORT_PATH" rm "$PEP8_SCRIPT_PATH" +rm "$PYTHON_LINT_REPORT_PATH" -exit $pep8_status +exit "$lint_status" diff --git a/dev/run-tests-jenkins b/dev/run-tests-jenkins index 6a849e4f77207..5f4000e83925c 100755 --- a/dev/run-tests-jenkins +++ b/dev/run-tests-jenkins @@ -49,6 +49,21 @@ SHORT_COMMIT_HASH="${ghprbActualCommit:0:7}" TESTS_TIMEOUT="120m" # format: http://linux.die.net/man/1/timeout +# Array to capture all tests to run on the pull request. These tests are held under the +#+ dev/tests/ directory. +# +# To write a PR test: +#+ * the file must reside within the dev/tests directory +#+ * be an executable bash script +#+ * accept two arguments on the command line, the first being the Github PR long commit +#+ hash and the second the Github SHA1 hash +#+ * and, lastly, return string output to be included in the pr message output that will +#+ be posted to Github +PR_TESTS=( + "pr_merge_ability" + "pr_public_classes" +) + function post_message () { local message=$1 local data="{\"body\": \"$message\"}" @@ -131,48 +146,22 @@ function send_archived_logs () { fi } - -# We diff master...$ghprbActualCommit because that gets us changes introduced in the PR -#+ and not anything else added to master since the PR was branched. - -# check PR merge-ability and check for new public classes -{ - if [ "$sha1" == "$ghprbActualCommit" ]; then - merge_note=" * This patch **does not merge cleanly**." - else - merge_note=" * This patch merges cleanly." 
+# Environment variable to capture PR test output +pr_message="" + +# Run pull request tests +for t in "${PR_TESTS[@]}"; do + this_test="${FWDIR}/dev/tests/${t}.sh" + # Ensure the test is a file and is executable + if [ -x "$this_test" ]; then + echo "ghprb: $ghprbActualCommit sha1: $sha1" + this_mssg="`bash \"${this_test}\" \"${ghprbActualCommit}\" \"${sha1}\" 2>/dev/null`" + # Check if this is the merge test as we submit that note *before* and *after* + # the tests run + [ "$t" == "pr_merge_ability" ] && merge_note="${this_mssg}" + pr_message="${pr_message}\n${this_mssg}" fi - - source_files=$( - git diff master...$ghprbActualCommit --name-only `# diff patch against master from branch point` \ - | grep -v -e "\/test" `# ignore files in test directories` \ - | grep -e "\.py$" -e "\.java$" -e "\.scala$" `# include only code files` \ - | tr "\n" " " - ) - new_public_classes=$( - git diff master...$ghprbActualCommit ${source_files} `# diff patch against master from branch point` \ - | grep "^\+" `# filter in only added lines` \ - | sed -r -e "s/^\+//g" `# remove the leading +` \ - | grep -e "trait " -e "class " `# filter in lines with these key words` \ - | grep -e "{" -e "(" `# filter in lines with these key words, too` \ - | grep -v -e "\@\@" -e "private" `# exclude lines with these words` \ - | grep -v -e "^// " -e "^/\*" -e "^ \* " `# exclude comment lines` \ - | sed -r -e "s/\{.*//g" `# remove from the { onwards` \ - | sed -r -e "s/\}//g" `# just in case, remove }; they mess the JSON` \ - | sed -r -e "s/\"/\\\\\"/g" `# escape double quotes; they mess the JSON` \ - | sed -r -e "s/^(.*)$/\`\1\`/g" `# surround with backticks for style` \ - | sed -r -e "s/^/ \* /g" `# prepend ' *' to start of line` \ - | sed -r -e "s/$/\\\n/g" `# append newline to end of line` \ - | tr -d "\n" `# remove actual LF characters` - ) - - if [ -z "$new_public_classes" ]; then - public_classes_note=" * This patch adds no public classes." - else - public_classes_note=" * This patch adds the following public classes _(experimental)_:" - public_classes_note="${public_classes_note}\n${new_public_classes}" - fi -} +done # post start message { @@ -181,7 +170,6 @@ function send_archived_logs () { PR $ghprbPullId at commit [\`${SHORT_COMMIT_HASH}\`](${COMMIT_URL})." start_message="${start_message}\n${merge_note}" - # start_message="${start_message}\n${public_classes_note}" post_message "$start_message" } @@ -234,8 +222,7 @@ function send_archived_logs () { PR $ghprbPullId at commit [\`${SHORT_COMMIT_HASH}\`](${COMMIT_URL})." result_message="${result_message}\n${test_result_note}" - result_message="${result_message}\n${merge_note}" - result_message="${result_message}\n${public_classes_note}" + result_message="${result_message}\n${pr_message}" post_message "$result_message" } diff --git a/dev/tests/pr_merge_ability.sh b/dev/tests/pr_merge_ability.sh new file mode 100755 index 0000000000000..d9a347fe24a8c --- /dev/null +++ b/dev/tests/pr_merge_ability.sh @@ -0,0 +1,39 @@ +#!/usr/bin/env bash + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# +# This script follows the base format for testing pull requests against +# another branch and returning results to be published. More details can be +# found at dev/run-tests-jenkins. +# +# Arg1: The Github Pull Request Actual Commit +#+ known as `ghprbActualCommit` in `run-tests-jenkins` +# Arg2: The SHA1 hash +#+ known as `sha1` in `run-tests-jenkins` +# + +ghprbActualCommit="$1" +sha1="$2" + +# check PR merge-ability +if [ "${sha1}" == "${ghprbActualCommit}" ]; then + echo " * This patch **does not merge cleanly**." +else + echo " * This patch merges cleanly." +fi diff --git a/dev/tests/pr_public_classes.sh b/dev/tests/pr_public_classes.sh new file mode 100755 index 0000000000000..927295b88c963 --- /dev/null +++ b/dev/tests/pr_public_classes.sh @@ -0,0 +1,65 @@ +#!/usr/bin/env bash + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# +# This script follows the base format for testing pull requests against +# another branch and returning results to be published. More details can be +# found at dev/run-tests-jenkins. +# +# Arg1: The Github Pull Request Actual Commit +#+ known as `ghprbActualCommit` in `run-tests-jenkins` +# Arg2: The SHA1 hash +#+ known as `sha1` in `run-tests-jenkins` +# + +# We diff master...$ghprbActualCommit because that gets us changes introduced in the PR +#+ and not anything else added to master since the PR was branched. 
+ +ghprbActualCommit="$1" +sha1="$2" + +source_files=$( + git diff master...$ghprbActualCommit --name-only `# diff patch against master from branch point` \ + | grep -v -e "\/test" `# ignore files in test directories` \ + | grep -e "\.py$" -e "\.java$" -e "\.scala$" `# include only code files` \ + | tr "\n" " " +) +new_public_classes=$( + git diff master...$ghprbActualCommit ${source_files} `# diff patch against master from branch point` \ + | grep "^\+" `# filter in only added lines` \ + | sed -r -e "s/^\+//g" `# remove the leading +` \ + | grep -e "trait " -e "class " `# filter in lines with these key words` \ + | grep -e "{" -e "(" `# filter in lines with these key words, too` \ + | grep -v -e "\@\@" -e "private" `# exclude lines with these words` \ + | grep -v -e "^// " -e "^/\*" -e "^ \* " `# exclude comment lines` \ + | sed -r -e "s/\{.*//g" `# remove from the { onwards` \ + | sed -r -e "s/\}//g" `# just in case, remove }; they mess the JSON` \ + | sed -r -e "s/\"/\\\\\"/g" `# escape double quotes; they mess the JSON` \ + | sed -r -e "s/^(.*)$/\`\1\`/g" `# surround with backticks for style` \ + | sed -r -e "s/^/ \* /g" `# prepend ' *' to start of line` \ + | sed -r -e "s/$/\\\n/g" `# append newline to end of line` \ + | tr -d "\n" `# remove actual LF characters` +) + +if [ -z "$new_public_classes" ]; then + echo " * This patch adds no public classes." +else + public_classes_note=" * This patch adds the following public classes _(experimental)_:" + echo "${public_classes_note}\n${new_public_classes}" +fi diff --git a/docs/_config.yml b/docs/_config.yml index 0652927a8ce9b..b22b627f09007 100644 --- a/docs/_config.yml +++ b/docs/_config.yml @@ -14,8 +14,8 @@ include: # These allow the documentation to be updated with newer releases # of Spark, Scala, and Mesos. -SPARK_VERSION: 1.3.0-SNAPSHOT -SPARK_VERSION_SHORT: 1.3.0 +SPARK_VERSION: 1.4.0-SNAPSHOT +SPARK_VERSION_SHORT: 1.4.0 SCALA_BINARY_VERSION: "2.10" SCALA_VERSION: "2.10.4" MESOS_VERSION: 0.21.0 diff --git a/docs/building-spark.md b/docs/building-spark.md index 57d0ca834f460..ea79c5bc276d3 100644 --- a/docs/building-spark.md +++ b/docs/building-spark.md @@ -23,6 +23,18 @@ build/mvn -Pyarn -Phadoop-2.4 -Dhadoop.version=2.4.0 -DskipTests clean package Other build examples can be found below. +**Note:** When building on an encrypted filesystem (if your home directory is encrypted, for example), then the Spark build might fail with a "Filename too long" error. As a workaround, add the following in the configuration args of the `scala-maven-plugin` in the project `pom.xml`: + + -Xmax-classfile-name + 128 + +and in `project/SparkBuild.scala` add: + + scalacOptions in Compile ++= Seq("-Xmax-classfile-name", "128"), + +to the `sharedSettings` val. See also [this PR](https://github.com/apache/spark/pull/2883/files) if you are unsure of where to add these lines. + + # Setting up Maven's Memory Usage You'll need to configure Maven to use more memory than usual by setting `MAVEN_OPTS`. We recommend the following settings: diff --git a/docs/configuration.md b/docs/configuration.md index 63fc99e7d3e29..7fe11475212b3 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -745,6 +745,18 @@ Apart from these, the following properties are also available, and may be useful the driver, in seconds. 
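As context for the "Filename too long" workaround added to building-spark.md above, here is a trimmed-down, hypothetical sketch of the sbt side only. The `scalacOptions` line is taken from the note; the object name and everything around it are placeholders, not the real contents of `project/SparkBuild.scala`. On the Maven side the two values are typically two separate `<arg>` entries under the scala-maven-plugin configuration, per the same note.

{% highlight scala %}
import sbt._
import Keys._

// Hypothetical, trimmed-down stand-in for the real build definition.
object SparkBuildSketch extends Build {
  lazy val sharedSettings: Seq[Setting[_]] = Seq(
    // Workaround for "Filename too long" on encrypted filesystems (from the note above).
    scalacOptions in Compile ++= Seq("-Xmax-classfile-name", "128")
  )
}
{% endhighlight %}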
+ + spark.files.useFetchCache + true + + If set to true (default), file fetching will use a local cache that is shared by executors + that belong to the same application, which can improve task launching performance when + running many executors on the same host. If set to false, these caching optimizations will + be disabled and all executors will fetch their own copies of files. This optimization may be + disabled in order to use Spark local directories that reside on NFS filesystems (see + SPARK-6313 for more details). + + spark.files.overwrite false diff --git a/docs/ec2-scripts.md b/docs/ec2-scripts.md index 8c9a1e1262d8f..7f60f82b966fe 100644 --- a/docs/ec2-scripts.md +++ b/docs/ec2-scripts.md @@ -5,7 +5,7 @@ title: Running Spark on EC2 The `spark-ec2` script, located in Spark's `ec2` directory, allows you to launch, manage and shut down Spark clusters on Amazon EC2. It automatically -sets up Spark, Shark and HDFS on the cluster for you. This guide describes +sets up Spark and HDFS on the cluster for you. This guide describes how to use `spark-ec2` to launch clusters, how to run jobs on them, and how to shut them down. It assumes you've already signed up for an EC2 account on the [Amazon Web Services site](http://aws.amazon.com/). diff --git a/docs/job-scheduling.md b/docs/job-scheduling.md index 5295e351dd711..963e88a3e1d8f 100644 --- a/docs/job-scheduling.md +++ b/docs/job-scheduling.md @@ -14,8 +14,7 @@ runs an independent set of executor processes. The cluster managers that Spark r facilities for [scheduling across applications](#scheduling-across-applications). Second, _within_ each Spark application, multiple "jobs" (Spark actions) may be running concurrently if they were submitted by different threads. This is common if your application is serving requests -over the network; for example, the [Shark](http://shark.cs.berkeley.edu) server works this way. Spark -includes a [fair scheduler](#scheduling-within-an-application) to schedule resources within each SparkContext. +over the network. Spark includes a [fair scheduler](#scheduling-within-an-application) to schedule resources within each SparkContext. # Scheduling Across Applications @@ -52,8 +51,7 @@ an application to gain back cores on one node when it has work to do. To use thi Note that none of the modes currently provide memory sharing across applications. If you would like to share data this way, we recommend running a single server application that can serve multiple requests by querying -the same RDDs. For example, the [Shark](http://shark.cs.berkeley.edu) JDBC server works this way for SQL -queries. In future releases, in-memory storage systems such as [Tachyon](http://tachyon-project.org) will +the same RDDs. In future releases, in-memory storage systems such as [Tachyon](http://tachyon-project.org) will provide another approach to share RDDs. ## Dynamic Resource Allocation diff --git a/docs/mllib-data-types.md b/docs/mllib-data-types.md index fe6c1bf7bfd99..4f2a2f71048f7 100644 --- a/docs/mllib-data-types.md +++ b/docs/mllib-data-types.md @@ -78,13 +78,13 @@ MLlib recognizes the following types as dense vectors: and the following as sparse vectors: -* MLlib's [`SparseVector`](api/python/pyspark.mllib.linalg.SparseVector-class.html). +* MLlib's [`SparseVector`](api/python/pyspark.mllib.html#pyspark.mllib.linalg.SparseVector). 
* SciPy's [`csc_matrix`](http://docs.scipy.org/doc/scipy/reference/generated/scipy.sparse.csc_matrix.html#scipy.sparse.csc_matrix) with a single column We recommend using NumPy arrays over lists for efficiency, and using the factory methods implemented -in [`Vectors`](api/python/pyspark.mllib.linalg.Vectors-class.html) to create sparse vectors. +in [`Vectors`](api/python/pyspark.mllib.html#pyspark.mllib.linalg.Vector) to create sparse vectors. {% highlight python %} import numpy as np @@ -151,7 +151,7 @@ LabeledPoint neg = new LabeledPoint(1.0, Vectors.sparse(3, new int[] {0, 2}, new
A labeled point is represented by -[`LabeledPoint`](api/python/pyspark.mllib.regression.LabeledPoint-class.html). +[`LabeledPoint`](api/python/pyspark.mllib.html#pyspark.mllib.regression.LabeledPoint). {% highlight python %} from pyspark.mllib.linalg import SparseVector @@ -211,7 +211,7 @@ JavaRDD examples =
-[`MLUtils.loadLibSVMFile`](api/python/pyspark.mllib.util.MLUtils-class.html) reads training +[`MLUtils.loadLibSVMFile`](api/python/pyspark.mllib.html#pyspark.mllib.util.MLUtils) reads training examples stored in LIBSVM format. {% highlight python %} diff --git a/docs/mllib-ensembles.md b/docs/mllib-ensembles.md index cbfb682609af3..7521fb14a7bd6 100644 --- a/docs/mllib-ensembles.md +++ b/docs/mllib-ensembles.md @@ -464,8 +464,8 @@ first one being the training dataset and the second being the validation dataset The training is stopped when the improvement in the validation error is not more than a certain tolerance (supplied by the `validationTol` argument in `BoostingStrategy`). In practice, the validation error decreases initially and later increases. There might be cases in which the validation error does not change monotonically, -and the user is advised to set a large enough negative tolerance and examine the validation curve to to tune the number of -iterations. +and the user is advised to set a large enough negative tolerance and examine the validation curve using `evaluateEachIteration` +(which gives the error or loss per iteration) to tune the number of iterations. ### Examples diff --git a/docs/mllib-naive-bayes.md b/docs/mllib-naive-bayes.md index 55b8f2ce6c364..a83472f5be52e 100644 --- a/docs/mllib-naive-bayes.md +++ b/docs/mllib-naive-bayes.md @@ -106,11 +106,11 @@ NaiveBayesModel sameModel = NaiveBayesModel.load(sc.sc(), "myModelPath");
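The mllib-ensembles.md change above points users at `evaluateEachIteration` to tune the number of boosting iterations from the validation curve. A minimal Scala sketch of how that might look, assuming the method takes a validation set and a loss and returns one error value per iteration; the training and validation RDDs are assumed to be prepared elsewhere.

{% highlight scala %}
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.mllib.tree.GradientBoostedTrees
import org.apache.spark.mllib.tree.configuration.BoostingStrategy
import org.apache.spark.mllib.tree.loss.SquaredError
import org.apache.spark.rdd.RDD

def pickNumIterations(train: RDD[LabeledPoint], validation: RDD[LabeledPoint]): Int = {
  val boostingStrategy = BoostingStrategy.defaultParams("Regression")
  boostingStrategy.numIterations = 100  // train more iterations than we expect to keep
  val model = GradientBoostedTrees.train(train, boostingStrategy)
  // One validation error per boosting iteration; keep the iteration count with the lowest error.
  val errors = model.evaluateEachIteration(validation, SquaredError)
  errors.zipWithIndex.minBy(_._1)._2 + 1
}
{% endhighlight %}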
-[NaiveBayes](api/python/pyspark.mllib.classification.NaiveBayes-class.html) implements multinomial +[NaiveBayes](api/python/pyspark.mllib.html#pyspark.mllib.classification.NaiveBayes) implements multinomial naive Bayes. It takes an RDD of -[LabeledPoint](api/python/pyspark.mllib.regression.LabeledPoint-class.html) and an optionally +[LabeledPoint](api/python/pyspark.mllib.html#pyspark.mllib.regression.LabeledPoint) and an optionally smoothing parameter `lambda` as input, and output a -[NaiveBayesModel](api/python/pyspark.mllib.classification.NaiveBayesModel-class.html), which can be +[NaiveBayesModel](api/python/pyspark.mllib.html#pyspark.mllib.classification.NaiveBayesModel), which can be used for evaluation and prediction. Note that the Python API does not yet support model save/load but will in the future. diff --git a/docs/mllib-optimization.md b/docs/mllib-optimization.md index 4d101afca2c97..6cabc1610a151 100644 --- a/docs/mllib-optimization.md +++ b/docs/mllib-optimization.md @@ -203,6 +203,10 @@ regularization, as well as L2 regularizer. recommended. * `maxNumIterations` is the maximal number of iterations that L-BFGS can be run. * `regParam` is the regularization parameter when using regularization. +* `convergenceTol` controls how much relative change is still allowed when L-BFGS +is considered to converge. This must be nonnegative. Lower values are less tolerant and +therefore generally cause more iterations to be run. This value looks at both average +improvement and the norm of gradient inside [Breeze LBFGS](https://github.com/scalanlp/breeze/blob/master/math/src/main/scala/breeze/optimize/LBFGS.scala). The `return` is a tuple containing two elements. The first element is a column matrix containing weights for every feature, and the second element is an array containing diff --git a/docs/mllib-statistics.md b/docs/mllib-statistics.md index ca8c29218f52d..887eae7f4f07b 100644 --- a/docs/mllib-statistics.md +++ b/docs/mllib-statistics.md @@ -81,8 +81,8 @@ System.out.println(summary.numNonzeros()); // number of nonzeros in each column
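To make the new `convergenceTol` description in the mllib-optimization.md hunk above concrete, here is a hedged sketch of an `LBFGS.runLBFGS` call with the tolerance spelled out. It assumes the training RDD of (label, features-with-intercept) pairs has been prepared as in that guide, and the positional argument order shown in the comments.

{% highlight scala %}
import org.apache.spark.mllib.linalg.{Vector, Vectors}
import org.apache.spark.mllib.optimization.{LBFGS, LogisticGradient, SquaredL2Updater}
import org.apache.spark.rdd.RDD

def fit(training: RDD[(Double, Vector)], numFeatures: Int): (Vector, Array[Double]) =
  LBFGS.runLBFGS(
    training,
    new LogisticGradient(),
    new SquaredL2Updater(),
    10,     // numCorrections
    1e-4,   // convergenceTol: lower is stricter and usually means more iterations
    50,     // maxNumIterations
    0.1,    // regParam
    Vectors.dense(new Array[Double](numFeatures + 1)))  // initial weights incl. intercept
{% endhighlight %}

The returned tuple is the fitted weight vector and the loss history, so the effect of tightening `convergenceTol` can be read directly off the length of the history.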
-[`colStats()`](api/python/pyspark.mllib.stat.Statistics-class.html#colStats) returns an instance of -[`MultivariateStatisticalSummary`](api/python/pyspark.mllib.stat.MultivariateStatisticalSummary-class.html), +[`colStats()`](api/python/pyspark.mllib.html#pyspark.mllib.stat.Statistics.colStats) returns an instance of +[`MultivariateStatisticalSummary`](api/python/pyspark.mllib.html#pyspark.mllib.stat.MultivariateStatisticalSummary), which contains the column-wise max, min, mean, variance, and number of nonzeros, as well as the total count. @@ -169,7 +169,7 @@ Matrix correlMatrix = Statistics.corr(data.rdd(), "pearson");
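The summary-statistics paragraph above has a direct Scala counterpart; a small sketch with hypothetical input vectors, assuming a `SparkContext` named `sc` is already in scope.

{% highlight scala %}
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.stat.{MultivariateStatisticalSummary, Statistics}

val observations = sc.parallelize(Seq(
  Vectors.dense(1.0, 10.0, 100.0),
  Vectors.dense(2.0, 20.0, 200.0),
  Vectors.dense(3.0, 30.0, 300.0)))

val summary: MultivariateStatisticalSummary = Statistics.colStats(observations)
println(summary.mean)         // column-wise mean
println(summary.variance)     // column-wise variance
println(summary.numNonzeros)  // number of nonzeros in each column
{% endhighlight %}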
-[`Statistics`](api/python/pyspark.mllib.stat.Statistics-class.html) provides methods to +[`Statistics`](api/python/pyspark.mllib.html#pyspark.mllib.stat.Statistics) provides methods to calculate correlations between series. Depending on the type of input, two `RDD[Double]`s or an `RDD[Vector]`, the output will be a `Double` or the correlation `Matrix` respectively. @@ -258,7 +258,7 @@ JavaPairRDD exactSample = data.sampleByKeyExact(false, fractions); {% endhighlight %}
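Likewise for the correlation paragraph above, a short Scala sketch with toy series, again assuming `sc` is in scope.

{% highlight scala %}
import org.apache.spark.mllib.stat.Statistics
import org.apache.spark.rdd.RDD

val seriesX: RDD[Double] = sc.parallelize(Array(1.0, 2.0, 3.0, 4.0, 5.0))
val seriesY: RDD[Double] = sc.parallelize(Array(11.0, 22.0, 33.0, 44.0, 55.0))

// Two RDD[Double]s yield a single Double; an RDD[Vector] would yield a correlation Matrix.
val correlation: Double = Statistics.corr(seriesX, seriesY, "pearson")
{% endhighlight %}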
-[`sampleByKey()`](api/python/pyspark.rdd.RDD-class.html#sampleByKey) allows users to +[`sampleByKey()`](api/python/pyspark.html#pyspark.RDD.sampleByKey) allows users to sample approximately $\lceil f_k \cdot n_k \rceil \, \forall k \in K$ items, where $f_k$ is the desired fraction for key $k$, $n_k$ is the number of key-value pairs for key $k$, and $K$ is the set of keys. @@ -476,7 +476,7 @@ JavaDoubleRDD v = u.map(
-[`RandomRDDs`](api/python/pyspark.mllib.random.RandomRDDs-class.html) provides factory +[`RandomRDDs`](api/python/pyspark.mllib.html#pyspark.mllib.random.RandomRDDs) provides factory methods to generate random double RDDs or vector RDDs. The following example generates a random double RDD, whose values follows the standard normal distribution `N(0, 1)`, and then map it to `N(1, 4)`. diff --git a/docs/programming-guide.md b/docs/programming-guide.md index eda3a95426182..5fe832b6fa100 100644 --- a/docs/programming-guide.md +++ b/docs/programming-guide.md @@ -142,8 +142,8 @@ JavaSparkContext sc = new JavaSparkContext(conf);
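The `RandomRDDs` paragraph above describes generating N(0, 1) values and mapping them to N(1, 4); in Scala that is roughly the following, with `sc` assumed in scope.

{% highlight scala %}
import org.apache.spark.mllib.random.RandomRDDs._

// One million draws from the standard normal distribution N(0, 1), in 10 partitions.
val u = normalRDD(sc, 1000000L, 10)
// Shift and scale to N(1, 4): mean 1, variance 4 (standard deviation 2).
val v = u.map(x => 1.0 + 2.0 * x)
{% endhighlight %}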
-The first thing a Spark program must do is to create a [SparkContext](api/python/pyspark.context.SparkContext-class.html) object, which tells Spark -how to access a cluster. To create a `SparkContext` you first need to build a [SparkConf](api/python/pyspark.conf.SparkConf-class.html) object +The first thing a Spark program must do is to create a [SparkContext](api/python/pyspark.html#pyspark.SparkContext) object, which tells Spark +how to access a cluster. To create a `SparkContext` you first need to build a [SparkConf](api/python/pyspark.html#pyspark.SparkConf) object that contains information about your application. {% highlight python %} @@ -912,7 +912,7 @@ The following table lists some of the common transformations supported by Spark. RDD API doc ([Scala](api/scala/index.html#org.apache.spark.rdd.RDD), [Java](api/java/index.html?org/apache/spark/api/java/JavaRDD.html), - [Python](api/python/pyspark.rdd.RDD-class.html)) + [Python](api/python/pyspark.html#pyspark.RDD)) and pair RDD functions doc ([Scala](api/scala/index.html#org.apache.spark.rdd.PairRDDFunctions), [Java](api/java/index.html?org/apache/spark/api/java/JavaPairRDD.html)) @@ -1025,7 +1025,7 @@ The following table lists some of the common actions supported by Spark. Refer t RDD API doc ([Scala](api/scala/index.html#org.apache.spark.rdd.RDD), [Java](api/java/index.html?org/apache/spark/api/java/JavaRDD.html), - [Python](api/python/pyspark.rdd.RDD-class.html)) + [Python](api/python/pyspark.html#pyspark.RDD)) and pair RDD functions doc ([Scala](api/scala/index.html#org.apache.spark.rdd.PairRDDFunctions), [Java](api/java/index.html?org/apache/spark/api/java/JavaPairRDD.html)) @@ -1105,7 +1105,7 @@ replicate it across nodes, or store it off-heap in [Tachyon](http://tachyon-proj These levels are set by passing a `StorageLevel` object ([Scala](api/scala/index.html#org.apache.spark.storage.StorageLevel), [Java](api/java/index.html?org/apache/spark/storage/StorageLevel.html), -[Python](api/python/pyspark.storagelevel.StorageLevel-class.html)) +[Python](api/python/pyspark.html#pyspark.StorageLevel)) to `persist()`. The `cache()` method is a shorthand for using the default storage level, which is `StorageLevel.MEMORY_ONLY` (store deserialized objects in memory). The full set of storage levels is: @@ -1374,7 +1374,7 @@ scala> accum.value {% endhighlight %} While this code used the built-in support for accumulators of type Int, programmers can also -create their own types by subclassing [AccumulatorParam](api/python/pyspark.accumulators.AccumulatorParam-class.html). +create their own types by subclassing [AccumulatorParam](api/python/pyspark.html#pyspark.AccumulatorParam). The AccumulatorParam interface has two methods: `zero` for providing a "zero value" for your data type, and `addInPlace` for adding two values together. For example, supposing we had a `Vector` class representing mathematical vectors, we could write: diff --git a/docs/running-on-mesos.md b/docs/running-on-mesos.md index 59a3e9d25baf1..c984639bd34cf 100644 --- a/docs/running-on-mesos.md +++ b/docs/running-on-mesos.md @@ -224,11 +224,9 @@ See the [configuration page](configuration.html) for information on Spark config spark.mesos.executor.memoryOverhead executor memory * 0.10, with minimum of 384 - This value is an additive for spark.executor.memory, specified in MiB, - which is used to calculate the total Mesos task memory. A value of 384 - implies a 384MiB overhead. Additionally, there is a hard-coded 7% minimum - overhead. 
The final overhead will be the larger of either - `spark.mesos.executor.memoryOverhead` or 7% of `spark.executor.memory`. + The amount of additional memory, specified in MB, to be allocated per executor. By default, + the overhead will be larger of either 384 or 10% of `spark.executor.memory`. If it's set, + the final overhead will be this value. diff --git a/docs/sql-programming-guide.md b/docs/sql-programming-guide.md index 11c29e20632ae..6a333fdb562a7 100644 --- a/docs/sql-programming-guide.md +++ b/docs/sql-programming-guide.md @@ -56,7 +56,7 @@ SQLContext sqlContext = new org.apache.spark.sql.SQLContext(sc);
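The reworded `spark.mesos.executor.memoryOverhead` description in the running-on-mesos.md hunk above boils down to: overhead = max(384 MB, 10% of `spark.executor.memory`) unless the property is set, in which case the set value wins. A hedged configuration sketch; the master URL, app name, and sizes are illustrative only.

{% highlight scala %}
import org.apache.spark.SparkConf

// With spark.executor.memory = 4g and no explicit overhead, the Mesos task memory is roughly
// 4096 MB + max(384, 0.10 * 4096) = 4096 + 410 = 4506 MB. Setting the property below pins the
// overhead to exactly 512 MB instead.
val conf = new SparkConf()
  .setMaster("mesos://zk://zk1:2181,zk2:2181,zk3:2181/mesos")  // illustrative master URL
  .setAppName("mesos-memory-overhead-example")
  .set("spark.executor.memory", "4g")
  .set("spark.mesos.executor.memoryOverhead", "512")
{% endhighlight %}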
The entry point into all relational functionality in Spark is the -[`SQLContext`](api/python/pyspark.sql.SQLContext-class.html) class, or one +[`SQLContext`](api/python/pyspark.sql.html#pyspark.sql.SQLContext) class, or one of its decedents. To create a basic `SQLContext`, all you need is a SparkContext. {% highlight python %} @@ -170,14 +170,14 @@ df.select("name").show() // Justin // Select everybody, but increment the age by 1 -df.select("name", df("age") + 1).show() +df.select(df("name"), df("age") + 1).show() // name (age + 1) // Michael null // Andy 31 // Justin 20 // Select people older than 21 -df.filter(df("name") > 21).show() +df.filter(df("age") > 21).show() // age name // 30 Andy @@ -220,14 +220,14 @@ df.select("name").show(); // Justin // Select everybody, but increment the age by 1 -df.select("name", df.col("age").plus(1)).show(); +df.select(df.col("name"), df.col("age").plus(1)).show(); // name (age + 1) // Michael null // Andy 31 // Justin 20 // Select people older than 21 -df.filter(df("name") > 21).show(); +df.filter(df.col("age").gt(21)).show(); // age name // 30 Andy @@ -270,14 +270,14 @@ df.select("name").show() ## Justin # Select everybody, but increment the age by 1 -df.select("name", df.age + 1).show() +df.select(df.name, df.age + 1).show() ## name (age + 1) ## Michael null ## Andy 31 ## Justin 20 # Select people older than 21 -df.filter(df.name > 21).show() +df.filter(df.age > 21).show() ## age name ## 30 Andy @@ -509,8 +509,11 @@ val people = sc.textFile("examples/src/main/resources/people.txt") // The schema is encoded in a string val schemaString = "name age" -// Import Spark SQL data types and Row. -import org.apache.spark.sql._ +// Import Row. +import org.apache.spark.sql.Row; + +// Import Spark SQL data types +import org.apache.spark.sql.types.{StructType,StructField,StringType}; // Generate the schema based on the string of schema val schema = diff --git a/docs/submitting-applications.md b/docs/submitting-applications.md index 57b074778f2b0..3ecbf2308cd44 100644 --- a/docs/submitting-applications.md +++ b/docs/submitting-applications.md @@ -133,10 +133,10 @@ The master URL passed to Spark can be in one of the following formats: Or, for a Mesos cluster using ZooKeeper, use mesos://zk://.... yarn-client Connect to a YARN cluster in -client mode. The cluster location will be found based on the HADOOP_CONF_DIR variable. +client mode. The cluster location will be found based on the HADOOP_CONF_DIR or YARN_CONF_DIR variable. yarn-cluster Connect to a YARN cluster in -cluster mode. The cluster location will be found based on HADOOP_CONF_DIR. +cluster mode. The cluster location will be found based on the HADOOP_CONF_DIR or YARN_CONF_DIR variable. 
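The sql-programming-guide.md hunk above splits the imports into `org.apache.spark.sql.Row` and `org.apache.spark.sql.types.{StructType, StructField, StringType}` for the programmatic-schema example. For context, here is a compact Scala sketch of that whole flow; the file path and column names are the ones used in the guide, `sc` is assumed in scope, and every column is typed as a string for simplicity.

{% highlight scala %}
import org.apache.spark.sql.{Row, SQLContext}
import org.apache.spark.sql.types.{StringType, StructField, StructType}

val sqlContext = new SQLContext(sc)
val people = sc.textFile("examples/src/main/resources/people.txt")

// The schema is encoded in a string of space-separated column names.
val schemaString = "name age"
val schema = StructType(
  schemaString.split(" ").map(fieldName => StructField(fieldName, StringType, nullable = true)))

// Convert each text line into a Row matching the schema, then build the DataFrame.
val rowRDD = people.map(_.split(",")).map(p => Row(p(0), p(1).trim))
val peopleDF = sqlContext.createDataFrame(rowRDD, schema)
peopleDF.registerTempTable("people")
{% endhighlight %}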
diff --git a/ec2/deploy.generic/root/spark-ec2/ec2-variables.sh b/ec2/deploy.generic/root/spark-ec2/ec2-variables.sh index 0857657152ec7..4f3e8da809f7f 100644 --- a/ec2/deploy.generic/root/spark-ec2/ec2-variables.sh +++ b/ec2/deploy.generic/root/spark-ec2/ec2-variables.sh @@ -25,7 +25,6 @@ export MAPRED_LOCAL_DIRS="{{mapred_local_dirs}}" export SPARK_LOCAL_DIRS="{{spark_local_dirs}}" export MODULES="{{modules}}" export SPARK_VERSION="{{spark_version}}" -export SHARK_VERSION="{{shark_version}}" export TACHYON_VERSION="{{tachyon_version}}" export HADOOP_MAJOR_VERSION="{{hadoop_major_version}}" export SWAP_MB="{{swap}}" diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index f848874b0c775..c467cd08ed742 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -1159,8 +1159,8 @@ def real_main(): if EC2_INSTANCE_TYPES[opts.instance_type] != \ EC2_INSTANCE_TYPES[opts.master_instance_type]: print >> stderr, \ - "Error: spark-ec2 currently does not support having a master and slaves with " + \ - "different AMI virtualization types." + "Error: spark-ec2 currently does not support having a master and slaves " + \ + "with different AMI virtualization types." print >> stderr, "master instance virtualization type: {t}".format( t=EC2_INSTANCE_TYPES[opts.master_instance_type]) print >> stderr, "slave instance virtualization type: {t}".format( diff --git a/examples/pom.xml b/examples/pom.xml index 994071d94d0ad..7e93f0eec0b91 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT ../pom.xml diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/MovieLensALS.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/MovieLensALS.scala index 91a0a860d6c71..1f4ca4fbe7778 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/MovieLensALS.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/MovieLensALS.scala @@ -175,7 +175,8 @@ object MovieLensALS { } /** Compute RMSE (Root Mean Squared Error). 
*/ - def computeRmse(model: MatrixFactorizationModel, data: RDD[Rating], implicitPrefs: Boolean) = { + def computeRmse(model: MatrixFactorizationModel, data: RDD[Rating], implicitPrefs: Boolean) + : Double = { def mapPredictedRating(r: Double) = if (implicitPrefs) math.max(math.min(r, 1.0), 0.0) else r diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/PowerIterationClusteringExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/PowerIterationClusteringExample.scala index 91c9772744f18..9f22d40c15f3f 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/PowerIterationClusteringExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/PowerIterationClusteringExample.scala @@ -116,7 +116,7 @@ object PowerIterationClusteringExample { sc.stop() } - def generateCircle(radius: Double, n: Int) = { + def generateCircle(radius: Double, n: Int): Seq[(Double, Double)] = { Seq.tabulate(n) { i => val theta = 2.0 * math.Pi * i / n (radius * math.cos(theta), radius * math.sin(theta)) @@ -147,7 +147,7 @@ object PowerIterationClusteringExample { /** * Gaussian Similarity: http://en.wikipedia.org/wiki/Radial_basis_function_kernel */ - def gaussianSimilarity(p1: (Double, Double), p2: (Double, Double), sigma: Double) = { + def gaussianSimilarity(p1: (Double, Double), p2: (Double, Double), sigma: Double): Double = { val coeff = 1.0 / (math.sqrt(2.0 * math.Pi) * sigma) val expCoeff = -1.0 / 2.0 * math.pow(sigma, 2.0) val ssquares = (p1._1 - p2._1) * (p1._1 - p2._1) + (p1._2 - p2._2) * (p1._2 - p2._2) diff --git a/external/flume-sink/pom.xml b/external/flume-sink/pom.xml index 96c2787e35cd0..67907bbfb6d1b 100644 --- a/external/flume-sink/pom.xml +++ b/external/flume-sink/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT ../../pom.xml diff --git a/external/flume/pom.xml b/external/flume/pom.xml index 172d447b77cda..8df7edbdcad33 100644 --- a/external/flume/pom.xml +++ b/external/flume/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT ../../pom.xml diff --git a/external/kafka-assembly/pom.xml b/external/kafka-assembly/pom.xml index 5109b8ed87524..0b79f47647f6b 100644 --- a/external/kafka-assembly/pom.xml +++ b/external/kafka-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT ../../pom.xml diff --git a/external/kafka/pom.xml b/external/kafka/pom.xml index 369856187a244..f695cff410a18 100644 --- a/external/kafka/pom.xml +++ b/external/kafka/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT ../../pom.xml diff --git a/external/kafka/src/test/scala/org/apache/spark/streaming/kafka/ReliableKafkaStreamSuite.scala b/external/kafka/src/test/scala/org/apache/spark/streaming/kafka/ReliableKafkaStreamSuite.scala index fc53c23abda85..3cd960d1fd1d4 100644 --- a/external/kafka/src/test/scala/org/apache/spark/streaming/kafka/ReliableKafkaStreamSuite.scala +++ b/external/kafka/src/test/scala/org/apache/spark/streaming/kafka/ReliableKafkaStreamSuite.scala @@ -25,16 +25,15 @@ import scala.concurrent.duration._ import scala.language.postfixOps import scala.util.Random -import com.google.common.io.Files import kafka.serializer.StringDecoder import kafka.utils.{ZKGroupTopicDirs, ZkUtils} -import org.apache.commons.io.FileUtils import org.scalatest.BeforeAndAfter import org.scalatest.concurrent.Eventually import org.apache.spark.SparkConf import org.apache.spark.storage.StorageLevel 
import org.apache.spark.streaming.{Milliseconds, StreamingContext} +import org.apache.spark.util.Utils class ReliableKafkaStreamSuite extends KafkaStreamSuiteBase with BeforeAndAfter with Eventually { @@ -60,7 +59,7 @@ class ReliableKafkaStreamSuite extends KafkaStreamSuiteBase with BeforeAndAfter ) ssc = new StreamingContext(sparkConf, Milliseconds(500)) - tempDirectory = Files.createTempDir() + tempDirectory = Utils.createTempDir() ssc.checkpoint(tempDirectory.getAbsolutePath) } @@ -68,10 +67,7 @@ class ReliableKafkaStreamSuite extends KafkaStreamSuiteBase with BeforeAndAfter if (ssc != null) { ssc.stop() } - if (tempDirectory != null && tempDirectory.exists()) { - FileUtils.deleteDirectory(tempDirectory) - tempDirectory = null - } + Utils.deleteRecursively(tempDirectory) tearDownKafka() } diff --git a/external/mqtt/pom.xml b/external/mqtt/pom.xml index a344f000c5002..98f95a9a64fa0 100644 --- a/external/mqtt/pom.xml +++ b/external/mqtt/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT ../../pom.xml diff --git a/external/twitter/pom.xml b/external/twitter/pom.xml index e95853f005ce2..8b6a8959ac4cf 100644 --- a/external/twitter/pom.xml +++ b/external/twitter/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT ../../pom.xml diff --git a/external/zeromq/pom.xml b/external/zeromq/pom.xml index 9b3475d7c3dc2..a50d378b34335 100644 --- a/external/zeromq/pom.xml +++ b/external/zeromq/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT ../../pom.xml diff --git a/extras/java8-tests/pom.xml b/extras/java8-tests/pom.xml index bc2f8be10c9ce..4351a8a12fe21 100644 --- a/extras/java8-tests/pom.xml +++ b/extras/java8-tests/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.10 - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT ../../pom.xml diff --git a/extras/kinesis-asl/pom.xml b/extras/kinesis-asl/pom.xml index 7e49a71907336..25847a1b33d9c 100644 --- a/extras/kinesis-asl/pom.xml +++ b/extras/kinesis-asl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.10 - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT ../../pom.xml diff --git a/extras/spark-ganglia-lgpl/pom.xml b/extras/spark-ganglia-lgpl/pom.xml index 6eb29af03f833..e14bbae4a9b6e 100644 --- a/extras/spark-ganglia-lgpl/pom.xml +++ b/extras/spark-ganglia-lgpl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.10 - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT ../../pom.xml diff --git a/graphx/pom.xml b/graphx/pom.xml index c0d534e185d7f..d38a3aa8256b7 100644 --- a/graphx/pom.xml +++ b/graphx/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT ../pom.xml diff --git a/graphx/src/main/scala/org/apache/spark/graphx/EdgeContext.scala b/graphx/src/main/scala/org/apache/spark/graphx/EdgeContext.scala index f70715fca6eea..d8be02e2023d5 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/EdgeContext.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/EdgeContext.scala @@ -49,3 +49,20 @@ abstract class EdgeContext[VD, ED, A] { et } } + +object EdgeContext { + + /** + * Extractor mainly used for Graph#aggregateMessages*. 
+ * Example: + * {{{ + * val messages = graph.aggregateMessages( + * case ctx @ EdgeContext(_, _, _, _, attr) => + * ctx.sendToDst(attr) + * , _ + _) + * }}} + */ + def unapply[VD, ED, A](edge: EdgeContext[VD, ED, A]) = + Some(edge.srcId, edge.dstId, edge.srcAttr, edge.dstAttr, edge.attr) +} + diff --git a/graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala b/graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala index dc8b4789c4b61..86f611d55aa8a 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala @@ -113,7 +113,7 @@ class GraphOps[VD: ClassTag, ED: ClassTag](graph: Graph[VD, ED]) extends Seriali * Collect the neighbor vertex attributes for each vertex. * * @note This function could be highly inefficient on power-law - * graphs where high degree vertices may force a large ammount of + * graphs where high degree vertices may force a large amount of * information to be collected to a single location. * * @param edgeDirection the direction along which to collect @@ -187,7 +187,7 @@ class GraphOps[VD: ClassTag, ED: ClassTag](graph: Graph[VD, ED]) extends Seriali /** * Join the vertices with an RDD and then apply a function from the - * the vertex and RDD entry to a new vertex value. The input table + * vertex and RDD entry to a new vertex value. The input table * should contain at most one entry for each vertex. If no entry is * provided the map function is skipped and the old value is used. * diff --git a/graphx/src/main/scala/org/apache/spark/graphx/Pregel.scala b/graphx/src/main/scala/org/apache/spark/graphx/Pregel.scala index 5e55620147df8..01b013ff716fc 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/Pregel.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/Pregel.scala @@ -78,8 +78,8 @@ object Pregel extends Logging { * * @param graph the input graph. * - * @param initialMsg the message each vertex will receive at the on - * the first iteration + * @param initialMsg the message each vertex will receive at the first + * iteration * * @param maxIterations the maximum number of iterations to run for * diff --git a/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala b/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala index e139959c3f5c1..570440ba4441f 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala @@ -25,8 +25,8 @@ import org.apache.spark.graphx._ /** * PageRank algorithm implementation. There are two implementations of PageRank implemented. 
* - * The first implementation uses the [[Pregel]] interface and runs PageRank for a fixed number - * of iterations: + * The first implementation uses the standalone [[Graph]] interface and runs PageRank + * for a fixed number of iterations: * {{{ * var PR = Array.fill(n)( 1.0 ) * val oldPR = Array.fill(n)( 1.0 ) @@ -38,7 +38,7 @@ import org.apache.spark.graphx._ * } * }}} * - * The second implementation uses the standalone [[Graph]] interface and runs PageRank until + * The second implementation uses the [[Pregel]] interface and runs PageRank until * convergence: * * {{{ diff --git a/graphx/src/test/scala/org/apache/spark/graphx/GraphSuite.scala b/graphx/src/test/scala/org/apache/spark/graphx/GraphSuite.scala index b61d9f0fbe5e4..8d15150458d26 100644 --- a/graphx/src/test/scala/org/apache/spark/graphx/GraphSuite.scala +++ b/graphx/src/test/scala/org/apache/spark/graphx/GraphSuite.scala @@ -19,13 +19,12 @@ package org.apache.spark.graphx import org.scalatest.FunSuite -import com.google.common.io.Files - import org.apache.spark.SparkContext import org.apache.spark.graphx.Graph._ import org.apache.spark.graphx.PartitionStrategy._ import org.apache.spark.rdd._ import org.apache.spark.storage.StorageLevel +import org.apache.spark.util.Utils class GraphSuite extends FunSuite with LocalSparkContext { @@ -369,8 +368,7 @@ class GraphSuite extends FunSuite with LocalSparkContext { } test("checkpoint") { - val checkpointDir = Files.createTempDir() - checkpointDir.deleteOnExit() + val checkpointDir = Utils.createTempDir() withSpark { sc => sc.setCheckpointDir(checkpointDir.getAbsolutePath) val ring = (0L to 100L).zip((1L to 99L) :+ 0L).map { case (a, b) => Edge(a, b, 1)} diff --git a/launcher/pom.xml b/launcher/pom.xml index ccbd9d0419a98..0fe2814135d88 100644 --- a/launcher/pom.xml +++ b/launcher/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.10 - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT ../pom.xml diff --git a/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java b/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java index 6ffdff63d3c78..91dcf70f105db 100644 --- a/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java +++ b/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java @@ -253,12 +253,6 @@ private boolean isClientMode(Properties userProps) { private class OptionParser extends SparkSubmitOptionParser { - private final List driverJvmKeys = Arrays.asList( - SparkLauncher.DRIVER_EXTRA_CLASSPATH, - SparkLauncher.DRIVER_EXTRA_JAVA_OPTIONS, - SparkLauncher.DRIVER_EXTRA_LIBRARY_PATH, - SparkLauncher.DRIVER_MEMORY); - @Override protected boolean handle(String opt, String value) { if (opt.equals(MASTER)) { @@ -278,9 +272,7 @@ protected boolean handle(String opt, String value) { } else if (opt.equals(CONF)) { String[] setConf = value.split("=", 2); checkArgument(setConf.length == 2, "Invalid argument to %s: %s", CONF, value); - if (driverJvmKeys.contains(setConf[0])) { - conf.put(setConf[0], setConf[1]); - } + conf.put(setConf[0], setConf[1]); } else if (opt.equals(CLASS)) { // The special classes require some special command line handling, since they allow // mixing spark-submit arguments with arguments that should be propagated to the shell diff --git a/launcher/src/test/java/org/apache/spark/launcher/SparkSubmitCommandBuilderSuite.java b/launcher/src/test/java/org/apache/spark/launcher/SparkSubmitCommandBuilderSuite.java index 815edc4e4971f..626116a9e7477 100644 --- 
a/launcher/src/test/java/org/apache/spark/launcher/SparkSubmitCommandBuilderSuite.java +++ b/launcher/src/test/java/org/apache/spark/launcher/SparkSubmitCommandBuilderSuite.java @@ -68,6 +68,8 @@ public void testCliParser() throws Exception { parser.DRIVER_JAVA_OPTIONS, "extraJavaOpt", parser.CONF, + "spark.randomOption=foo", + parser.CONF, SparkLauncher.DRIVER_EXTRA_LIBRARY_PATH + "=/driverLibPath"); Map env = new HashMap(); List cmd = buildCommand(sparkSubmitArgs, env); @@ -77,6 +79,8 @@ public void testCliParser() throws Exception { assertTrue(findInStringList(findArgValue(cmd, "-cp"), File.pathSeparator, "/driverCp")); assertTrue("Driver -Xms should be configured.", cmd.contains("-Xms42g")); assertTrue("Driver -Xmx should be configured.", cmd.contains("-Xmx42g")); + assertTrue("Command should contain user-defined conf.", + Collections.indexOfSubList(cmd, Arrays.asList(parser.CONF, "spark.randomOption=foo")) > 0); } @Test diff --git a/make-distribution.sh b/make-distribution.sh index 9ed1abfe8c598..8162fe94c1af0 100755 --- a/make-distribution.sh +++ b/make-distribution.sh @@ -32,7 +32,7 @@ SPARK_HOME="$(cd "`dirname "$0"`"; pwd)" DISTDIR="$SPARK_HOME/dist" SPARK_TACHYON=false -TACHYON_VERSION="0.5.0" +TACHYON_VERSION="0.6.1" TACHYON_TGZ="tachyon-${TACHYON_VERSION}-bin.tar.gz" TACHYON_URL="https://github.com/amplab/tachyon/releases/download/v${TACHYON_VERSION}/${TACHYON_TGZ}" diff --git a/mllib/pom.xml b/mllib/pom.xml index a76704a8c2c59..4c183543e3fa8 100644 --- a/mllib/pom.xml +++ b/mllib/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT ../pom.xml diff --git a/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala b/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala index 5bbcd2e080e07..c4a36103303a2 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala @@ -33,7 +33,7 @@ import org.apache.spark.sql.types.StructType abstract class PipelineStage extends Serializable with Logging { /** - * :: DeveloperAPI :: + * :: DeveloperApi :: * * Derives the output schema from the input schema and parameters. * The schema describes the columns and types of the data. 
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala index 6131ba8832691..fc4e12773c46d 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala @@ -41,7 +41,7 @@ class HashingTF extends UnaryTransformer[Iterable[_], Vector, HashingTF] { def getNumFeatures: Int = get(numFeatures) /** @group setParam */ - def setNumFeatures(value: Int) = set(numFeatures, value) + def setNumFeatures(value: Int): this.type = set(numFeatures, value) override protected def createTransformFunc(paramMap: ParamMap): Iterable[_] => Vector = { val hashingTF = new feature.HashingTF(paramMap(numFeatures)) diff --git a/mllib/src/main/scala/org/apache/spark/ml/param/sharedParams.scala b/mllib/src/main/scala/org/apache/spark/ml/param/sharedParams.scala index 1a70322b4cace..5d660d1e151a7 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/param/sharedParams.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/param/sharedParams.scala @@ -138,3 +138,14 @@ private[ml] trait HasOutputCol extends Params { /** @group getParam */ def getOutputCol: String = get(outputCol) } + +private[ml] trait HasCheckpointInterval extends Params { + /** + * param for checkpoint interval + * @group param + */ + val checkpointInterval: IntParam = new IntParam(this, "checkpointInterval", "checkpoint interval") + + /** @group getParam */ + def getCheckpointInterval: Int = get(checkpointInterval) +} diff --git a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala index e3515ee81af3d..514b4ef98dc5b 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala @@ -18,6 +18,7 @@ package org.apache.spark.ml.recommendation import java.{util => ju} +import java.io.IOException import scala.collection.mutable import scala.reflect.ClassTag @@ -26,6 +27,7 @@ import scala.util.hashing.byteswap64 import com.github.fommil.netlib.BLAS.{getInstance => blas} import com.github.fommil.netlib.LAPACK.{getInstance => lapack} +import org.apache.hadoop.fs.{FileSystem, Path} import org.netlib.util.intW import org.apache.spark.{Logging, Partitioner} @@ -46,7 +48,7 @@ import org.apache.spark.util.random.XORShiftRandom * Common params for ALS. */ private[recommendation] trait ALSParams extends Params with HasMaxIter with HasRegParam - with HasPredictionCol { + with HasPredictionCol with HasCheckpointInterval { /** * Param for rank of the matrix factorization. @@ -164,6 +166,7 @@ class ALSModel private[ml] ( itemFactors: RDD[(Int, Array[Float])]) extends Model[ALSModel] with ALSParams { + /** @group setParam */ def setPredictionCol(value: String): this.type = set(predictionCol, value) override def transform(dataset: DataFrame, paramMap: ParamMap): DataFrame = { @@ -262,6 +265,9 @@ class ALS extends Estimator[ALSModel] with ALSParams { /** @group setParam */ def setNonnegative(value: Boolean): this.type = set(nonnegative, value) + /** @group setParam */ + def setCheckpointInterval(value: Int): this.type = set(checkpointInterval, value) + /** * Sets both numUserBlocks and numItemBlocks to the specific value. 
* @group setParam @@ -274,6 +280,7 @@ class ALS extends Estimator[ALSModel] with ALSParams { setMaxIter(20) setRegParam(1.0) + setCheckpointInterval(10) override def fit(dataset: DataFrame, paramMap: ParamMap): ALSModel = { val map = this.paramMap ++ paramMap @@ -285,7 +292,8 @@ class ALS extends Estimator[ALSModel] with ALSParams { val (userFactors, itemFactors) = ALS.train(ratings, rank = map(rank), numUserBlocks = map(numUserBlocks), numItemBlocks = map(numItemBlocks), maxIter = map(maxIter), regParam = map(regParam), implicitPrefs = map(implicitPrefs), - alpha = map(alpha), nonnegative = map(nonnegative)) + alpha = map(alpha), nonnegative = map(nonnegative), + checkpointInterval = map(checkpointInterval)) val model = new ALSModel(this, map, map(rank), userFactors, itemFactors) Params.inheritValues(map, this, model) model @@ -494,6 +502,7 @@ object ALS extends Logging { nonnegative: Boolean = false, intermediateRDDStorageLevel: StorageLevel = StorageLevel.MEMORY_AND_DISK, finalRDDStorageLevel: StorageLevel = StorageLevel.MEMORY_AND_DISK, + checkpointInterval: Int = 10, seed: Long = 0L)( implicit ord: Ordering[ID]): (RDD[(ID, Array[Float])], RDD[(ID, Array[Float])]) = { require(intermediateRDDStorageLevel != StorageLevel.NONE, @@ -521,6 +530,18 @@ object ALS extends Logging { val seedGen = new XORShiftRandom(seed) var userFactors = initialize(userInBlocks, rank, seedGen.nextLong()) var itemFactors = initialize(itemInBlocks, rank, seedGen.nextLong()) + var previousCheckpointFile: Option[String] = None + val shouldCheckpoint: Int => Boolean = (iter) => + sc.checkpointDir.isDefined && (iter % checkpointInterval == 0) + val deletePreviousCheckpointFile: () => Unit = () => + previousCheckpointFile.foreach { file => + try { + FileSystem.get(sc.hadoopConfiguration).delete(new Path(file), true) + } catch { + case e: IOException => + logWarning(s"Cannot delete checkpoint file $file:", e) + } + } if (implicitPrefs) { for (iter <- 1 to maxIter) { userFactors.setName(s"userFactors-$iter").persist(intermediateRDDStorageLevel) @@ -528,19 +549,30 @@ object ALS extends Logging { itemFactors = computeFactors(userFactors, userOutBlocks, itemInBlocks, rank, regParam, userLocalIndexEncoder, implicitPrefs, alpha, solver) previousItemFactors.unpersist() - if (sc.checkpointDir.isDefined && (iter % 3 == 0)) { - itemFactors.checkpoint() - } itemFactors.setName(s"itemFactors-$iter").persist(intermediateRDDStorageLevel) + // TODO: Generalize PeriodicGraphCheckpointer and use it here. + if (shouldCheckpoint(iter)) { + itemFactors.checkpoint() // itemFactors gets materialized in computeFactors. 
+ } val previousUserFactors = userFactors userFactors = computeFactors(itemFactors, itemOutBlocks, userInBlocks, rank, regParam, itemLocalIndexEncoder, implicitPrefs, alpha, solver) + if (shouldCheckpoint(iter)) { + deletePreviousCheckpointFile() + previousCheckpointFile = itemFactors.getCheckpointFile + } previousUserFactors.unpersist() } } else { for (iter <- 0 until maxIter) { itemFactors = computeFactors(userFactors, userOutBlocks, itemInBlocks, rank, regParam, userLocalIndexEncoder, solver = solver) + if (shouldCheckpoint(iter)) { + itemFactors.checkpoint() + itemFactors.count() // checkpoint item factors and cut lineage + deletePreviousCheckpointFile() + previousCheckpointFile = itemFactors.getCheckpointFile + } userFactors = computeFactors(itemFactors, itemOutBlocks, userInBlocks, rank, regParam, itemLocalIndexEncoder, solver = solver) } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala index cbd87ea8aeb37..15ca2547d56a8 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala @@ -345,9 +345,13 @@ private[python] class PythonMLLibAPI extends Serializable { def predict(userAndProducts: JavaRDD[Array[Any]]): RDD[Rating] = predict(SerDe.asTupleRDD(userAndProducts.rdd)) - def getUserFeatures = SerDe.fromTuple2RDD(userFeatures.asInstanceOf[RDD[(Any, Any)]]) + def getUserFeatures: RDD[Array[Any]] = { + SerDe.fromTuple2RDD(userFeatures.asInstanceOf[RDD[(Any, Any)]]) + } - def getProductFeatures = SerDe.fromTuple2RDD(productFeatures.asInstanceOf[RDD[(Any, Any)]]) + def getProductFeatures: RDD[Array[Any]] = { + SerDe.fromTuple2RDD(productFeatures.asInstanceOf[RDD[(Any, Any)]]) + } } @@ -909,7 +913,7 @@ private[spark] object SerDe extends Serializable { // Pickler for DenseVector private[python] class DenseVectorPickler extends BasePickler[DenseVector] { - def saveState(obj: Object, out: OutputStream, pickler: Pickler) = { + def saveState(obj: Object, out: OutputStream, pickler: Pickler): Unit = { val vector: DenseVector = obj.asInstanceOf[DenseVector] val bytes = new Array[Byte](8 * vector.size) val bb = ByteBuffer.wrap(bytes) @@ -941,7 +945,7 @@ private[spark] object SerDe extends Serializable { // Pickler for DenseMatrix private[python] class DenseMatrixPickler extends BasePickler[DenseMatrix] { - def saveState(obj: Object, out: OutputStream, pickler: Pickler) = { + def saveState(obj: Object, out: OutputStream, pickler: Pickler): Unit = { val m: DenseMatrix = obj.asInstanceOf[DenseMatrix] val bytes = new Array[Byte](8 * m.values.size) val order = ByteOrder.nativeOrder() @@ -973,7 +977,7 @@ private[spark] object SerDe extends Serializable { // Pickler for SparseVector private[python] class SparseVectorPickler extends BasePickler[SparseVector] { - def saveState(obj: Object, out: OutputStream, pickler: Pickler) = { + def saveState(obj: Object, out: OutputStream, pickler: Pickler): Unit = { val v: SparseVector = obj.asInstanceOf[SparseVector] val n = v.indices.size val indiceBytes = new Array[Byte](4 * n) @@ -1015,7 +1019,7 @@ private[spark] object SerDe extends Serializable { // Pickler for LabeledPoint private[python] class LabeledPointPickler extends BasePickler[LabeledPoint] { - def saveState(obj: Object, out: OutputStream, pickler: Pickler) = { + def saveState(obj: Object, out: OutputStream, pickler: Pickler): Unit = { val point: LabeledPoint = 
obj.asInstanceOf[LabeledPoint] saveObjects(out, pickler, point.label, point.features) } @@ -1031,7 +1035,7 @@ private[spark] object SerDe extends Serializable { // Pickler for Rating private[python] class RatingPickler extends BasePickler[Rating] { - def saveState(obj: Object, out: OutputStream, pickler: Pickler) = { + def saveState(obj: Object, out: OutputStream, pickler: Pickler): Unit = { val rating: Rating = obj.asInstanceOf[Rating] saveObjects(out, pickler, rating.user, rating.product, rating.rating) } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala index b787667b018e6..e7c3599ff619c 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala @@ -163,6 +163,10 @@ class LogisticRegressionModel ( } override protected def formatVersion: String = "1.0" + + override def toString: String = { + s"${super.toString}, numClasses = ${numClasses}, threshold = ${threshold.get}" + } } object LogisticRegressionModel extends Loader[LogisticRegressionModel] { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala index 2ebc7fa5d4234..d60e82c410979 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala @@ -17,6 +17,10 @@ package org.apache.spark.mllib.classification +import java.lang.{Iterable => JIterable} + +import scala.collection.JavaConverters._ + import breeze.linalg.{DenseMatrix => BDM, DenseVector => BDV, argmax => brzArgmax, sum => brzSum} import org.json4s.JsonDSL._ import org.json4s.jackson.JsonMethods._ @@ -41,6 +45,13 @@ class NaiveBayesModel private[mllib] ( val pi: Array[Double], val theta: Array[Array[Double]]) extends ClassificationModel with Serializable with Saveable { + /** A Java-friendly constructor that takes three Iterable parameters. */ + private[mllib] def this( + labels: JIterable[Double], + pi: JIterable[Double], + theta: JIterable[JIterable[Double]]) = + this(labels.asScala.toArray, pi.asScala.toArray, theta.asScala.toArray.map(_.asScala.toArray)) + private val brzPi = new BDV[Double](pi) private val brzTheta = new BDM[Double](theta.length, theta(0).length) @@ -83,10 +94,10 @@ object NaiveBayesModel extends Loader[NaiveBayesModel] { private object SaveLoadV1_0 { - def thisFormatVersion = "1.0" + def thisFormatVersion: String = "1.0" /** Hard-code class name string in case it changes in the future */ - def thisClassName = "org.apache.spark.mllib.classification.NaiveBayesModel" + def thisClassName: String = "org.apache.spark.mllib.classification.NaiveBayesModel" /** Model data for model import/export */ case class Data(labels: Array[Double], pi: Array[Double], theta: Array[Array[Double]]) @@ -174,7 +185,7 @@ class NaiveBayes private (private var lambda: Double) extends Serializable with * * @param data RDD of [[org.apache.spark.mllib.regression.LabeledPoint]]. 
*/ - def run(data: RDD[LabeledPoint]) = { + def run(data: RDD[LabeledPoint]): NaiveBayesModel = { val requireNonnegativeValues: Vector => Unit = (v: Vector) => { val values = v match { case SparseVector(size, indices, values) => diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala index cfc7f868a02f0..52fb62dcff1b4 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala @@ -86,6 +86,10 @@ class SVMModel ( } override protected def formatVersion: String = "1.0" + + override def toString: String = { + s"${super.toString}, numClasses = 2, threshold = ${threshold.get}" + } } object SVMModel extends Loader[SVMModel] { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/impl/GLMClassificationModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/impl/GLMClassificationModel.scala index 8956189ff1158..3b6790cce47c6 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/classification/impl/GLMClassificationModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/impl/GLMClassificationModel.scala @@ -32,7 +32,7 @@ private[classification] object GLMClassificationModel { object SaveLoadV1_0 { - def thisFormatVersion = "1.0" + def thisFormatVersion: String = "1.0" /** Model data for import/export */ case class Data(weights: Vector, intercept: Double, threshold: Option[Double]) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala index e41f941fd2c2c..0f8d6a399682d 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala @@ -536,5 +536,5 @@ class VectorWithNorm(val vector: Vector, val norm: Double) extends Serializable def this(array: Array[Double]) = this(Vectors.dense(array)) /** Converts the vector to a dense vector. */ - def toDense = new VectorWithNorm(Vectors.dense(vector.toArray), norm) + def toDense: VectorWithNorm = new VectorWithNorm(Vectors.dense(vector.toArray), norm) } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala index 707da537d238f..e4e411a3c8b42 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala @@ -17,6 +17,8 @@ package org.apache.spark.mllib.clustering +import scala.collection.JavaConverters._ + import org.json4s._ import org.json4s.JsonDSL._ import org.json4s.jackson.JsonMethods._ @@ -34,6 +36,9 @@ import org.apache.spark.sql.Row */ class KMeansModel (val clusterCenters: Array[Vector]) extends Saveable with Serializable { + /** A Java-friendly constructor that takes an Iterable of Vectors. */ + def this(centers: java.lang.Iterable[Vector]) = this(centers.asScala.toArray) + /** Total number of clusters. 
*/ def k: Int = clusterCenters.length diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MultilabelMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MultilabelMetrics.scala index ea10bde5fa252..a8378a76d20ae 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MultilabelMetrics.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MultilabelMetrics.scala @@ -96,30 +96,30 @@ class MultilabelMetrics(predictionAndLabels: RDD[(Array[Double], Array[Double])] * Returns precision for a given label (category) * @param label the label. */ - def precision(label: Double) = { + def precision(label: Double): Double = { val tp = tpPerClass(label) val fp = fpPerClass.getOrElse(label, 0L) - if (tp + fp == 0) 0 else tp.toDouble / (tp + fp) + if (tp + fp == 0) 0.0 else tp.toDouble / (tp + fp) } /** * Returns recall for a given label (category) * @param label the label. */ - def recall(label: Double) = { + def recall(label: Double): Double = { val tp = tpPerClass(label) val fn = fnPerClass.getOrElse(label, 0L) - if (tp + fn == 0) 0 else tp.toDouble / (tp + fn) + if (tp + fn == 0) 0.0 else tp.toDouble / (tp + fn) } /** * Returns f1-measure for a given label (category) * @param label the label. */ - def f1Measure(label: Double) = { + def f1Measure(label: Double): Double = { val p = precision(label) val r = recall(label) - if((p + r) == 0) 0 else 2 * p * r / (p + r) + if((p + r) == 0) 0.0 else 2 * p * r / (p + r) } private lazy val sumTp = tpPerClass.foldLeft(0L) { case (sum, (_, tp)) => sum + tp } @@ -130,7 +130,7 @@ class MultilabelMetrics(predictionAndLabels: RDD[(Array[Double], Array[Double])] * Returns micro-averaged label-based precision * (equals to micro-averaged document-based precision) */ - lazy val microPrecision = { + lazy val microPrecision: Double = { val sumFp = fpPerClass.foldLeft(0L){ case(cum, (_, fp)) => cum + fp} sumTp.toDouble / (sumTp + sumFp) } @@ -139,7 +139,7 @@ class MultilabelMetrics(predictionAndLabels: RDD[(Array[Double], Array[Double])] * Returns micro-averaged label-based recall * (equals to micro-averaged document-based recall) */ - lazy val microRecall = { + lazy val microRecall: Double = { val sumFn = fnPerClass.foldLeft(0.0){ case(cum, (_, fn)) => cum + fn} sumTp.toDouble / (sumTp + sumFn) } @@ -148,7 +148,7 @@ class MultilabelMetrics(predictionAndLabels: RDD[(Array[Double], Array[Double])] * Returns micro-averaged label-based f1-measure * (equals to micro-averaged document-based f1-measure) */ - lazy val microF1Measure = 2.0 * sumTp / (2 * sumTp + sumFnClass + sumFpClass) + lazy val microF1Measure: Double = 2.0 * sumTp / (2 * sumTp + sumFnClass + sumFpClass) /** * Returns the sequence of labels in ascending order diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala index 0e4a4d0085895..849f44295f089 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala @@ -23,9 +23,15 @@ import scala.collection.mutable.{ArrayBuilder => MArrayBuilder, HashSet => MHash import breeze.linalg.{CSCMatrix => BSM, DenseMatrix => BDM, Matrix => BM} +import org.apache.spark.annotation.DeveloperApi +import org.apache.spark.sql.Row +import org.apache.spark.sql.types._ +import org.apache.spark.sql.catalyst.expressions.GenericMutableRow + /** * Trait for a local matrix. 
*/ +@SQLUserDefinedType(udt = classOf[MatrixUDT]) sealed trait Matrix extends Serializable { /** Number of rows. */ @@ -102,6 +108,88 @@ sealed trait Matrix extends Serializable { private[spark] def foreachActive(f: (Int, Int, Double) => Unit) } +@DeveloperApi +private[spark] class MatrixUDT extends UserDefinedType[Matrix] { + + override def sqlType: StructType = { + // type: 0 = sparse, 1 = dense + // the dense matrix is built by numRows, numCols, values and isTransposed, all of which are + // set as not nullable, except values since in the future, support for binary matrices might + // be added for which values are not needed. + // the sparse matrix needs colPtrs and rowIndices, which are set as + // null, while building the dense matrix. + StructType(Seq( + StructField("type", ByteType, nullable = false), + StructField("numRows", IntegerType, nullable = false), + StructField("numCols", IntegerType, nullable = false), + StructField("colPtrs", ArrayType(IntegerType, containsNull = false), nullable = true), + StructField("rowIndices", ArrayType(IntegerType, containsNull = false), nullable = true), + StructField("values", ArrayType(DoubleType, containsNull = false), nullable = true), + StructField("isTransposed", BooleanType, nullable = false) + )) + } + + override def serialize(obj: Any): Row = { + val row = new GenericMutableRow(7) + obj match { + case sm: SparseMatrix => + row.setByte(0, 0) + row.setInt(1, sm.numRows) + row.setInt(2, sm.numCols) + row.update(3, sm.colPtrs.toSeq) + row.update(4, sm.rowIndices.toSeq) + row.update(5, sm.values.toSeq) + row.setBoolean(6, sm.isTransposed) + + case dm: DenseMatrix => + row.setByte(0, 1) + row.setInt(1, dm.numRows) + row.setInt(2, dm.numCols) + row.setNullAt(3) + row.setNullAt(4) + row.update(5, dm.values.toSeq) + row.setBoolean(6, dm.isTransposed) + } + row + } + + override def deserialize(datum: Any): Matrix = { + datum match { + // TODO: something wrong with UDT serialization, should never happen. + case m: Matrix => m + case row: Row => + require(row.length == 7, + s"MatrixUDT.deserialize given row with length ${row.length} but requires length == 7") + val tpe = row.getByte(0) + val numRows = row.getInt(1) + val numCols = row.getInt(2) + val values = row.getAs[Iterable[Double]](5).toArray + val isTransposed = row.getBoolean(6) + tpe match { + case 0 => + val colPtrs = row.getAs[Iterable[Int]](3).toArray + val rowIndices = row.getAs[Iterable[Int]](4).toArray + new SparseMatrix(numRows, numCols, colPtrs, rowIndices, values, isTransposed) + case 1 => + new DenseMatrix(numRows, numCols, values, isTransposed) + } + } + } + + override def userClass: Class[Matrix] = classOf[Matrix] + + override def equals(o: Any): Boolean = { + o match { + case v: MatrixUDT => true + case _ => false + } + } + + override def hashCode(): Int = 1994 + + private[spark] override def asNullable: MatrixUDT = this +} + /** * Column-major dense matrix. * The entry values are stored in a single array of doubles with columns listed in sequence. @@ -119,6 +207,7 @@ sealed trait Matrix extends Serializable { * @param isTransposed whether the matrix is transposed. If true, `values` stores the matrix in * row major. 
*/ +@SQLUserDefinedType(udt = classOf[MatrixUDT]) class DenseMatrix( val numRows: Int, val numCols: Int, @@ -146,12 +235,16 @@ class DenseMatrix( def this(numRows: Int, numCols: Int, values: Array[Double]) = this(numRows, numCols, values, false) - override def equals(o: Any) = o match { + override def equals(o: Any): Boolean = o match { case m: DenseMatrix => m.numRows == numRows && m.numCols == numCols && Arrays.equals(toArray, m.toArray) case _ => false } + override def hashCode: Int = { + com.google.common.base.Objects.hashCode(numRows : Integer, numCols: Integer, toArray) + } + private[mllib] def toBreeze: BM[Double] = { if (!isTransposed) { new BDM[Double](numRows, numCols, values) @@ -173,7 +266,7 @@ class DenseMatrix( values(index(i, j)) = v } - override def copy = new DenseMatrix(numRows, numCols, values.clone()) + override def copy: DenseMatrix = new DenseMatrix(numRows, numCols, values.clone()) private[mllib] def map(f: Double => Double) = new DenseMatrix(numRows, numCols, values.map(f)) @@ -356,6 +449,7 @@ object DenseMatrix { * Compressed Sparse Row (CSR) format, where `colPtrs` behaves as rowPtrs, * and `rowIndices` behave as colIndices, and `values` are stored in row major. */ +@SQLUserDefinedType(udt = classOf[MatrixUDT]) class SparseMatrix( val numRows: Int, val numCols: Int, @@ -431,7 +525,9 @@ class SparseMatrix( } } - override def copy = new SparseMatrix(numRows, numCols, colPtrs, rowIndices, values.clone()) + override def copy: SparseMatrix = { + new SparseMatrix(numRows, numCols, colPtrs, rowIndices, values.clone()) + } private[mllib] def map(f: Double => Double) = new SparseMatrix(numRows, numCols, colPtrs, rowIndices, values.map(f)) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala index e9d25dcb7e778..2cda9b252ee06 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala @@ -183,6 +183,8 @@ private[spark] class VectorUDT extends UserDefinedType[Vector] { } } + override def hashCode: Int = 7919 + private[spark] override def asNullable: VectorUDT = this } @@ -478,7 +480,7 @@ class DenseVector(val values: Array[Double]) extends Vector { private[mllib] override def toBreeze: BV[Double] = new BDV[Double](values) - override def apply(i: Int) = values(i) + override def apply(i: Int): Double = values(i) override def copy: DenseVector = { new DenseVector(values.clone()) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala index 1d253963130f1..3323ae7b1fba0 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala @@ -49,7 +49,7 @@ private[mllib] class GridPartitioner( private val rowPartitions = math.ceil(rows * 1.0 / rowsPerPart).toInt private val colPartitions = math.ceil(cols * 1.0 / colsPerPart).toInt - override val numPartitions = rowPartitions * colPartitions + override val numPartitions: Int = rowPartitions * colPartitions /** * Returns the index of the partition the input coordinate belongs to. 
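The new MatrixUDT mirrors the existing VectorUDT: serialize packs a matrix into a seven-field Row (type byte, dimensions, optional CSC arrays, values, isTransposed) and deserialize rebuilds the right concrete type from the type byte. A minimal round-trip sketch, mirroring the MatricesSuite test added later in this patch; note that MatrixUDT is private[spark], so a snippet like this only compiles inside Spark's own packages (for example, an mllib test):

```scala
import org.apache.spark.mllib.linalg.{DenseMatrix, MatrixUDT}

val udt = new MatrixUDT()
val dense = new DenseMatrix(2, 2, Array(0.9, 1.2, 2.3, 9.8))
val sparse = dense.toSparse

// serialize produces the seven-field Row described by sqlType; deserialize inverts it
val denseBack = udt.deserialize(udt.serialize(dense))
val sparseBack = udt.deserialize(udt.serialize(sparse))

assert(dense.toArray.sameElements(denseBack.toArray))
assert(sparse.toArray.sameElements(sparseBack.toArray))
```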
@@ -85,6 +85,14 @@ private[mllib] class GridPartitioner( false } } + + override def hashCode: Int = { + com.google.common.base.Objects.hashCode( + rows: java.lang.Integer, + cols: java.lang.Integer, + rowsPerPart: java.lang.Integer, + colsPerPart: java.lang.Integer) + } } private[mllib] object GridPartitioner { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/optimization/LBFGS.scala b/mllib/src/main/scala/org/apache/spark/mllib/optimization/LBFGS.scala index d5e4f4ccbff10..ef6eccd90711a 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/optimization/LBFGS.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/optimization/LBFGS.scala @@ -60,6 +60,8 @@ class LBFGS(private var gradient: Gradient, private var updater: Updater) /** * Set the convergence tolerance of iterations for L-BFGS. Default 1E-4. * Smaller value will lead to higher accuracy with the cost of more iterations. + * This value must be nonnegative. Lower convergence values are less tolerant + * and therefore generally cause more iterations to be run. */ def setConvergenceTol(tolerance: Double): this.type = { this.convergenceTol = tolerance @@ -142,7 +144,9 @@ object LBFGS extends Logging { * one single data example) * @param updater - Updater function to actually perform a gradient step in a given direction. * @param numCorrections - The number of corrections used in the L-BFGS update. - * @param convergenceTol - The convergence tolerance of iterations for L-BFGS + * @param convergenceTol - The convergence tolerance of iterations for L-BFGS which is must be + * nonnegative. Lower values are less tolerant and therefore generally + * cause more iterations to be run. * @param maxNumIterations - Maximal number of iterations that L-BFGS can be run. * @param regParam - Regularization parameter * diff --git a/mllib/src/main/scala/org/apache/spark/mllib/random/RandomDataGenerator.scala b/mllib/src/main/scala/org/apache/spark/mllib/random/RandomDataGenerator.scala index 405bae62ee8b6..9349ecaa13f56 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/random/RandomDataGenerator.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/random/RandomDataGenerator.scala @@ -56,7 +56,7 @@ class UniformGenerator extends RandomDataGenerator[Double] { random.nextDouble() } - override def setSeed(seed: Long) = random.setSeed(seed) + override def setSeed(seed: Long): Unit = random.setSeed(seed) override def copy(): UniformGenerator = new UniformGenerator() } @@ -75,7 +75,7 @@ class StandardNormalGenerator extends RandomDataGenerator[Double] { random.nextGaussian() } - override def setSeed(seed: Long) = random.setSeed(seed) + override def setSeed(seed: Long): Unit = random.setSeed(seed) override def copy(): StandardNormalGenerator = new StandardNormalGenerator() } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctions.scala b/mllib/src/main/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctions.scala new file mode 100644 index 0000000000000..9213fd3f595c3 --- /dev/null +++ b/mllib/src/main/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctions.scala @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.mllib.rdd + +import scala.language.implicitConversions +import scala.reflect.ClassTag + +import org.apache.spark.annotation.DeveloperApi +import org.apache.spark.rdd.RDD +import org.apache.spark.util.BoundedPriorityQueue + +/** + * Machine learning specific Pair RDD functions. + */ +@DeveloperApi +class MLPairRDDFunctions[K: ClassTag, V: ClassTag](self: RDD[(K, V)]) extends Serializable { + /** + * Returns the top k (largest) elements for each key from this RDD as defined by the specified + * implicit Ordering[T]. + * If the number of elements for a certain key is less than k, all of them will be returned. + * + * @param num k, the number of top elements to return + * @param ord the implicit ordering for T + * @return an RDD that contains the top k values for each key + */ + def topByKey(num: Int)(implicit ord: Ordering[V]): RDD[(K, Array[V])] = { + self.aggregateByKey(new BoundedPriorityQueue[V](num)(ord))( + seqOp = (queue, item) => { + queue += item + queue + }, + combOp = (queue1, queue2) => { + queue1 ++= queue2 + queue1 + } + ).mapValues(_.toArray.sorted(ord.reverse)) + } +} + +@DeveloperApi +object MLPairRDDFunctions { + /** Implicit conversion from a pair RDD to MLPairRDDFunctions. */ + implicit def fromPairRDD[K: ClassTag, V: ClassTag](rdd: RDD[(K, V)]): MLPairRDDFunctions[K, V] = + new MLPairRDDFunctions[K, V](rdd) +} diff --git a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala index caacab943030b..dddefe1944e9d 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala @@ -82,6 +82,9 @@ class ALS private ( private var intermediateRDDStorageLevel: StorageLevel = StorageLevel.MEMORY_AND_DISK private var finalRDDStorageLevel: StorageLevel = StorageLevel.MEMORY_AND_DISK + /** checkpoint interval */ + private var checkpointInterval: Int = 10 + /** * Set the number of blocks for both user blocks and product blocks to parallelize the computation * into; pass -1 for an auto-configured number of blocks. Default: -1. @@ -182,6 +185,19 @@ class ALS private ( this } + /** + * Set period (in iterations) between checkpoints (default = 10). Checkpointing helps with + * recovery (when nodes fail) and StackOverflow exceptions caused by long lineage. It also helps + * with eliminating temporary shuffle files on disk, which can be important when there are many + * ALS iterations. If the checkpoint directory is not set in [[org.apache.spark.SparkContext]], + * this setting is ignored. + */ + @DeveloperApi + def setCheckpointInterval(checkpointInterval: Int): this.type = { + this.checkpointInterval = checkpointInterval + this + } + /** * Run ALS with the configured parameters on an input RDD of (user, product, rating) triples. * Returns a MatrixFactorizationModel with feature vectors for each user and product. 
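The new topByKey aggregates a BoundedPriorityQueue per key and then sorts each queue in descending order, so keys with fewer than k values simply keep everything they have. A usage sketch mirroring the MLPairRDDFunctionsSuite test added later in this patch (`sc` is an existing SparkContext):

```scala
import org.apache.spark.mllib.rdd.MLPairRDDFunctions._ // implicit conversion that adds topByKey

val pairs = sc.parallelize(Seq((1, 1), (1, 2), (3, 2), (3, 7), (3, 5), (5, 1), (5, 3)), 2)
val top2 = pairs.topByKey(2).collectAsMap()
// top2(3) == Array(7, 5): largest values first; key 1 keeps both of its values
```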
@@ -212,6 +228,7 @@ class ALS private ( nonnegative = nonnegative, intermediateRDDStorageLevel = intermediateRDDStorageLevel, finalRDDStorageLevel = StorageLevel.NONE, + checkpointInterval = checkpointInterval, seed = seed) val userFactors = floatUserFactors diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala index 7c66e8cdebdbe..45b9ebb4cc0d6 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala @@ -76,7 +76,12 @@ abstract class GeneralizedLinearModel(val weights: Vector, val intercept: Double predictPoint(testData, weights, intercept) } - override def toString() = "(weights=%s, intercept=%s)".format(weights, intercept) + /** + * Print a summary of the model. + */ + override def toString: String = { + s"${this.getClass.getName}: intercept = ${intercept}, numFeatures = ${weights.size}" + } } /** @@ -123,6 +128,11 @@ abstract class GeneralizedLinearAlgorithm[M <: GeneralizedLinearModel] */ private var useFeatureScaling = false + /** + * The dimension of training features. + */ + def getNumFeatures: Int = this.numFeatures + /** * The dimension of training features. */ @@ -141,6 +151,11 @@ abstract class GeneralizedLinearAlgorithm[M <: GeneralizedLinearModel] */ protected def createModel(weights: Vector, intercept: Double): M + /** + * Get if the algorithm uses addIntercept + */ + def isAddIntercept: Boolean = this.addIntercept + /** * Set if the algorithm should add an intercept. Default false. * We set the default to false because adding the intercept will cause memory allocation. 
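The new setCheckpointInterval on the mllib ALS wrapper is simply passed through to the underlying implementation in the hunk above, and it only takes effect once a checkpoint directory has been set on the SparkContext. A usage sketch (the checkpoint path, ratings and hyperparameters are illustrative; `sc` is an existing SparkContext):

```scala
import org.apache.spark.mllib.recommendation.{ALS, Rating}

sc.setCheckpointDir("/tmp/als-checkpoints") // without this, the interval setting is silently ignored

val ratings = sc.parallelize(Seq(Rating(1, 1, 5.0), Rating(1, 2, 1.0), Rating(2, 1, 4.0)))
val model = new ALS()
  .setRank(10)
  .setIterations(50)          // long runs are where periodic checkpointing pays off
  .setCheckpointInterval(5)   // checkpoint factors every 5 iterations instead of the default 10
  .run(ratings)
```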
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/impl/GLMRegressionModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/impl/GLMRegressionModel.scala index bd7e340ca2d8e..b55944f74f623 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/regression/impl/GLMRegressionModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/impl/GLMRegressionModel.scala @@ -32,7 +32,7 @@ private[regression] object GLMRegressionModel { object SaveLoadV1_0 { - def thisFormatVersion = "1.0" + def thisFormatVersion: String = "1.0" /** Model data for model import/export */ case class Data(weights: Vector, intercept: Double) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Strategy.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Strategy.scala index 8d5c36da32bdb..ada227c200a79 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Strategy.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Strategy.scala @@ -83,10 +83,13 @@ class Strategy ( @BeanProperty var useNodeIdCache: Boolean = false, @BeanProperty var checkpointInterval: Int = 10) extends Serializable { - def isMulticlassClassification = + def isMulticlassClassification: Boolean = { algo == Classification && numClasses > 2 - def isMulticlassWithCategoricalFeatures - = isMulticlassClassification && (categoricalFeaturesInfo.size > 0) + } + + def isMulticlassWithCategoricalFeatures: Boolean = { + isMulticlassClassification && (categoricalFeaturesInfo.size > 0) + } /** * Java-friendly constructor for [[org.apache.spark.mllib.tree.configuration.Strategy]] diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Entropy.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Entropy.scala index b7950e00786ab..5ac10f3fd32dd 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Entropy.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Entropy.scala @@ -71,7 +71,7 @@ object Entropy extends Impurity { * Get this impurity instance. * This is useful for passing impurity parameters to a Strategy in Java. */ - def instance = this + def instance: this.type = this } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Gini.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Gini.scala index c946db9c0d1c8..19d318203c344 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Gini.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Gini.scala @@ -67,7 +67,7 @@ object Gini extends Impurity { * Get this impurity instance. * This is useful for passing impurity parameters to a Strategy in Java. */ - def instance = this + def instance: this.type = this } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Variance.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Variance.scala index df9eafa5da16a..7104a7fa4dd4c 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Variance.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Variance.scala @@ -58,7 +58,7 @@ object Variance extends Impurity { * Get this impurity instance. * This is useful for passing impurity parameters to a Strategy in Java. 
*/ - def instance = this + def instance: this.type = this } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/AbsoluteError.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/AbsoluteError.scala index d1bde15e6b150..793dd664c5d5a 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/AbsoluteError.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/AbsoluteError.scala @@ -47,18 +47,9 @@ object AbsoluteError extends Loss { if ((point.label - model.predict(point.features)) < 0) 1.0 else -1.0 } - /** - * Method to calculate loss of the base learner for the gradient boosting calculation. - * Note: This method is not used by the gradient boosting algorithm but is useful for debugging - * purposes. - * @param model Ensemble model - * @param data Training dataset: RDD of [[org.apache.spark.mllib.regression.LabeledPoint]]. - * @return Mean absolute error of model on data - */ - override def computeError(model: TreeEnsembleModel, data: RDD[LabeledPoint]): Double = { - data.map { y => - val err = model.predict(y.features) - y.label - math.abs(err) - }.mean() + override def computeError(prediction: Double, label: Double): Double = { + val err = label - prediction + math.abs(err) } + } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/LogLoss.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/LogLoss.scala index 55213e695638c..51b1aed167b66 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/LogLoss.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/LogLoss.scala @@ -50,20 +50,10 @@ object LogLoss extends Loss { - 4.0 * point.label / (1.0 + math.exp(2.0 * point.label * prediction)) } - /** - * Method to calculate loss of the base learner for the gradient boosting calculation. - * Note: This method is not used by the gradient boosting algorithm but is useful for debugging - * purposes. - * @param model Ensemble model - * @param data Training dataset: RDD of [[org.apache.spark.mllib.regression.LabeledPoint]]. - * @return Mean log loss of model on data - */ - override def computeError(model: TreeEnsembleModel, data: RDD[LabeledPoint]): Double = { - data.map { case point => - val prediction = model.predict(point.features) - val margin = 2.0 * point.label * prediction - // The following is equivalent to 2.0 * log(1 + exp(-margin)) but more numerically stable. - 2.0 * MLUtils.log1pExp(-margin) - }.mean() + override def computeError(prediction: Double, label: Double): Double = { + val margin = 2.0 * label * prediction + // The following is equivalent to 2.0 * log(1 + exp(-margin)) but more numerically stable. + 2.0 * MLUtils.log1pExp(-margin) } + } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/Loss.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/Loss.scala index e1169d9f66ea4..357869ff6b333 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/Loss.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/Loss.scala @@ -47,6 +47,18 @@ trait Loss extends Serializable { * @param data Training dataset: RDD of [[org.apache.spark.mllib.regression.LabeledPoint]]. * @return Measure of model error on data */ - def computeError(model: TreeEnsembleModel, data: RDD[LabeledPoint]): Double + def computeError(model: TreeEnsembleModel, data: RDD[LabeledPoint]): Double = { + data.map(point => computeError(model.predict(point.features), point.label)).mean() + } + + /** + * Method to calculate loss when the predictions are already known. 
+ * Note: This method is used in the method evaluateEachIteration to avoid recomputing the + * predicted values from previously fit trees. + * @param prediction Predicted label. + * @param label True label. + * @return Measure of model error on datapoint. + */ + def computeError(prediction: Double, label: Double): Double } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/SquaredError.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/SquaredError.scala index 50ecaa2f86f35..b990707ca4525 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/SquaredError.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/SquaredError.scala @@ -47,18 +47,9 @@ object SquaredError extends Loss { 2.0 * (model.predict(point.features) - point.label) } - /** - * Method to calculate loss of the base learner for the gradient boosting calculation. - * Note: This method is not used by the gradient boosting algorithm but is useful for debugging - * purposes. - * @param model Ensemble model - * @param data Training dataset: RDD of [[org.apache.spark.mllib.regression.LabeledPoint]]. - * @return Mean squared error of model on data - */ - override def computeError(model: TreeEnsembleModel, data: RDD[LabeledPoint]): Double = { - data.map { y => - val err = model.predict(y.features) - y.label - err * err - }.mean() + override def computeError(prediction: Double, label: Double): Double = { + val err = prediction - label + err * err } + } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala index 8a57ebc387d01..c9bafd60fba4d 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala @@ -120,10 +120,10 @@ object DecisionTreeModel extends Loader[DecisionTreeModel] with Logging { private[tree] object SaveLoadV1_0 { - def thisFormatVersion = "1.0" + def thisFormatVersion: String = "1.0" // Hard-code class name string in case it changes in the future - def thisClassName = "org.apache.spark.mllib.tree.DecisionTreeModel" + def thisClassName: String = "org.apache.spark.mllib.tree.DecisionTreeModel" case class PredictData(predict: Double, prob: Double) { def toPredict: Predict = new Predict(predict, prob) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/InformationGainStats.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/InformationGainStats.scala index 80990aa9a603f..f209fdafd3653 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/InformationGainStats.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/InformationGainStats.scala @@ -38,23 +38,32 @@ class InformationGainStats( val leftPredict: Predict, val rightPredict: Predict) extends Serializable { - override def toString = { + override def toString: String = { "gain = %f, impurity = %f, left impurity = %f, right impurity = %f" .format(gain, impurity, leftImpurity, rightImpurity) } - override def equals(o: Any) = - o match { - case other: InformationGainStats => { - gain == other.gain && - impurity == other.impurity && - leftImpurity == other.leftImpurity && - rightImpurity == other.rightImpurity && - leftPredict == other.leftPredict && - rightPredict == other.rightPredict - } - case _ => false - } + override def equals(o: Any): Boolean = o match { + case other: InformationGainStats => + gain == other.gain && + impurity == 
other.impurity && + leftImpurity == other.leftImpurity && + rightImpurity == other.rightImpurity && + leftPredict == other.leftPredict && + rightPredict == other.rightPredict + + case _ => false + } + + override def hashCode: Int = { + com.google.common.base.Objects.hashCode( + gain: java.lang.Double, + impurity: java.lang.Double, + leftImpurity: java.lang.Double, + rightImpurity: java.lang.Double, + leftPredict, + rightPredict) + } } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Node.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Node.scala index d961081d185e9..4f72bb8014cc0 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Node.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Node.scala @@ -50,8 +50,10 @@ class Node ( var rightNode: Option[Node], var stats: Option[InformationGainStats]) extends Serializable with Logging { - override def toString = "id = " + id + ", isLeaf = " + isLeaf + ", predict = " + predict + ", " + - "impurity = " + impurity + "split = " + split + ", stats = " + stats + override def toString: String = { + "id = " + id + ", isLeaf = " + isLeaf + ", predict = " + predict + ", " + + "impurity = " + impurity + "split = " + split + ", stats = " + stats + } /** * build the left node and right nodes if not leaf diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Predict.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Predict.scala index ad4c0dbbfb3e5..25990af7c6cf7 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Predict.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Predict.scala @@ -29,7 +29,7 @@ class Predict( val predict: Double, val prob: Double = 0.0) extends Serializable { - override def toString = { + override def toString: String = { "predict = %f, prob = %f".format(predict, prob) } @@ -39,4 +39,8 @@ class Predict( case _ => false } } + + override def hashCode: Int = { + com.google.common.base.Objects.hashCode(predict: java.lang.Double, prob: java.lang.Double) + } } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Split.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Split.scala index b7a85f58544a3..fb35e70a8d077 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Split.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Split.scala @@ -38,9 +38,10 @@ case class Split( featureType: FeatureType, categories: List[Double]) { - override def toString = + override def toString: String = { "Feature = " + feature + ", threshold = " + threshold + ", featureType = " + featureType + ", categories = " + categories + } } /** diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/treeEnsembleModels.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/treeEnsembleModels.scala index 30a8f7ca301af..1950254b2aa6d 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/treeEnsembleModels.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/treeEnsembleModels.scala @@ -28,9 +28,11 @@ import org.apache.spark.{Logging, SparkContext} import org.apache.spark.annotation.Experimental import org.apache.spark.api.java.JavaRDD import org.apache.spark.mllib.linalg.Vector +import org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.mllib.tree.configuration.Algo import org.apache.spark.mllib.tree.configuration.Algo._ import org.apache.spark.mllib.tree.configuration.EnsembleCombiningStrategy._ +import 
org.apache.spark.mllib.tree.loss.Loss import org.apache.spark.mllib.util.{Loader, Saveable} import org.apache.spark.rdd.RDD import org.apache.spark.sql.SQLContext @@ -79,7 +81,7 @@ object RandomForestModel extends Loader[RandomForestModel] { private object SaveLoadV1_0 { // Hard-code class name string in case it changes in the future - def thisClassName = "org.apache.spark.mllib.tree.model.RandomForestModel" + def thisClassName: String = "org.apache.spark.mllib.tree.model.RandomForestModel" } } @@ -108,6 +110,58 @@ class GradientBoostedTreesModel( } override protected def formatVersion: String = TreeEnsembleModel.SaveLoadV1_0.thisFormatVersion + + /** + * Method to compute error or loss for every iteration of gradient boosting. + * @param data RDD of [[org.apache.spark.mllib.regression.LabeledPoint]] + * @param loss evaluation metric. + * @return an array with index i having the losses or errors for the ensemble + * containing the first i+1 trees + */ + def evaluateEachIteration( + data: RDD[LabeledPoint], + loss: Loss): Array[Double] = { + + val sc = data.sparkContext + val remappedData = algo match { + case Classification => data.map(x => new LabeledPoint((x.label * 2) - 1, x.features)) + case _ => data + } + + val numIterations = trees.length + val evaluationArray = Array.fill(numIterations)(0.0) + + var predictionAndError: RDD[(Double, Double)] = remappedData.map { i => + val pred = treeWeights(0) * trees(0).predict(i.features) + val error = loss.computeError(pred, i.label) + (pred, error) + } + evaluationArray(0) = predictionAndError.values.mean() + + // Avoid the model being copied across numIterations. + val broadcastTrees = sc.broadcast(trees) + val broadcastWeights = sc.broadcast(treeWeights) + + (1 until numIterations).map { nTree => + predictionAndError = remappedData.zip(predictionAndError).mapPartitions { iter => + val currentTree = broadcastTrees.value(nTree) + val currentTreeWeight = broadcastWeights.value(nTree) + iter.map { + case (point, (pred, error)) => { + val newPred = pred + currentTree.predict(point.features) * currentTreeWeight + val newError = loss.computeError(newPred, point.label) + (newPred, newError) + } + } + } + evaluationArray(nTree) = predictionAndError.values.mean() + } + + broadcastTrees.unpersist() + broadcastWeights.unpersist() + evaluationArray + } + } object GradientBoostedTreesModel extends Loader[GradientBoostedTreesModel] { @@ -130,7 +184,7 @@ object GradientBoostedTreesModel extends Loader[GradientBoostedTreesModel] { private object SaveLoadV1_0 { // Hard-code class name string in case it changes in the future - def thisClassName = "org.apache.spark.mllib.tree.model.GradientBoostedTreesModel" + def thisClassName: String = "org.apache.spark.mllib.tree.model.GradientBoostedTreesModel" } } @@ -257,7 +311,7 @@ private[tree] object TreeEnsembleModel extends Logging { import org.apache.spark.mllib.tree.model.DecisionTreeModel.SaveLoadV1_0.{NodeData, constructTrees} - def thisFormatVersion = "1.0" + def thisFormatVersion: String = "1.0" case class Metadata( algo: String, diff --git a/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala index bb86bafc0eb0a..0bb06e9e8ac9c 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala @@ -17,6 +17,7 @@ package org.apache.spark.ml.recommendation +import java.io.File import java.util.Random import scala.collection.mutable 
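evaluateEachIteration builds on the per-point Loss.computeError introduced above: it scores the single-tree ensemble once, then folds each additional tree's weighted prediction into the running predictions rather than re-evaluating the full ensemble at every step. A usage sketch mirroring the GradientBoostedTreesSuite test later in this patch (`boostingStrategy`, `trainData` and `validationData` are assumed to exist):

```scala
import org.apache.spark.mllib.tree.GradientBoostedTrees
import org.apache.spark.mllib.tree.loss.SquaredError

val model = new GradientBoostedTrees(boostingStrategy).run(trainData)

// errors(i) is the mean loss of the ensemble built from the first i + 1 trees
val errors = model.evaluateEachIteration(validationData, SquaredError)
val bestNumTrees = errors.zipWithIndex.minBy(_._1)._2 + 1
```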
@@ -32,16 +33,25 @@ import org.apache.spark.mllib.util.MLlibTestSparkContext import org.apache.spark.mllib.util.TestingUtils._ import org.apache.spark.rdd.RDD import org.apache.spark.sql.{Row, SQLContext} +import org.apache.spark.util.Utils class ALSSuite extends FunSuite with MLlibTestSparkContext with Logging { private var sqlContext: SQLContext = _ + private var tempDir: File = _ override def beforeAll(): Unit = { super.beforeAll() + tempDir = Utils.createTempDir() + sc.setCheckpointDir(tempDir.getAbsolutePath) sqlContext = new SQLContext(sc) } + override def afterAll(): Unit = { + Utils.deleteRecursively(tempDir) + super.afterAll() + } + test("LocalIndexEncoder") { val random = new Random for (numBlocks <- Seq(1, 2, 5, 10, 20, 50, 100)) { @@ -485,4 +495,11 @@ class ALSSuite extends FunSuite with MLlibTestSparkContext with Logging { }.count() } } + + test("als with large number of iterations") { + val (ratings, _) = genExplicitTestData(numUsers = 4, numItems = 4, rank = 1) + ALS.train(ratings, rank = 1, maxIter = 50, numUserBlocks = 2, numItemBlocks = 2) + ALS.train( + ratings, rank = 1, maxIter = 50, numUserBlocks = 2, numItemBlocks = 2, implicitPrefs = true) + } } diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala index c098b5458fe6b..96f677db3f377 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala @@ -424,4 +424,17 @@ class MatricesSuite extends FunSuite { assert(mat.rowIndices.toSeq === Seq(3, 0, 2, 1)) assert(mat.values.toSeq === Seq(1.0, 2.0, 3.0, 4.0)) } + + test("MatrixUDT") { + val dm1 = new DenseMatrix(2, 2, Array(0.9, 1.2, 2.3, 9.8)) + val dm2 = new DenseMatrix(3, 2, Array(0.0, 1.21, 2.3, 9.8, 9.0, 0.0)) + val dm3 = new DenseMatrix(0, 0, Array()) + val sm1 = dm1.toSparse + val sm2 = dm2.toSparse + val sm3 = dm3.toSparse + val mUDT = new MatrixUDT() + Seq(dm1, dm2, dm3, sm1, sm2, sm3).foreach { + mat => assert(mat.toArray === mUDT.deserialize(mUDT.serialize(mat)).toArray) + } + } } diff --git a/mllib/src/test/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctionsSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctionsSuite.scala new file mode 100644 index 0000000000000..1ac7c12c4e8e6 --- /dev/null +++ b/mllib/src/test/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctionsSuite.scala @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.mllib.rdd + +import org.scalatest.FunSuite + +import org.apache.spark.mllib.util.MLlibTestSparkContext +import org.apache.spark.mllib.rdd.MLPairRDDFunctions._ + +class MLPairRDDFunctionsSuite extends FunSuite with MLlibTestSparkContext { + test("topByKey") { + val topMap = sc.parallelize(Array((1, 1), (1, 2), (3, 2), (3, 7), (3, 5), (5, 1), (5, 3)), 2) + .topByKey(2) + .collectAsMap() + + assert(topMap.size === 3) + assert(topMap(1) === Array(2, 1)) + assert(topMap(3) === Array(7, 5)) + assert(topMap(5) === Array(3, 1)) + } +} diff --git a/mllib/src/test/scala/org/apache/spark/mllib/tree/GradientBoostedTreesSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/tree/GradientBoostedTreesSuite.scala index b437aeaaf0547..55b0bac7d49fe 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/tree/GradientBoostedTreesSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/tree/GradientBoostedTreesSuite.scala @@ -175,10 +175,11 @@ class GradientBoostedTreesSuite extends FunSuite with MLlibTestSparkContext { new BoostingStrategy(treeStrategy, loss, numIterations, validationTol = 0.0) val gbtValidate = new GradientBoostedTrees(boostingStrategy) .runWithValidation(trainRdd, validateRdd) - assert(gbtValidate.numTrees !== numIterations) + val numTrees = gbtValidate.numTrees + assert(numTrees !== numIterations) // Test that it performs better on the validation dataset. - val gbt = GradientBoostedTrees.train(trainRdd, boostingStrategy) + val gbt = new GradientBoostedTrees(boostingStrategy).run(trainRdd) val (errorWithoutValidation, errorWithValidation) = { if (algo == Classification) { val remappedRdd = validateRdd.map(x => new LabeledPoint(2 * x.label - 1, x.features)) @@ -188,6 +189,17 @@ class GradientBoostedTreesSuite extends FunSuite with MLlibTestSparkContext { } } assert(errorWithValidation <= errorWithoutValidation) + + // Test that results from evaluateEachIteration comply with runWithValidation. 
+ // Note that convergenceTol is set to 0.0 + val evaluationArray = gbt.evaluateEachIteration(validateRdd, loss) + assert(evaluationArray.length === numIterations) + assert(evaluationArray(numTrees) > evaluationArray(numTrees - 1)) + var i = 1 + while (i < numTrees) { + assert(evaluationArray(i) <= evaluationArray(i - 1)) + i += 1 + } } } } diff --git a/network/common/pom.xml b/network/common/pom.xml index 74437f37c47e4..7b51845206f4a 100644 --- a/network/common/pom.xml +++ b/network/common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.10 - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT ../../pom.xml diff --git a/network/shuffle/pom.xml b/network/shuffle/pom.xml index a2bcca26d8344..7dc7c65825e34 100644 --- a/network/shuffle/pom.xml +++ b/network/shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.10 - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT ../../pom.xml diff --git a/network/yarn/pom.xml b/network/yarn/pom.xml index cea7a20c223e2..1e2e9c80af6cc 100644 --- a/network/yarn/pom.xml +++ b/network/yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.10 - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT ../../pom.xml diff --git a/pom.xml b/pom.xml index 6fc56a86d44ac..23bb16130b504 100644 --- a/pom.xml +++ b/pom.xml @@ -26,7 +26,7 @@ org.apache.spark spark-parent_2.10 - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT pom Spark Project Parent POM http://spark.apache.org/ @@ -120,7 +120,7 @@ shaded-protobuf 1.7.10 1.2.17 - 1.0.4 + 2.2.0 2.4.1 ${hadoop.version} 0.98.7-hadoop1 diff --git a/project/MimaBuild.scala b/project/MimaBuild.scala index f0cbf4e57b8c5..dde92949fa175 100644 --- a/project/MimaBuild.scala +++ b/project/MimaBuild.scala @@ -91,7 +91,7 @@ object MimaBuild { def mimaSettings(sparkHome: File, projectRef: ProjectRef) = { val organization = "org.apache.spark" - val previousSparkVersion = "1.2.0" + val previousSparkVersion = "1.3.0" val fullId = "spark-" + projectRef.project + "_2.10" mimaDefaultSettings ++ Seq(previousArtifact := Some(organization % fullId % previousSparkVersion), diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala index a6b07fa7cddec..328d59485a731 100644 --- a/project/MimaExcludes.scala +++ b/project/MimaExcludes.scala @@ -16,6 +16,7 @@ */ import com.typesafe.tools.mima.core._ +import com.typesafe.tools.mima.core.ProblemFilters._ /** * Additional excludes for checking of Spark's binary compatibility. @@ -33,6 +34,19 @@ import com.typesafe.tools.mima.core._ object MimaExcludes { def excludes(version: String) = version match { + case v if v.startsWith("1.4") => + Seq( + MimaBuild.excludeSparkPackage("deploy"), + MimaBuild.excludeSparkPackage("ml"), + // SPARK-5922 Adding a generalized diff(other: RDD[(VertexId, VD)]) to VertexRDD + ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.graphx.VertexRDD.diff"), + // These are needed if checking against the sbt build, since they are part of + // the maven-generated artifacts in 1.3. 
+ excludePackage("org.spark-project.jetty"), + MimaBuild.excludeSparkPackage("unused"), + ProblemFilters.exclude[MissingClassProblem]("com.google.common.base.Optional") + ) + case v if v.startsWith("1.3") => Seq( MimaBuild.excludeSparkPackage("deploy"), diff --git a/python/pyspark/java_gateway.py b/python/pyspark/java_gateway.py index 43d2cf5171880..0a16cbd8bff62 100644 --- a/python/pyspark/java_gateway.py +++ b/python/pyspark/java_gateway.py @@ -38,10 +38,8 @@ def launch_gateway(): # proper classpath and settings from spark-env.sh on_windows = platform.system() == "Windows" script = "./bin/spark-submit.cmd" if on_windows else "./bin/spark-submit" - submit_args = os.environ.get("PYSPARK_SUBMIT_ARGS") - submit_args = submit_args if submit_args is not None else "" - submit_args = shlex.split(submit_args) - command = [os.path.join(SPARK_HOME, script)] + submit_args + submit_args = os.environ.get("PYSPARK_SUBMIT_ARGS", "pyspark-shell") + command = [os.path.join(SPARK_HOME, script)] + shlex.split(submit_args) # Start a socket that will be used by PythonGatewayServer to communicate its port to us callback_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) diff --git a/python/pyspark/mllib/classification.py b/python/pyspark/mllib/classification.py index e4765173709e8..6766f3ebb8894 100644 --- a/python/pyspark/mllib/classification.py +++ b/python/pyspark/mllib/classification.py @@ -21,9 +21,10 @@ from numpy import array from pyspark import RDD -from pyspark.mllib.common import callMLlibFunc +from pyspark.mllib.common import callMLlibFunc, _py2java, _java2py from pyspark.mllib.linalg import SparseVector, _convert_to_vector from pyspark.mllib.regression import LabeledPoint, LinearModel, _regression_train_wrapper +from pyspark.mllib.util import Saveable, Loader, inherit_doc __all__ = ['LogisticRegressionModel', 'LogisticRegressionWithSGD', 'LogisticRegressionWithLBFGS', @@ -99,6 +100,18 @@ class LogisticRegressionModel(LinearBinaryClassificationModel): 1 >>> lrm.predict(SparseVector(2, {0: 1.0})) 0 + >>> import os, tempfile + >>> path = tempfile.mkdtemp() + >>> lrm.save(sc, path) + >>> sameModel = LogisticRegressionModel.load(sc, path) + >>> sameModel.predict(array([0.0, 1.0])) + 1 + >>> sameModel.predict(SparseVector(2, {0: 1.0})) + 0 + >>> try: + ... os.removedirs(path) + ... except: + ... pass """ def __init__(self, weights, intercept): super(LogisticRegressionModel, self).__init__(weights, intercept) @@ -124,6 +137,22 @@ def predict(self, x): else: return 1 if prob > self._threshold else 0 + def save(self, sc, path): + java_model = sc._jvm.org.apache.spark.mllib.classification.LogisticRegressionModel( + _py2java(sc, self._coeff), self.intercept) + java_model.save(sc._jsc.sc(), path) + + @classmethod + def load(cls, sc, path): + java_model = sc._jvm.org.apache.spark.mllib.classification.LogisticRegressionModel.load( + sc._jsc.sc(), path) + weights = _java2py(sc, java_model.weights()) + intercept = java_model.intercept() + threshold = java_model.getThreshold().get() + model = LogisticRegressionModel(weights, intercept) + model.setThreshold(threshold) + return model + class LogisticRegressionWithSGD(object): @@ -243,6 +272,18 @@ class SVMModel(LinearBinaryClassificationModel): 1 >>> svm.predict(SparseVector(2, {0: -1.0})) 0 + >>> import os, tempfile + >>> path = tempfile.mkdtemp() + >>> svm.save(sc, path) + >>> sameModel = SVMModel.load(sc, path) + >>> sameModel.predict(SparseVector(2, {1: 1.0})) + 1 + >>> sameModel.predict(SparseVector(2, {0: -1.0})) + 0 + >>> try: + ... 
os.removedirs(path) + ... except: + ... pass """ def __init__(self, weights, intercept): super(SVMModel, self).__init__(weights, intercept) @@ -263,6 +304,22 @@ def predict(self, x): else: return 1 if margin > self._threshold else 0 + def save(self, sc, path): + java_model = sc._jvm.org.apache.spark.mllib.classification.SVMModel( + _py2java(sc, self._coeff), self.intercept) + java_model.save(sc._jsc.sc(), path) + + @classmethod + def load(cls, sc, path): + java_model = sc._jvm.org.apache.spark.mllib.classification.SVMModel.load( + sc._jsc.sc(), path) + weights = _java2py(sc, java_model.weights()) + intercept = java_model.intercept() + threshold = java_model.getThreshold().get() + model = SVMModel(weights, intercept) + model.setThreshold(threshold) + return model + class SVMWithSGD(object): @@ -303,7 +360,8 @@ def train(rdd, i): return _regression_train_wrapper(train, SVMModel, data, initialWeights) -class NaiveBayesModel(object): +@inherit_doc +class NaiveBayesModel(Saveable, Loader): """ Model for Naive Bayes classifiers. @@ -334,6 +392,16 @@ class NaiveBayesModel(object): 0.0 >>> model.predict(SparseVector(2, {0: 1.0})) 1.0 + >>> import os, tempfile + >>> path = tempfile.mkdtemp() + >>> model.save(sc, path) + >>> sameModel = NaiveBayesModel.load(sc, path) + >>> sameModel.predict(SparseVector(2, {0: 1.0})) == model.predict(SparseVector(2, {0: 1.0})) + True + >>> try: + ... os.removedirs(path) + ... except OSError: + ... pass """ def __init__(self, labels, pi, theta): @@ -348,6 +416,23 @@ def predict(self, x): x = _convert_to_vector(x) return self.labels[numpy.argmax(self.pi + x.dot(self.theta.transpose()))] + def save(self, sc, path): + java_labels = _py2java(sc, self.labels.tolist()) + java_pi = _py2java(sc, self.pi.tolist()) + java_theta = _py2java(sc, self.theta.tolist()) + java_model = sc._jvm.org.apache.spark.mllib.classification.NaiveBayesModel( + java_labels, java_pi, java_theta) + java_model.save(sc._jsc.sc(), path) + + @classmethod + def load(cls, sc, path): + java_model = sc._jvm.org.apache.spark.mllib.classification.NaiveBayesModel.load( + sc._jsc.sc(), path) + py_labels = _java2py(sc, java_model.labels()) + py_pi = _java2py(sc, java_model.pi()) + py_theta = _java2py(sc, java_model.theta()) + return NaiveBayesModel(py_labels, py_pi, numpy.array(py_theta)) + class NaiveBayes(object): diff --git a/python/pyspark/mllib/clustering.py b/python/pyspark/mllib/clustering.py index 949db5705abd7..464f49aeee3cd 100644 --- a/python/pyspark/mllib/clustering.py +++ b/python/pyspark/mllib/clustering.py @@ -19,14 +19,16 @@ from pyspark import RDD from pyspark import SparkContext -from pyspark.mllib.common import callMLlibFunc, callJavaFunc -from pyspark.mllib.linalg import DenseVector, SparseVector, _convert_to_vector +from pyspark.mllib.common import callMLlibFunc, callJavaFunc, _py2java, _java2py +from pyspark.mllib.linalg import SparseVector, _convert_to_vector from pyspark.mllib.stat.distribution import MultivariateGaussian +from pyspark.mllib.util import Saveable, Loader, inherit_doc __all__ = ['KMeansModel', 'KMeans', 'GaussianMixtureModel', 'GaussianMixture'] -class KMeansModel(object): +@inherit_doc +class KMeansModel(Saveable, Loader): """A clustering model derived from the k-means method. 
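The save and load methods added to the Python LogisticRegressionModel, SVMModel and NaiveBayesModel are thin wrappers: they construct or load the corresponding JVM model through the Py4J gateway (NaiveBayesModel via the new Java-friendly constructor) and delegate to its Saveable/Loader API. The equivalent Scala round trip looks like this (the path is illustrative; `model` and `sc` are assumed to exist):

```scala
import org.apache.spark.mllib.classification.LogisticRegressionModel

val path = "/tmp/lr-model"                 // illustrative path
model.save(sc, path)                       // model: LogisticRegressionModel
val sameModel = LogisticRegressionModel.load(sc, path)
sameModel.getThreshold                     // Option[Double]; the Python loader re-applies it via setThreshold
```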
@@ -55,6 +57,16 @@ class KMeansModel(object): True >>> type(model.clusterCenters) + >>> import os, tempfile + >>> path = tempfile.mkdtemp() + >>> model.save(sc, path) + >>> sameModel = KMeansModel.load(sc, path) + >>> sameModel.predict(sparse_data[0]) == model.predict(sparse_data[0]) + True + >>> try: + ... os.removedirs(path) + ... except OSError: + ... pass """ def __init__(self, centers): @@ -77,6 +89,16 @@ def predict(self, x): best_distance = distance return best + def save(self, sc, path): + java_centers = _py2java(sc, map(_convert_to_vector, self.centers)) + java_model = sc._jvm.org.apache.spark.mllib.clustering.KMeansModel(java_centers) + java_model.save(sc._jsc.sc(), path) + + @classmethod + def load(cls, sc, path): + java_model = sc._jvm.org.apache.spark.mllib.clustering.KMeansModel.load(sc._jsc.sc(), path) + return KMeansModel(_java2py(sc, java_model.clusterCenters())) + class KMeans(object): diff --git a/python/pyspark/mllib/common.py b/python/pyspark/mllib/common.py index 621591c26b77f..a539d2f2846f9 100644 --- a/python/pyspark/mllib/common.py +++ b/python/pyspark/mllib/common.py @@ -70,8 +70,8 @@ def _py2java(sc, obj): obj = _to_java_object_rdd(obj) elif isinstance(obj, SparkContext): obj = obj._jsc - elif isinstance(obj, list) and (obj or isinstance(obj[0], JavaObject)): - obj = ListConverter().convert(obj, sc._gateway._gateway_client) + elif isinstance(obj, list): + obj = ListConverter().convert([_py2java(sc, x) for x in obj], sc._gateway._gateway_client) elif isinstance(obj, JavaObject): pass elif isinstance(obj, (int, long, float, bool, basestring)): diff --git a/python/pyspark/mllib/regression.py b/python/pyspark/mllib/regression.py index 0c21ad578793f..414a0ada80787 100644 --- a/python/pyspark/mllib/regression.py +++ b/python/pyspark/mllib/regression.py @@ -18,8 +18,9 @@ import numpy as np from numpy import array -from pyspark.mllib.common import callMLlibFunc, inherit_doc +from pyspark.mllib.common import callMLlibFunc, _py2java, _java2py, inherit_doc from pyspark.mllib.linalg import SparseVector, _convert_to_vector +from pyspark.mllib.util import Saveable, Loader __all__ = ['LabeledPoint', 'LinearModel', 'LinearRegressionModel', 'LinearRegressionWithSGD', @@ -114,6 +115,20 @@ class LinearRegressionModel(LinearRegressionModelBase): True >>> abs(lrm.predict(SparseVector(1, {0: 1.0})) - 1) < 0.5 True + >>> import os, tempfile + >>> path = tempfile.mkdtemp() + >>> lrm.save(sc, path) + >>> sameModel = LinearRegressionModel.load(sc, path) + >>> abs(sameModel.predict(np.array([0.0])) - 0) < 0.5 + True + >>> abs(sameModel.predict(np.array([1.0])) - 1) < 0.5 + True + >>> abs(sameModel.predict(SparseVector(1, {0: 1.0})) - 1) < 0.5 + True + >>> try: + ... os.removedirs(path) + ... except: + ... pass >>> data = [ ... LabeledPoint(0.0, SparseVector(1, {0: 0.0})), ... 
LabeledPoint(1.0, SparseVector(1, {0: 1.0})), @@ -126,6 +141,19 @@ class LinearRegressionModel(LinearRegressionModelBase): >>> abs(lrm.predict(SparseVector(1, {0: 1.0})) - 1) < 0.5 True """ + def save(self, sc, path): + java_model = sc._jvm.org.apache.spark.mllib.regression.LinearRegressionModel( + _py2java(sc, self._coeff), self.intercept) + java_model.save(sc._jsc.sc(), path) + + @classmethod + def load(cls, sc, path): + java_model = sc._jvm.org.apache.spark.mllib.regression.LinearRegressionModel.load( + sc._jsc.sc(), path) + weights = _java2py(sc, java_model.weights()) + intercept = java_model.intercept() + model = LinearRegressionModel(weights, intercept) + return model # train_func should take two parameters, namely data and initial_weights, and @@ -135,7 +163,8 @@ def _regression_train_wrapper(train_func, modelClass, data, initial_weights): first = data.first() if not isinstance(first, LabeledPoint): raise ValueError("data should be an RDD of LabeledPoint, but got %s" % first) - initial_weights = initial_weights or [0.0] * len(data.first().features) + if initial_weights is None: + initial_weights = [0.0] * len(data.first().features) weights, intercept = train_func(data, _convert_to_vector(initial_weights)) return modelClass(weights, intercept) @@ -199,6 +228,20 @@ class LassoModel(LinearRegressionModelBase): True >>> abs(lrm.predict(SparseVector(1, {0: 1.0})) - 1) < 0.5 True + >>> import os, tempfile + >>> path = tempfile.mkdtemp() + >>> lrm.save(sc, path) + >>> sameModel = LassoModel.load(sc, path) + >>> abs(sameModel.predict(np.array([0.0])) - 0) < 0.5 + True + >>> abs(sameModel.predict(np.array([1.0])) - 1) < 0.5 + True + >>> abs(sameModel.predict(SparseVector(1, {0: 1.0})) - 1) < 0.5 + True + >>> try: + ... os.removedirs(path) + ... except: + ... pass >>> data = [ ... LabeledPoint(0.0, SparseVector(1, {0: 0.0})), ... LabeledPoint(1.0, SparseVector(1, {0: 1.0})), @@ -211,6 +254,19 @@ class LassoModel(LinearRegressionModelBase): >>> abs(lrm.predict(SparseVector(1, {0: 1.0})) - 1) < 0.5 True """ + def save(self, sc, path): + java_model = sc._jvm.org.apache.spark.mllib.regression.LassoModel( + _py2java(sc, self._coeff), self.intercept) + java_model.save(sc._jsc.sc(), path) + + @classmethod + def load(cls, sc, path): + java_model = sc._jvm.org.apache.spark.mllib.regression.LassoModel.load( + sc._jsc.sc(), path) + weights = _java2py(sc, java_model.weights()) + intercept = java_model.intercept() + model = LassoModel(weights, intercept) + return model class LassoWithSGD(object): @@ -246,6 +302,20 @@ class RidgeRegressionModel(LinearRegressionModelBase): True >>> abs(lrm.predict(SparseVector(1, {0: 1.0})) - 1) < 0.5 True + >>> import os, tempfile + >>> path = tempfile.mkdtemp() + >>> lrm.save(sc, path) + >>> sameModel = RidgeRegressionModel.load(sc, path) + >>> abs(sameModel.predict(np.array([0.0])) - 0) < 0.5 + True + >>> abs(sameModel.predict(np.array([1.0])) - 1) < 0.5 + True + >>> abs(sameModel.predict(SparseVector(1, {0: 1.0})) - 1) < 0.5 + True + >>> try: + ... os.removedirs(path) + ... except: + ... pass >>> data = [ ... LabeledPoint(0.0, SparseVector(1, {0: 0.0})), ... 
LabeledPoint(1.0, SparseVector(1, {0: 1.0})), @@ -258,6 +328,19 @@ class RidgeRegressionModel(LinearRegressionModelBase): >>> abs(lrm.predict(SparseVector(1, {0: 1.0})) - 1) < 0.5 True """ + def save(self, sc, path): + java_model = sc._jvm.org.apache.spark.mllib.regression.RidgeRegressionModel( + _py2java(sc, self._coeff), self.intercept) + java_model.save(sc._jsc.sc(), path) + + @classmethod + def load(cls, sc, path): + java_model = sc._jvm.org.apache.spark.mllib.regression.RidgeRegressionModel.load( + sc._jsc.sc(), path) + weights = _java2py(sc, java_model.weights()) + intercept = java_model.intercept() + model = RidgeRegressionModel(weights, intercept) + return model class RidgeRegressionWithSGD(object): diff --git a/python/pyspark/mllib/tests.py b/python/pyspark/mllib/tests.py index 5328d99b69684..155019638f806 100644 --- a/python/pyspark/mllib/tests.py +++ b/python/pyspark/mllib/tests.py @@ -323,6 +323,13 @@ def test_regression(self): self.assertTrue(gbt_model.predict(features[2]) <= 0) self.assertTrue(gbt_model.predict(features[3]) > 0) + try: + LinearRegressionWithSGD.train(rdd, initialWeights=array([1.0, 1.0])) + LassoWithSGD.train(rdd, initialWeights=array([1.0, 1.0])) + RidgeRegressionWithSGD.train(rdd, initialWeights=array([1.0, 1.0])) + except ValueError: + self.fail() + class StatTests(PySparkTestCase): # SPARK-4023 diff --git a/python/pyspark/mllib/util.py b/python/pyspark/mllib/util.py index e877c720ac77a..c5c3468eb95e9 100644 --- a/python/pyspark/mllib/util.py +++ b/python/pyspark/mllib/util.py @@ -20,7 +20,6 @@ from pyspark.mllib.common import callMLlibFunc, JavaModelWrapper, inherit_doc from pyspark.mllib.linalg import Vectors, SparseVector, _convert_to_vector -from pyspark.mllib.regression import LabeledPoint class MLUtils(object): @@ -50,6 +49,7 @@ def _parse_libsvm_line(line, multiclass=None): @staticmethod def _convert_labeled_point_to_libsvm(p): """Converts a LabeledPoint to a string in LIBSVM format.""" + from pyspark.mllib.regression import LabeledPoint assert isinstance(p, LabeledPoint) items = [str(p.label)] v = _convert_to_vector(p.features) @@ -92,6 +92,7 @@ def loadLibSVMFile(sc, path, numFeatures=-1, minPartitions=None, multiclass=None >>> from tempfile import NamedTemporaryFile >>> from pyspark.mllib.util import MLUtils + >>> from pyspark.mllib.regression import LabeledPoint >>> tempFile = NamedTemporaryFile(delete=True) >>> tempFile.write("+1 1:1.0 3:2.0 5:3.0\\n-1\\n-1 2:4.0 4:5.0 6:6.0") >>> tempFile.flush() @@ -110,6 +111,7 @@ def loadLibSVMFile(sc, path, numFeatures=-1, minPartitions=None, multiclass=None >>> print examples[2] (-1.0,(6,[1,3,5],[4.0,5.0,6.0])) """ + from pyspark.mllib.regression import LabeledPoint if multiclass is not None: warnings.warn("deprecated", DeprecationWarning) @@ -130,6 +132,7 @@ def saveAsLibSVMFile(data, dir): >>> from tempfile import NamedTemporaryFile >>> from fileinput import input + >>> from pyspark.mllib.regression import LabeledPoint >>> from glob import glob >>> from pyspark.mllib.util import MLUtils >>> examples = [LabeledPoint(1.1, Vectors.sparse(3, [(0, 1.23), (2, 4.56)])), \ @@ -156,6 +159,7 @@ def loadLabeledPoints(sc, path, minPartitions=None): >>> from tempfile import NamedTemporaryFile >>> from pyspark.mllib.util import MLUtils + >>> from pyspark.mllib.regression import LabeledPoint >>> examples = [LabeledPoint(1.1, Vectors.sparse(3, [(0, -1.23), (2, 4.56e-7)])), \ LabeledPoint(0.0, Vectors.dense([1.01, 2.02, 3.03]))] >>> tempFile = NamedTemporaryFile(delete=True) diff --git a/python/pyspark/rdd.py 
b/python/pyspark/rdd.py index bf17f513c0bc3..c337a43c8a7fc 100644 --- a/python/pyspark/rdd.py +++ b/python/pyspark/rdd.py @@ -346,6 +346,12 @@ def sample(self, withReplacement, fraction, seed=None): """ Return a sampled subset of this RDD. + :param withReplacement: can elements be sampled multiple times (replaced when sampled out) + :param fraction: expected size of the sample as a fraction of this RDD's size + without replacement: probability that each element is chosen; fraction must be [0, 1] + with replacement: expected number of times each element is chosen; fraction must be >= 0 + :param seed: seed for the random number generator + >>> rdd = sc.parallelize(range(100), 4) >>> rdd.sample(False, 0.1, 81).count() 10 diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index 94001aec3774b..5cb89da7a8ed5 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -162,7 +162,7 @@ def _java_save_mode(self, mode): "Only 'append', 'overwrite', 'ignore', and 'error' are acceptable save mode.") return jmode - def saveAsTable(self, tableName, source=None, mode="append", **options): + def saveAsTable(self, tableName, source=None, mode="error", **options): """Saves the contents of the :class:`DataFrame` to a data source as a table. The data source is specified by the `source` and a set of `options`. @@ -188,7 +188,7 @@ def saveAsTable(self, tableName, source=None, mode="append", **options): self.sql_ctx._sc._gateway._gateway_client) self._jdf.saveAsTable(tableName, source, jmode, joptions) - def save(self, path=None, source=None, mode="append", **options): + def save(self, path=None, source=None, mode="error", **options): """Saves the contents of the :class:`DataFrame` to a data source. The data source is specified by the `source` and a set of `options`. 
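The dataframe.py change above flips the default save mode from "append" to "error". A short Scala sketch of passing the mode explicitly so behavior does not depend on the default; `df`, the path, and the table name are illustrative:

    import org.apache.spark.sql.SaveMode

    // The default is now SaveMode.ErrorIfExists ("error"): fail if the target already exists.
    // Callers that relied on the old "append" default should request it explicitly.
    df.save("/tmp/people.parquet", SaveMode.Append)
    df.saveAsTable("people_backup", "parquet", SaveMode.Overwrite)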
diff --git a/repl/pom.xml b/repl/pom.xml index 295f88ea3ecf9..edfa1c7f2c29c 100644 --- a/repl/pom.xml +++ b/repl/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT ../pom.xml diff --git a/repl/scala-2.10/src/test/scala/org/apache/spark/repl/ReplSuite.scala b/repl/scala-2.10/src/test/scala/org/apache/spark/repl/ReplSuite.scala index 249f438459300..934daaeaafca1 100644 --- a/repl/scala-2.10/src/test/scala/org/apache/spark/repl/ReplSuite.scala +++ b/repl/scala-2.10/src/test/scala/org/apache/spark/repl/ReplSuite.scala @@ -121,9 +121,9 @@ class ReplSuite extends FunSuite { val output = runInterpreter("local", """ |var v = 7 - |sc.parallelize(1 to 10).map(x => v).collect.reduceLeft(_+_) + |sc.parallelize(1 to 10).map(x => v).collect().reduceLeft(_+_) |v = 10 - |sc.parallelize(1 to 10).map(x => v).collect.reduceLeft(_+_) + |sc.parallelize(1 to 10).map(x => v).collect().reduceLeft(_+_) """.stripMargin) assertDoesNotContain("error:", output) assertDoesNotContain("Exception", output) @@ -137,7 +137,7 @@ class ReplSuite extends FunSuite { |class C { |def foo = 5 |} - |sc.parallelize(1 to 10).map(x => (new C).foo).collect.reduceLeft(_+_) + |sc.parallelize(1 to 10).map(x => (new C).foo).collect().reduceLeft(_+_) """.stripMargin) assertDoesNotContain("error:", output) assertDoesNotContain("Exception", output) @@ -148,7 +148,7 @@ class ReplSuite extends FunSuite { val output = runInterpreter("local", """ |def double(x: Int) = x + x - |sc.parallelize(1 to 10).map(x => double(x)).collect.reduceLeft(_+_) + |sc.parallelize(1 to 10).map(x => double(x)).collect().reduceLeft(_+_) """.stripMargin) assertDoesNotContain("error:", output) assertDoesNotContain("Exception", output) @@ -160,9 +160,9 @@ class ReplSuite extends FunSuite { """ |var v = 7 |def getV() = v - |sc.parallelize(1 to 10).map(x => getV()).collect.reduceLeft(_+_) + |sc.parallelize(1 to 10).map(x => getV()).collect().reduceLeft(_+_) |v = 10 - |sc.parallelize(1 to 10).map(x => getV()).collect.reduceLeft(_+_) + |sc.parallelize(1 to 10).map(x => getV()).collect().reduceLeft(_+_) """.stripMargin) assertDoesNotContain("error:", output) assertDoesNotContain("Exception", output) @@ -178,9 +178,9 @@ class ReplSuite extends FunSuite { """ |var array = new Array[Int](5) |val broadcastArray = sc.broadcast(array) - |sc.parallelize(0 to 4).map(x => broadcastArray.value(x)).collect + |sc.parallelize(0 to 4).map(x => broadcastArray.value(x)).collect() |array(0) = 5 - |sc.parallelize(0 to 4).map(x => broadcastArray.value(x)).collect + |sc.parallelize(0 to 4).map(x => broadcastArray.value(x)).collect() """.stripMargin) assertDoesNotContain("error:", output) assertDoesNotContain("Exception", output) @@ -216,14 +216,14 @@ class ReplSuite extends FunSuite { """ |var v = 7 |def getV() = v - |sc.parallelize(1 to 10).map(x => getV()).collect.reduceLeft(_+_) + |sc.parallelize(1 to 10).map(x => getV()).collect().reduceLeft(_+_) |v = 10 - |sc.parallelize(1 to 10).map(x => getV()).collect.reduceLeft(_+_) + |sc.parallelize(1 to 10).map(x => getV()).collect().reduceLeft(_+_) |var array = new Array[Int](5) |val broadcastArray = sc.broadcast(array) - |sc.parallelize(0 to 4).map(x => broadcastArray.value(x)).collect + |sc.parallelize(0 to 4).map(x => broadcastArray.value(x)).collect() |array(0) = 5 - |sc.parallelize(0 to 4).map(x => broadcastArray.value(x)).collect + |sc.parallelize(0 to 4).map(x => broadcastArray.value(x)).collect() """.stripMargin) assertDoesNotContain("error:", output) assertDoesNotContain("Exception", output) @@ 
-262,7 +262,7 @@ class ReplSuite extends FunSuite { |val sqlContext = new org.apache.spark.sql.SQLContext(sc) |import sqlContext.implicits._ |case class TestCaseClass(value: Int) - |sc.parallelize(1 to 10).map(x => TestCaseClass(x)).toDF.collect() + |sc.parallelize(1 to 10).map(x => TestCaseClass(x)).toDF().collect() """.stripMargin) assertDoesNotContain("error:", output) assertDoesNotContain("Exception", output) @@ -275,7 +275,7 @@ class ReplSuite extends FunSuite { |val t = new TestClass |import t.testMethod |case class TestCaseClass(value: Int) - |sc.parallelize(1 to 10).map(x => TestCaseClass(x)).collect + |sc.parallelize(1 to 10).map(x => TestCaseClass(x)).collect() """.stripMargin) assertDoesNotContain("error:", output) assertDoesNotContain("Exception", output) @@ -287,14 +287,14 @@ class ReplSuite extends FunSuite { """ |var v = 7 |def getV() = v - |sc.parallelize(1 to 10).map(x => getV()).collect.reduceLeft(_+_) + |sc.parallelize(1 to 10).map(x => getV()).collect().reduceLeft(_+_) |v = 10 - |sc.parallelize(1 to 10).map(x => getV()).collect.reduceLeft(_+_) + |sc.parallelize(1 to 10).map(x => getV()).collect().reduceLeft(_+_) |var array = new Array[Int](5) |val broadcastArray = sc.broadcast(array) - |sc.parallelize(0 to 4).map(x => broadcastArray.value(x)).collect + |sc.parallelize(0 to 4).map(x => broadcastArray.value(x)).collect() |array(0) = 5 - |sc.parallelize(0 to 4).map(x => broadcastArray.value(x)).collect + |sc.parallelize(0 to 4).map(x => broadcastArray.value(x)).collect() """.stripMargin) assertDoesNotContain("error:", output) assertDoesNotContain("Exception", output) @@ -309,10 +309,22 @@ class ReplSuite extends FunSuite { val output = runInterpreter("local[2]", """ |case class Foo(i: Int) - |val ret = sc.parallelize((1 to 100).map(Foo), 10).collect + |val ret = sc.parallelize((1 to 100).map(Foo), 10).collect() """.stripMargin) assertDoesNotContain("error:", output) assertDoesNotContain("Exception", output) assertContains("ret: Array[Foo] = Array(Foo(1),", output) } + + test("collecting objects of class defined in repl - shuffling") { + val output = runInterpreter("local-cluster[1,1,512]", + """ + |case class Foo(i: Int) + |val list = List((1, Foo(1)), (1, Foo(2))) + |val ret = sc.parallelize(list).groupByKey().collect() + """.stripMargin) + assertDoesNotContain("error:", output) + assertDoesNotContain("Exception", output) + assertContains("ret: Array[(Int, Iterable[Foo])] = Array((1,", output) + } } diff --git a/repl/scala-2.11/src/test/scala/org/apache/spark/repl/ReplSuite.scala b/repl/scala-2.11/src/test/scala/org/apache/spark/repl/ReplSuite.scala index b3bd135548124..14f5e9ed4f25e 100644 --- a/repl/scala-2.11/src/test/scala/org/apache/spark/repl/ReplSuite.scala +++ b/repl/scala-2.11/src/test/scala/org/apache/spark/repl/ReplSuite.scala @@ -21,11 +21,9 @@ import java.io._ import java.net.URLClassLoader import scala.collection.mutable.ArrayBuffer -import scala.concurrent.Await import scala.concurrent.duration._ import scala.tools.nsc.interpreter.SparkILoop -import com.google.common.io.Files import org.scalatest.FunSuite import org.apache.commons.lang3.StringEscapeUtils import org.apache.spark.SparkContext @@ -128,9 +126,9 @@ class ReplSuite extends FunSuite { val output = runInterpreter("local", """ |var v = 7 - |sc.parallelize(1 to 10).map(x => v).collect.reduceLeft(_+_) + |sc.parallelize(1 to 10).map(x => v).collect().reduceLeft(_+_) |v = 10 - |sc.parallelize(1 to 10).map(x => v).collect.reduceLeft(_+_) + |sc.parallelize(1 to 10).map(x => v).collect().reduceLeft(_+_) 
""".stripMargin) assertDoesNotContain("error:", output) assertDoesNotContain("Exception", output) @@ -144,7 +142,7 @@ class ReplSuite extends FunSuite { |class C { |def foo = 5 |} - |sc.parallelize(1 to 10).map(x => (new C).foo).collect.reduceLeft(_+_) + |sc.parallelize(1 to 10).map(x => (new C).foo).collect().reduceLeft(_+_) """.stripMargin) assertDoesNotContain("error:", output) assertDoesNotContain("Exception", output) @@ -155,7 +153,7 @@ class ReplSuite extends FunSuite { val output = runInterpreter("local", """ |def double(x: Int) = x + x - |sc.parallelize(1 to 10).map(x => double(x)).collect.reduceLeft(_+_) + |sc.parallelize(1 to 10).map(x => double(x)).collect().reduceLeft(_+_) """.stripMargin) assertDoesNotContain("error:", output) assertDoesNotContain("Exception", output) @@ -167,9 +165,9 @@ class ReplSuite extends FunSuite { """ |var v = 7 |def getV() = v - |sc.parallelize(1 to 10).map(x => getV()).collect.reduceLeft(_+_) + |sc.parallelize(1 to 10).map(x => getV()).collect().reduceLeft(_+_) |v = 10 - |sc.parallelize(1 to 10).map(x => getV()).collect.reduceLeft(_+_) + |sc.parallelize(1 to 10).map(x => getV()).collect().reduceLeft(_+_) """.stripMargin) assertDoesNotContain("error:", output) assertDoesNotContain("Exception", output) @@ -185,9 +183,9 @@ class ReplSuite extends FunSuite { """ |var array = new Array[Int](5) |val broadcastArray = sc.broadcast(array) - |sc.parallelize(0 to 4).map(x => broadcastArray.value(x)).collect + |sc.parallelize(0 to 4).map(x => broadcastArray.value(x)).collect() |array(0) = 5 - |sc.parallelize(0 to 4).map(x => broadcastArray.value(x)).collect + |sc.parallelize(0 to 4).map(x => broadcastArray.value(x)).collect() """.stripMargin) assertDoesNotContain("error:", output) assertDoesNotContain("Exception", output) @@ -196,8 +194,7 @@ class ReplSuite extends FunSuite { } test("interacting with files") { - val tempDir = Files.createTempDir() - tempDir.deleteOnExit() + val tempDir = Utils.createTempDir() val out = new FileWriter(tempDir + "/input") out.write("Hello world!\n") out.write("What's up?\n") @@ -224,14 +221,14 @@ class ReplSuite extends FunSuite { """ |var v = 7 |def getV() = v - |sc.parallelize(1 to 10).map(x => getV()).collect.reduceLeft(_+_) + |sc.parallelize(1 to 10).map(x => getV()).collect().reduceLeft(_+_) |v = 10 - |sc.parallelize(1 to 10).map(x => getV()).collect.reduceLeft(_+_) + |sc.parallelize(1 to 10).map(x => getV()).collect().reduceLeft(_+_) |var array = new Array[Int](5) |val broadcastArray = sc.broadcast(array) - |sc.parallelize(0 to 4).map(x => broadcastArray.value(x)).collect + |sc.parallelize(0 to 4).map(x => broadcastArray.value(x)).collect() |array(0) = 5 - |sc.parallelize(0 to 4).map(x => broadcastArray.value(x)).collect + |sc.parallelize(0 to 4).map(x => broadcastArray.value(x)).collect() """.stripMargin) assertDoesNotContain("error:", output) assertDoesNotContain("Exception", output) @@ -270,7 +267,7 @@ class ReplSuite extends FunSuite { |val sqlContext = new org.apache.spark.sql.SQLContext(sc) |import sqlContext.implicits._ |case class TestCaseClass(value: Int) - |sc.parallelize(1 to 10).map(x => TestCaseClass(x)).toDF.collect + |sc.parallelize(1 to 10).map(x => TestCaseClass(x)).toDF().collect() """.stripMargin) assertDoesNotContain("error:", output) assertDoesNotContain("Exception", output) @@ -283,7 +280,7 @@ class ReplSuite extends FunSuite { |val t = new TestClass |import t.testMethod |case class TestCaseClass(value: Int) - |sc.parallelize(1 to 10).map(x => TestCaseClass(x)).collect + |sc.parallelize(1 to 10).map(x 
=> TestCaseClass(x)).collect() """.stripMargin) assertDoesNotContain("error:", output) assertDoesNotContain("Exception", output) @@ -295,14 +292,14 @@ class ReplSuite extends FunSuite { """ |var v = 7 |def getV() = v - |sc.parallelize(1 to 10).map(x => getV()).collect.reduceLeft(_+_) + |sc.parallelize(1 to 10).map(x => getV()).collect().reduceLeft(_+_) |v = 10 - |sc.parallelize(1 to 10).map(x => getV()).collect.reduceLeft(_+_) + |sc.parallelize(1 to 10).map(x => getV()).collect().reduceLeft(_+_) |var array = new Array[Int](5) |val broadcastArray = sc.broadcast(array) - |sc.parallelize(0 to 4).map(x => broadcastArray.value(x)).collect + |sc.parallelize(0 to 4).map(x => broadcastArray.value(x)).collect() |array(0) = 5 - |sc.parallelize(0 to 4).map(x => broadcastArray.value(x)).collect + |sc.parallelize(0 to 4).map(x => broadcastArray.value(x)).collect() """.stripMargin) assertDoesNotContain("error:", output) assertDoesNotContain("Exception", output) @@ -317,10 +314,22 @@ class ReplSuite extends FunSuite { val output = runInterpreter("local[2]", """ |case class Foo(i: Int) - |val ret = sc.parallelize((1 to 100).map(Foo), 10).collect + |val ret = sc.parallelize((1 to 100).map(Foo), 10).collect() """.stripMargin) assertDoesNotContain("error:", output) assertDoesNotContain("Exception", output) assertContains("ret: Array[Foo] = Array(Foo(1),", output) } + + test("collecting objects of class defined in repl - shuffling") { + val output = runInterpreter("local-cluster[1,1,512]", + """ + |case class Foo(i: Int) + |val list = List((1, Foo(1)), (1, Foo(2))) + |val ret = sc.parallelize(list).groupByKey().collect() + """.stripMargin) + assertDoesNotContain("error:", output) + assertDoesNotContain("Exception", output) + assertContains("ret: Array[(Int, Iterable[Foo])] = Array((1,", output) + } } diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml index 8ad026dbdf8ff..3dea2ee76542f 100644 --- a/sql/catalyst/pom.xml +++ b/sql/catalyst/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.10 - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT ../../pom.xml diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala index 54ab13ca352d2..ea7d44a3723d1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala @@ -35,7 +35,7 @@ import org.apache.spark.sql.types._ * This is currently included mostly for illustrative purposes. Users wanting more complete support * for a SQL like language should checkout the HiveQL support in the sql/hive sub-project. */ -class SqlParser extends AbstractSparkSQLParser { +class SqlParser extends AbstractSparkSQLParser with DataTypeParser { def parseExpression(input: String): Expression = { // Initialize the Keywords. 
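The new "collecting objects of class defined in repl - shuffling" tests added to both ReplSuites run against local-cluster[1,1,512], so classes defined in the shell have to reach executors across a shuffle. A spark-shell sketch of the scenario they exercise:

    // spark-shell
    case class Foo(i: Int)
    val list = List((1, Foo(1)), (1, Foo(2)))
    val ret = sc.parallelize(list).groupByKey().collect()
    // ret: Array[(Int, Iterable[Foo])] = Array((1, ...))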
@@ -61,11 +61,8 @@ class SqlParser extends AbstractSparkSQLParser { protected val CAST = Keyword("CAST") protected val COALESCE = Keyword("COALESCE") protected val COUNT = Keyword("COUNT") - protected val DATE = Keyword("DATE") - protected val DECIMAL = Keyword("DECIMAL") protected val DESC = Keyword("DESC") protected val DISTINCT = Keyword("DISTINCT") - protected val DOUBLE = Keyword("DOUBLE") protected val ELSE = Keyword("ELSE") protected val END = Keyword("END") protected val EXCEPT = Keyword("EXCEPT") @@ -78,7 +75,6 @@ class SqlParser extends AbstractSparkSQLParser { protected val IF = Keyword("IF") protected val IN = Keyword("IN") protected val INNER = Keyword("INNER") - protected val INT = Keyword("INT") protected val INSERT = Keyword("INSERT") protected val INTERSECT = Keyword("INTERSECT") protected val INTO = Keyword("INTO") @@ -105,13 +101,11 @@ class SqlParser extends AbstractSparkSQLParser { protected val SELECT = Keyword("SELECT") protected val SEMI = Keyword("SEMI") protected val SQRT = Keyword("SQRT") - protected val STRING = Keyword("STRING") protected val SUBSTR = Keyword("SUBSTR") protected val SUBSTRING = Keyword("SUBSTRING") protected val SUM = Keyword("SUM") protected val TABLE = Keyword("TABLE") protected val THEN = Keyword("THEN") - protected val TIMESTAMP = Keyword("TIMESTAMP") protected val TRUE = Keyword("TRUE") protected val UNION = Keyword("UNION") protected val UPPER = Keyword("UPPER") @@ -315,7 +309,9 @@ class SqlParser extends AbstractSparkSQLParser { ) protected lazy val cast: Parser[Expression] = - CAST ~ "(" ~> expression ~ (AS ~> dataType) <~ ")" ^^ { case exp ~ t => Cast(exp, t) } + CAST ~ "(" ~> expression ~ (AS ~> dataType) <~ ")" ^^ { + case exp ~ t => Cast(exp, t) + } protected lazy val literal: Parser[Literal] = ( numericLiteral @@ -387,19 +383,4 @@ class SqlParser extends AbstractSparkSQLParser { (ident <~ ".") ~ ident ~ rep("." ~> ident) ^^ { case i1 ~ i2 ~ rest => UnresolvedAttribute((Seq(i1, i2) ++ rest).mkString(".")) } - - protected lazy val dataType: Parser[DataType] = - ( STRING ^^^ StringType - | TIMESTAMP ^^^ TimestampType - | DOUBLE ^^^ DoubleType - | fixedDecimalType - | DECIMAL ^^^ DecimalType.Unlimited - | DATE ^^^ DateType - | INT ^^^ IntegerType - ) - - protected lazy val fixedDecimalType: Parser[DataType] = - (DECIMAL ~ "(" ~> numericLit) ~ ("," ~> numericLit <~ ")") ^^ { - case precision ~ scale => DecimalType(precision.toInt, scale.toInt) - } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 7753331748d7b..92d3db077c5e1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -237,22 +237,33 @@ class Analyzer(catalog: Catalog, // Special handling for cases when self-join introduce duplicate expression ids. case j @ Join(left, right, _, _) if left.outputSet.intersect(right.outputSet).nonEmpty => val conflictingAttributes = left.outputSet.intersect(right.outputSet) + logDebug(s"Conflicting attributes ${conflictingAttributes.mkString(",")} in $j") - val (oldRelation, newRelation, attributeRewrites) = right.collect { + val (oldRelation, newRelation) = right.collect { + // Handle base relations that might appear more than once. 
case oldVersion: MultiInstanceRelation if oldVersion.outputSet.intersect(conflictingAttributes).nonEmpty => val newVersion = oldVersion.newInstance() - val newAttributes = AttributeMap(oldVersion.output.zip(newVersion.output)) - (oldVersion, newVersion, newAttributes) + (oldVersion, newVersion) + + // Handle projects that create conflicting aliases. + case oldVersion @ Project(projectList, _) + if findAliases(projectList).intersect(conflictingAttributes).nonEmpty => + (oldVersion, oldVersion.copy(projectList = newAliases(projectList))) + + case oldVersion @ Aggregate(_, aggregateExpressions, _) + if findAliases(aggregateExpressions).intersect(conflictingAttributes).nonEmpty => + (oldVersion, oldVersion.copy(aggregateExpressions = newAliases(aggregateExpressions))) }.head // Only handle first case found, others will be fixed on the next pass. + val attributeRewrites = AttributeMap(oldRelation.output.zip(newRelation.output)) val newRight = right transformUp { case r if r == oldRelation => newRelation + } transformUp { case other => other transformExpressions { case a: Attribute => attributeRewrites.get(a).getOrElse(a) } } - j.copy(right = newRight) case q: LogicalPlan => @@ -272,6 +283,17 @@ class Analyzer(catalog: Catalog, } } + def newAliases(expressions: Seq[NamedExpression]): Seq[NamedExpression] = { + expressions.map { + case a: Alias => Alias(a.child, a.name)() + case other => other + } + } + + def findAliases(projectList: Seq[NamedExpression]): AttributeSet = { + AttributeSet(projectList.collect { case a: Alias => a.toAttribute }) + } + /** * Returns true if `exprs` contains a [[Star]]. */ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 4e8fc892f3eea..fb975ee5e7296 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -85,9 +85,8 @@ class CheckAnalysis { cleaned.foreach(checkValidAggregateExpression) - case o if o.children.nonEmpty && - !o.references.filter(_.name != "grouping__id").subsetOf(o.inputSet) => - val missingAttributes = (o.references -- o.inputSet).map(_.prettyString).mkString(",") + case o if o.children.nonEmpty && o.missingInput.nonEmpty => + val missingAttributes = o.missingInput.map(_.prettyString).mkString(",") val input = o.inputSet.map(_.prettyString).mkString(",") failAnalysis(s"resolved attributes $missingAttributes missing from $input") diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/AttributeSet.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/AttributeSet.scala index a9ba0be596349..adaeab0b5c027 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/AttributeSet.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/AttributeSet.scala @@ -17,7 +17,6 @@ package org.apache.spark.sql.catalyst.expressions -import org.apache.spark.sql.catalyst.analysis.Star protected class AttributeEquals(val a: Attribute) { override def hashCode() = a match { @@ -115,7 +114,7 @@ class AttributeSet private (val baseSet: Set[AttributeEquals]) // sorts of things in its closure. 
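The analyzer change above is what allows joining a DataFrame with itself: relations, Project aliases, and Aggregate aliases on the right side that collide with the left are re-instantiated with fresh expression ids. A hedged sketch of the user-level case; the table and column names are illustrative:

    import sqlContext.implicits._

    val people = sqlContext.table("people")
    // Both sides come from the same plan; the right side now gets fresh
    // attribute ids/aliases, so the join condition stays unambiguous.
    val pairs = people.as("a").join(people.as("b"), $"a.managerId" === $"b.id")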
override def toSeq: Seq[Attribute] = baseSet.map(_.a).toArray.toSeq - override def toString = "{" + baseSet.map(_.a).mkString(", ") + "}" + override def toString: String = "{" + baseSet.map(_.a).mkString(", ") + "}" override def isEmpty: Boolean = baseSet.isEmpty } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregates.scala index 735b7488fdcbd..5297d1e31246c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregates.scala @@ -346,13 +346,13 @@ case class Sum(child: Expression) extends PartialAggregate with trees.UnaryNode[ case DecimalType.Fixed(_, _) => val partialSum = Alias(Sum(Cast(child, DecimalType.Unlimited)), "PartialSum")() SplitEvaluation( - Cast(Sum(partialSum.toAttribute), dataType), + Cast(CombineSum(partialSum.toAttribute), dataType), partialSum :: Nil) case _ => val partialSum = Alias(Sum(child), "PartialSum")() SplitEvaluation( - Sum(partialSum.toAttribute), + CombineSum(partialSum.toAttribute), partialSum :: Nil) } } @@ -360,6 +360,30 @@ case class Sum(child: Expression) extends PartialAggregate with trees.UnaryNode[ override def newInstance() = new SumFunction(child, this) } +/** + * Sum should satisfy 3 cases: + * 1) sum of all null values = zero + * 2) sum for table column with no data = null + * 3) sum of column with null and not null values = sum of not null values + * Require separate CombineSum Expression and function as it has to distinguish "No data" case + * versus "data equals null" case, while aggregating results and at each partial expression.i.e., + * Combining PartitionLevel InputData + * <-- null + * Zero <-- Zero <-- null + * + * <-- null <-- no data + * null <-- null <-- no data + */ +case class CombineSum(child: Expression) extends AggregateExpression { + def this() = this(null) + + override def children = child :: Nil + override def nullable = true + override def dataType = child.dataType + override def toString = s"CombineSum($child)" + override def newInstance() = new CombineSumFunction(child, this) +} + case class SumDistinct(child: Expression) extends PartialAggregate with trees.UnaryNode[Expression] { @@ -565,7 +589,8 @@ case class SumFunction(expr: Expression, base: AggregateExpression) extends Aggr private val sum = MutableLiteral(null, calcType) - private val addFunction = Coalesce(Seq(Add(Coalesce(Seq(sum, zero)), Cast(expr, calcType)), sum)) + private val addFunction = + Coalesce(Seq(Add(Coalesce(Seq(sum, zero)), Cast(expr, calcType)), sum, zero)) override def update(input: Row): Unit = { sum.update(addFunction, input) @@ -580,6 +605,43 @@ case class SumFunction(expr: Expression, base: AggregateExpression) extends Aggr } } +case class CombineSumFunction(expr: Expression, base: AggregateExpression) + extends AggregateFunction { + + def this() = this(null, null) // Required for serialization. 
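CombineSum exists so the merge side of a partial aggregation can tell "no input rows" apart from "a partial sum that happens to be null". A query-level sketch of the cases listed in the comment above; the table and column are illustrative:

    // Case 2: no input rows at all -> SUM(x) is null.
    sqlContext.sql("SELECT SUM(x) FROM t WHERE 1 = 0")
    // Cases 1/3: rows exist but x is null in all of them -> SUM(x) is zero;
    // a plain Sum over partial sums could not distinguish this from the empty case.
    sqlContext.sql("SELECT SUM(x) FROM t WHERE x IS NULL")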
+ + private val calcType = + expr.dataType match { + case DecimalType.Fixed(_, _) => + DecimalType.Unlimited + case _ => + expr.dataType + } + + private val zero = Cast(Literal(0), calcType) + + private val sum = MutableLiteral(null, calcType) + + private val addFunction = + Coalesce(Seq(Add(Coalesce(Seq(sum, zero)), Cast(expr, calcType)), sum, zero)) + + override def update(input: Row): Unit = { + val result = expr.eval(input) + // partial sum result can be null only when no input rows present + if(result != null) { + sum.update(addFunction, input) + } + } + + override def eval(input: Row): Any = { + expr.dataType match { + case DecimalType.Fixed(_, _) => + Cast(sum, dataType).eval(null) + case _ => sum.eval(null) + } + } +} + case class SumDistinctFunction(expr: Expression, base: AggregateExpression) extends AggregateFunction { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/package.scala index 80c7dfd376c96..528e38a50a740 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/package.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/package.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.expressions import org.apache.spark.annotation.DeveloperApi import org.apache.spark.sql.catalyst.rules -import org.apache.spark.sql.catalyst.util +import org.apache.spark.util.Utils /** * A collection of generators that build custom bytecode at runtime for performing the evaluation @@ -52,7 +52,7 @@ package object codegen { @DeveloperApi object DumpByteCode { import scala.sys.process._ - val dumpDirectory = util.getTempFilePath("sparkSqlByteCode") + val dumpDirectory = Utils.createTempDir() dumpDirectory.mkdir() def apply(obj: Any): Unit = { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala index 62c062be6d820..17f7f9fe51376 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala @@ -124,6 +124,12 @@ case class Alias(child: Expression, name: String) override def toString: String = s"$child AS $name#${exprId.id}$typeSuffix" override protected final def otherCopyArgs = exprId :: qualifiers :: Nil + + override def equals(other: Any): Boolean = other match { + case a: Alias => + name == a.name && exprId == a.exprId && child == a.child && qualifiers == a.qualifiers + case _ => false + } } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/rows.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/rows.scala index faa366771824b..f03d6f71a9fae 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/rows.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/rows.scala @@ -146,6 +146,27 @@ class GenericRow(protected[sql] val values: Array[Any]) extends Row { result } + override def equals(o: Any): Boolean = o match { + case other: Row => + if (values.length != other.length) { + return false + } + + var i = 0 + while (i < values.length) { + if (isNullAt(i) != other.isNullAt(i)) { + return false + } + if (apply(i) != other.apply(i)) { + return false + } + i += 1 + } + true + + case _ 
=> false + } + def copy() = this } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala index 17a88e07de15f..400a6b2825c10 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.catalyst.plans -import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeSet, Expression} +import org.apache.spark.sql.catalyst.expressions.{VirtualColumn, Attribute, AttributeSet, Expression} import org.apache.spark.sql.catalyst.trees.TreeNode import org.apache.spark.sql.types.{ArrayType, DataType, StructField, StructType} @@ -48,7 +48,8 @@ abstract class QueryPlan[PlanType <: TreeNode[PlanType]] extends TreeNode[PlanTy * Subclasses should override this method if they produce attributes internally as it is used by * assertions designed to prevent the construction of invalid plans. */ - def missingInput: AttributeSet = references -- inputSet + def missingInput: AttributeSet = (references -- inputSet) + .filter(_.name != VirtualColumn.groupingIdName) /** * Runs [[transform]] with `rule` on all expressions present in this query operator. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LocalRelation.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LocalRelation.scala index 92bd057c6f4b6..bb79dc340553b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LocalRelation.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LocalRelation.scala @@ -54,4 +54,7 @@ case class LocalRelation(output: Seq[Attribute], data: Seq[Row] = Nil) otherOutput.map(_.dataType) == output.map(_.dataType) && otherData == data case _ => false } + + override lazy val statistics = + Statistics(sizeInBytes = output.map(_.dataType.defaultSize).sum * data.length) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala index 20cc8e90a71a3..384fe53a68362 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala @@ -81,6 +81,11 @@ case class Union(left: LogicalPlan, right: LogicalPlan) extends BinaryNode { override lazy val resolved = childrenResolved && !left.output.zip(right.output).exists { case (l,r) => l.dataType != r.dataType } + + override def statistics: Statistics = { + val sizeInBytes = left.statistics.sizeInBytes + right.statistics.sizeInBytes + Statistics(sizeInBytes = sizeInBytes) + } } case class Join( @@ -103,6 +108,13 @@ case class Join( left.output ++ right.output } } + + def selfJoinResolved = left.outputSet.intersect(right.outputSet).isEmpty + + // Joins are only resolved if they don't introduce ambiguious expression ids. 
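Giving LocalRelation (and Union/Expand) real size estimates matters for join planning: a small in-memory lookup table can now fall under spark.sql.autoBroadcastJoinThreshold and be broadcast instead of shuffled. A rough sketch; the "events" table and its columns are assumptions:

    import sqlContext.implicits._

    val codes = Seq((1, "US"), (2, "CA")).toDF("id", "country")  // backed by a LocalRelation
    val events = sqlContext.table("events")                      // assumed large table
    // With a per-row size estimate instead of the global default, `codes` can
    // qualify for a broadcast join when its estimate is under the threshold.
    val joined = events.join(codes, events("country_id") === codes("id"))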
+ override lazy val resolved: Boolean = { + childrenResolved && !expressions.exists(!_.resolved) && selfJoinResolved + } } case class Except(left: LogicalPlan, right: LogicalPlan) extends BinaryNode { @@ -174,7 +186,12 @@ case class Aggregate( case class Expand( projections: Seq[GroupExpression], output: Seq[Attribute], - child: LogicalPlan) extends UnaryNode + child: LogicalPlan) extends UnaryNode { + override def statistics: Statistics = { + val sizeInBytes = child.statistics.sizeInBytes * projections.length + Statistics(sizeInBytes = sizeInBytes) + } +} trait GroupingAnalytics extends UnaryNode { self: Product => @@ -272,6 +289,15 @@ case class Distinct(child: LogicalPlan) extends UnaryNode { case object NoRelation extends LeafNode { override def output = Nil + + /** + * Computes [[Statistics]] for this plan. The default implementation assumes the output + * cardinality is the product of of all child plan's cardinality, i.e. applies in the case + * of cartesian joins. + * + * [[LeafNode]]s must override this. + */ + override def statistics: Statistics = Statistics(sizeInBytes = 1) } case class Intersect(left: LogicalPlan, right: LogicalPlan) extends BinaryNode { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala index d8da45ae70c4b..feed50f9a2a2d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala @@ -19,20 +19,9 @@ package org.apache.spark.sql.catalyst import java.io.{PrintWriter, ByteArrayOutputStream, FileInputStream, File} -import org.apache.spark.util.{Utils => SparkUtils} +import org.apache.spark.util.Utils package object util { - /** - * Returns a path to a temporary file that probably does not exist. - * Note, there is always the race condition that someone created this - * file since the last time we checked. Thus, this shouldn't be used - * for anything security conscious. - */ - def getTempFilePath(prefix: String, suffix: String = ""): File = { - val tempFile = File.createTempFile(prefix, suffix) - tempFile.delete() - tempFile - } def fileToString(file: File, encoding: String = "UTF-8") = { val inStream = new FileInputStream(file) @@ -56,7 +45,7 @@ package object util { def resourceToString( resource:String, encoding: String = "UTF-8", - classLoader: ClassLoader = SparkUtils.getSparkClassLoader) = { + classLoader: ClassLoader = Utils.getSparkClassLoader) = { val inStream = classLoader.getResourceAsStream(resource) val outStream = new ByteArrayOutputStream try { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataTypeParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataTypeParser.scala new file mode 100644 index 0000000000000..89278f7dbc806 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataTypeParser.scala @@ -0,0 +1,115 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.types + +import scala.language.implicitConversions +import scala.util.matching.Regex +import scala.util.parsing.combinator.syntactical.StandardTokenParsers + +import org.apache.spark.sql.catalyst.SqlLexical + +/** + * This is a data type parser that can be used to parse string representations of data types + * provided in SQL queries. This parser is mixed in with DDLParser and SqlParser. + */ +private[sql] trait DataTypeParser extends StandardTokenParsers { + + // This is used to create a parser from a regex. We are using regexes for data type strings + // since these strings can be also used as column names or field names. + import lexical.Identifier + implicit def regexToParser(regex: Regex): Parser[String] = acceptMatch( + s"identifier matching regex ${regex}", + { case Identifier(str) if regex.unapplySeq(str).isDefined => str } + ) + + protected lazy val primitiveType: Parser[DataType] = + "(?i)string".r ^^^ StringType | + "(?i)float".r ^^^ FloatType | + "(?i)int".r ^^^ IntegerType | + "(?i)tinyint".r ^^^ ByteType | + "(?i)smallint".r ^^^ ShortType | + "(?i)double".r ^^^ DoubleType | + "(?i)bigint".r ^^^ LongType | + "(?i)binary".r ^^^ BinaryType | + "(?i)boolean".r ^^^ BooleanType | + fixedDecimalType | + "(?i)decimal".r ^^^ DecimalType.Unlimited | + "(?i)date".r ^^^ DateType | + "(?i)timestamp".r ^^^ TimestampType | + varchar + + protected lazy val fixedDecimalType: Parser[DataType] = + ("(?i)decimal".r ~> "(" ~> numericLit) ~ ("," ~> numericLit <~ ")") ^^ { + case precision ~ scale => + DecimalType(precision.toInt, scale.toInt) + } + + protected lazy val varchar: Parser[DataType] = + "(?i)varchar".r ~> "(" ~> (numericLit <~ ")") ^^^ StringType + + protected lazy val arrayType: Parser[DataType] = + "(?i)array".r ~> "<" ~> dataType <~ ">" ^^ { + case tpe => ArrayType(tpe) + } + + protected lazy val mapType: Parser[DataType] = + "(?i)map".r ~> "<" ~> dataType ~ "," ~ dataType <~ ">" ^^ { + case t1 ~ _ ~ t2 => MapType(t1, t2) + } + + protected lazy val structField: Parser[StructField] = + ident ~ ":" ~ dataType ^^ { + case name ~ _ ~ tpe => StructField(name, tpe, nullable = true) + } + + protected lazy val structType: Parser[DataType] = + ("(?i)struct".r ~> "<" ~> repsep(structField, ",") <~ ">" ^^ { + case fields => new StructType(fields.toArray) + }) | + ("(?i)struct".r ~ "<>" ^^^ StructType(Nil)) + + protected lazy val dataType: Parser[DataType] = + arrayType | + mapType | + structType | + primitiveType + + def toDataType(dataTypeString: String): DataType = synchronized { + phrase(dataType)(new lexical.Scanner(dataTypeString)) match { + case Success(result, _) => result + case failure: NoSuccess => throw new DataTypeException(failMessage(dataTypeString)) + } + } + + private def failMessage(dataTypeString: String): String = { + s"Unsupported dataType: $dataTypeString. If you have a struct and a field name of it has " + + "any special characters, please use backticks (`) to quote that field name, e.g. `x+y`. " + + "Please note that backtick itself is not supported in a field name." 
+ } +} + +private[sql] object DataTypeParser { + lazy val dataTypeParser = new DataTypeParser { + override val lexical = new SqlLexical + } + + def apply(dataTypeString: String): DataType = dataTypeParser.toDataType(dataTypeString) +} + +/** The exception thrown from the [[DataTypeParser]]. */ +protected[sql] class DataTypeException(message: String) extends Exception(message) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala index 21cc6cea4bf54..994c5202c15dc 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala @@ -246,7 +246,7 @@ final class Decimal extends Ordered[Decimal] with Serializable { } } - override def equals(other: Any) = other match { + override def equals(other: Any): Boolean = other match { case d: Decimal => compare(d) == 0 case _ => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/dataTypes.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/dataTypes.scala index bf39603d13bd5..d973144de3468 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/dataTypes.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/dataTypes.scala @@ -307,7 +307,7 @@ protected[sql] object NativeType { protected[sql] trait PrimitiveType extends DataType { - override def isPrimitive = true + override def isPrimitive: Boolean = true } @@ -442,7 +442,7 @@ class TimestampType private() extends NativeType { @transient private[sql] lazy val tag = ScalaReflectionLock.synchronized { typeTag[JvmType] } private[sql] val ordering = new Ordering[JvmType] { - def compare(x: Timestamp, y: Timestamp) = x.compareTo(y) + def compare(x: Timestamp, y: Timestamp): Int = x.compareTo(y) } /** @@ -542,7 +542,7 @@ class LongType private() extends IntegralType { */ override def defaultSize: Int = 8 - override def simpleString = "bigint" + override def simpleString: String = "bigint" private[spark] override def asNullable: LongType = this } @@ -572,7 +572,7 @@ class IntegerType private() extends IntegralType { */ override def defaultSize: Int = 4 - override def simpleString = "int" + override def simpleString: String = "int" private[spark] override def asNullable: IntegerType = this } @@ -602,7 +602,7 @@ class ShortType private() extends IntegralType { */ override def defaultSize: Int = 2 - override def simpleString = "smallint" + override def simpleString: String = "smallint" private[spark] override def asNullable: ShortType = this } @@ -632,7 +632,7 @@ class ByteType private() extends IntegralType { */ override def defaultSize: Int = 1 - override def simpleString = "tinyint" + override def simpleString: String = "tinyint" private[spark] override def asNullable: ByteType = this } @@ -696,7 +696,7 @@ case class DecimalType(precisionInfo: Option[PrecisionInfo]) extends FractionalT */ override def defaultSize: Int = 4096 - override def simpleString = precisionInfo match { + override def simpleString: String = precisionInfo match { case Some(PrecisionInfo(precision, scale)) => s"decimal($precision,$scale)" case None => "decimal(10,0)" } @@ -836,7 +836,7 @@ case class ArrayType(elementType: DataType, containsNull: Boolean) extends DataT */ override def defaultSize: Int = 100 * elementType.defaultSize - override def simpleString = s"array<${elementType.simpleString}>" + override def simpleString: String = s"array<${elementType.simpleString}>" private[spark] override def 
asNullable: ArrayType = ArrayType(elementType.asNullable, containsNull = true) @@ -1065,7 +1065,7 @@ case class StructType(fields: Array[StructField]) extends DataType with Seq[Stru */ override def defaultSize: Int = fields.map(_.dataType.defaultSize).sum - override def simpleString = { + override def simpleString: String = { val fieldTypes = fields.map(field => s"${field.name}:${field.dataType.simpleString}") s"struct<${fieldTypes.mkString(",")}>" } @@ -1142,7 +1142,7 @@ case class MapType( */ override def defaultSize: Int = 100 * (keyType.defaultSize + valueType.defaultSize) - override def simpleString = s"map<${keyType.simpleString},${valueType.simpleString}>" + override def simpleString: String = s"map<${keyType.simpleString},${valueType.simpleString}>" private[spark] override def asNullable: MapType = MapType(keyType.asNullable, valueType.asNullable, valueContainsNull = true) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercionSuite.scala index 85798d0871fda..ecbb54218d457 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercionSuite.scala @@ -17,13 +17,13 @@ package org.apache.spark.sql.catalyst.analysis -import org.scalatest.FunSuite +import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, Project} import org.apache.spark.sql.types._ -class HiveTypeCoercionSuite extends FunSuite { +class HiveTypeCoercionSuite extends PlanTest { test("tightest common bound for types") { def widenTest(t1: DataType, t2: DataType, tightestCommon: Option[DataType]) { @@ -106,7 +106,8 @@ class HiveTypeCoercionSuite extends FunSuite { val booleanCasts = new HiveTypeCoercion { }.BooleanCasts def ruleTest(initial: Expression, transformed: Expression) { val testRelation = LocalRelation(AttributeReference("a", IntegerType)()) - assert(booleanCasts(Project(Seq(Alias(initial, "a")()), testRelation)) == + comparePlans( + booleanCasts(Project(Seq(Alias(initial, "a")()), testRelation)), Project(Seq(Alias(transformed, "a")()), testRelation)) } // Remove superflous boolean -> boolean casts. 
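Because the grammar above is shared by SqlParser, the DDL parser, and (later in this diff) Column.cast(String), the same type strings work everywhere, including simpleString forms such as "bigint" and "array<int>". A small user-level sketch; the table, column, and `df` are illustrative:

    // SQL CAST and Column.cast(String) now go through the same data type grammar.
    sqlContext.sql("SELECT CAST(amount AS decimal(10, 2)) FROM orders")
    df.select(df("amount").cast("decimal(10, 2)"))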
@@ -119,7 +120,8 @@ class HiveTypeCoercionSuite extends FunSuite { val fac = new HiveTypeCoercion { }.FunctionArgumentConversion def ruleTest(initial: Expression, transformed: Expression) { val testRelation = LocalRelation(AttributeReference("a", IntegerType)()) - assert(fac(Project(Seq(Alias(initial, "a")()), testRelation)) == + comparePlans( + fac(Project(Seq(Alias(initial, "a")()), testRelation)), Project(Seq(Alias(transformed, "a")()), testRelation)) } ruleTest( diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/PlanTest.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/PlanTest.scala index 7d609b91389c6..48884040bfce7 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/PlanTest.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/PlanTest.scala @@ -19,8 +19,8 @@ package org.apache.spark.sql.catalyst.plans import org.scalatest.FunSuite -import org.apache.spark.sql.catalyst.expressions.{ExprId, AttributeReference} -import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.plans.logical.{NoRelation, Filter, LogicalPlan} import org.apache.spark.sql.catalyst.util._ /** @@ -36,6 +36,8 @@ class PlanTest extends FunSuite { plan transformAllExpressions { case a: AttributeReference => AttributeReference(a.name, a.dataType, a.nullable)(exprId = ExprId(0)) + case a: Alias => + Alias(a.child, a.name)(exprId = ExprId(0)) } } @@ -50,4 +52,9 @@ class PlanTest extends FunSuite { |${sideBySide(normalized1.treeString, normalized2.treeString).mkString("\n")} """.stripMargin) } + + /** Fails the test if the two expressions do not match */ + protected def compareExpressions(e1: Expression, e2: Expression): Unit = { + comparePlans(Filter(e1, NoRelation), Filter(e2, NoRelation)) + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeParserSuite.scala new file mode 100644 index 0000000000000..1ba21b64603ac --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeParserSuite.scala @@ -0,0 +1,116 @@ +/* +* Licensed to the Apache Software Foundation (ASF) under one or more +* contributor license agreements. See the NOTICE file distributed with +* this work for additional information regarding copyright ownership. +* The ASF licenses this file to You under the Apache License, Version 2.0 +* (the "License"); you may not use this file except in compliance with +* the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*/ + +package org.apache.spark.sql.types + +import org.scalatest.FunSuite + +class DataTypeParserSuite extends FunSuite { + + def checkDataType(dataTypeString: String, expectedDataType: DataType): Unit = { + test(s"parse ${dataTypeString.replace("\n", "")}") { + assert(DataTypeParser(dataTypeString) === expectedDataType) + } + } + + def unsupported(dataTypeString: String): Unit = { + test(s"$dataTypeString is not supported") { + intercept[DataTypeException](DataTypeParser(dataTypeString)) + } + } + + checkDataType("int", IntegerType) + checkDataType("BooLean", BooleanType) + checkDataType("tinYint", ByteType) + checkDataType("smallINT", ShortType) + checkDataType("INT", IntegerType) + checkDataType("bigint", LongType) + checkDataType("float", FloatType) + checkDataType("dOUBle", DoubleType) + checkDataType("decimal(10, 5)", DecimalType(10, 5)) + checkDataType("decimal", DecimalType.Unlimited) + checkDataType("DATE", DateType) + checkDataType("timestamp", TimestampType) + checkDataType("string", StringType) + checkDataType("varchAr(20)", StringType) + checkDataType("BINARY", BinaryType) + + checkDataType("array", ArrayType(DoubleType, true)) + checkDataType("Array>", ArrayType(MapType(IntegerType, ByteType, true), true)) + checkDataType( + "array>", + ArrayType(StructType(StructField("tinYint", ByteType, true) :: Nil), true) + ) + checkDataType("MAP", MapType(IntegerType, StringType, true)) + checkDataType("MAp>", MapType(IntegerType, ArrayType(DoubleType), true)) + checkDataType( + "MAP>", + MapType(IntegerType, StructType(StructField("varchar", StringType, true) :: Nil), true) + ) + + checkDataType( + "struct", + StructType( + StructField("intType", IntegerType, true) :: + StructField("ts", TimestampType, true) :: Nil) + ) + // It is fine to use the data type string as the column name. + checkDataType( + "Struct", + StructType( + StructField("int", IntegerType, true) :: + StructField("timestamp", TimestampType, true) :: Nil) + ) + checkDataType( + """ + |struct< + | struct:struct, + | MAP:Map, + | arrAy:Array> + """.stripMargin, + StructType( + StructField("struct", + StructType( + StructField("deciMal", DecimalType.Unlimited, true) :: + StructField("anotherDecimal", DecimalType(5, 2), true) :: Nil), true) :: + StructField("MAP", MapType(TimestampType, StringType), true) :: + StructField("arrAy", ArrayType(DoubleType, true), true) :: Nil) + ) + // A column name can be a reserved word in our DDL parser and SqlParser. + checkDataType( + "Struct", + StructType( + StructField("TABLE", StringType, true) :: + StructField("CASE", BooleanType, true) :: Nil) + ) + // Use backticks to quote column names having special characters. + checkDataType( + "struct<`x+y`:int, `!@#$%^&*()`:string, `1_2.345<>:\"`:varchar(20)>", + StructType( + StructField("x+y", IntegerType, true) :: + StructField("!@#$%^&*()", StringType, true) :: + StructField("1_2.345<>:\"", StringType, true) :: Nil) + ) + // Empty struct. 
+ checkDataType("strUCt<>", StructType(Nil)) + + unsupported("it is not a data type") + unsupported("struct") + unsupported("struct") + unsupported("struct<`x``y` int>") +} diff --git a/sql/core/pom.xml b/sql/core/pom.xml index 3640104e497d4..e3a6b1fe72435 100644 --- a/sql/core/pom.xml +++ b/sql/core/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.10 - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT ../../pom.xml diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala index 908c78a4d3f10..ec7d15f5bc4e7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala @@ -59,7 +59,7 @@ class Column(protected[sql] val expr: Expression) { override def toString: String = expr.prettyString - override def equals(that: Any) = that match { + override def equals(that: Any): Boolean = that match { case that: Column => that.expr.equals(this.expr) case _ => false } @@ -624,20 +624,7 @@ class Column(protected[sql] val expr: Expression) { * * @group expr_ops */ - def cast(to: String): Column = cast(to.toLowerCase match { - case "string" | "str" => StringType - case "boolean" => BooleanType - case "byte" => ByteType - case "short" => ShortType - case "int" => IntegerType - case "long" => LongType - case "float" => FloatType - case "double" => DoubleType - case "decimal" => DecimalType.Unlimited - case "date" => DateType - case "timestamp" => TimestampType - case _ => throw new RuntimeException(s"""Unsupported cast type: "$to"""") - }) + def cast(to: String): Column = cast(DataTypeParser(to)) /** * Returns an ordering used in sorting. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala index 46f50708a9184..5aece166aad22 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala @@ -33,7 +33,7 @@ import org.apache.spark.api.java.JavaRDD import org.apache.spark.api.python.SerDeUtil import org.apache.spark.rdd.RDD import org.apache.spark.storage.StorageLevel -import org.apache.spark.sql.catalyst.{ScalaReflection, SqlParser} +import org.apache.spark.sql.catalyst.{expressions, ScalaReflection, SqlParser} import org.apache.spark.sql.catalyst.analysis.{UnresolvedRelation, ResolvedStar} import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.{JoinType, Inner} @@ -89,7 +89,7 @@ private[sql] object DataFrame { * val people = sqlContext.parquetFile("...") * val department = sqlContext.parquetFile("...") * - * people.filter("age" > 30) + * people.filter("age > 30") * .join(department, people("deptId") === department("id")) * .groupBy(department("name"), "gender") * .agg(avg(people("salary")), max(people("age"))) @@ -722,7 +722,7 @@ class DataFrame private[sql]( : DataFrame = { val dataType = ScalaReflection.schemaFor[B].dataType val attributes = AttributeReference(outputColumn, dataType)() :: Nil - def rowFunction(row: Row) = { + def rowFunction(row: Row): TraversableOnce[Row] = { f(row(0).asInstanceOf[A]).map(o => Row(ScalaReflection.convertToCatalyst(o, dataType))) } val generator = UserDefinedGenerator(attributes, rowFunction, apply(inputColumn).expr :: Nil) @@ -1155,7 +1155,7 @@ class DataFrame private[sql]( val gen = new JsonFactory().createGenerator(writer).setRootValueSeparator(null) new Iterator[String] { - override def hasNext = iter.hasNext + override def hasNext: 
Boolean = iter.hasNext override def next(): String = { JsonRDD.rowToJSON(rowSchema, gen)(iter.next()) gen.flush() diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala index 9c49e84bf9680..dc9912b52dcab 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala @@ -63,8 +63,10 @@ class SQLContext(@transient val sparkContext: SparkContext) def this(sparkContext: JavaSparkContext) = this(sparkContext.sc) - // Note that this is a lazy val so we can override the default value in subclasses. - protected[sql] lazy val conf: SQLConf = new SQLConf + /** + * @return Spark SQL configuration + */ + protected[sql] def conf = tlSession.get().conf /** * Set Spark SQL configuration properties. @@ -103,9 +105,11 @@ class SQLContext(@transient val sparkContext: SparkContext) */ def getAllConfs: immutable.Map[String, String] = conf.getAllConfs + // TODO how to handle the temp table per user session? @transient protected[sql] lazy val catalog: Catalog = new SimpleCatalog(true) + // TODO how to handle the temp function per user session? @transient protected[sql] lazy val functionRegistry: FunctionRegistry = new SimpleFunctionRegistry(true) @@ -138,6 +142,14 @@ class SQLContext(@transient val sparkContext: SparkContext) protected[sql] def executePlan(plan: LogicalPlan) = new this.QueryExecution(plan) + @transient + protected[sql] val tlSession = new ThreadLocal[SQLSession]() { + override def initialValue: SQLSession = defaultSession + } + + @transient + protected[sql] val defaultSession = createSession() + sparkContext.getConf.getAll.foreach { case (key, value) if key.startsWith("spark.sql") => setConf(key, value) case _ => @@ -194,6 +206,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * }}} * * @group basic + * TODO move to SQLSession? */ @transient val udf: UDFRegistration = new UDFRegistration(this) @@ -229,8 +242,8 @@ class SQLContext(@transient val sparkContext: SparkContext) * common Scala objects into [[DataFrame]]s. * * {{{ - * val sqlContext = new SQLContext - * import sqlContext._ + * val sqlContext = new SQLContext(sc) + * import sqlContext.implicits._ * }}} * * @group basic @@ -975,9 +988,9 @@ class SQLContext(@transient val sparkContext: SparkContext) val sqlContext: SQLContext = self - def codegenEnabled = self.conf.codegenEnabled + def codegenEnabled: Boolean = self.conf.codegenEnabled - def numPartitions = self.conf.numShufflePartitions + def numPartitions: Int = self.conf.numShufflePartitions def strategies: Seq[Strategy] = experimental.extraStrategies ++ ( @@ -1059,6 +1072,32 @@ class SQLContext(@transient val sparkContext: SparkContext) ) } + + protected[sql] def openSession(): SQLSession = { + detachSession() + val session = createSession() + tlSession.set(session) + + session + } + + protected[sql] def currentSession(): SQLSession = { + tlSession.get() + } + + protected[sql] def createSession(): SQLSession = { + new this.SQLSession() + } + + protected[sql] def detachSession(): Unit = { + tlSession.remove() + } + + protected[sql] class SQLSession { + // Note that this is a lazy val so we can override the default value in subclasses. + protected[sql] lazy val conf: SQLConf = new SQLConf + } + /** * :: DeveloperApi :: * The primary workflow for executing relational queries using Spark. 
Designed to allow easy @@ -1070,7 +1109,7 @@ class SQLContext(@transient val sparkContext: SparkContext) lazy val analyzed: LogicalPlan = analyzer(logical) lazy val withCachedData: LogicalPlan = { - assertAnalyzed + assertAnalyzed() cacheManager.useCachedData(analyzed) } lazy val optimizedPlan: LogicalPlan = optimizer(withCachedData) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala b/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala index 8051df299252c..b97aaf73529a3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala @@ -61,7 +61,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { val dataType = sqlContext.parseDataType(stringDataType) - def builder(e: Seq[Expression]) = + def builder(e: Seq[Expression]): PythonUDF = PythonUDF( name, command, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/columnar/ColumnAccessor.scala b/sql/core/src/main/scala/org/apache/spark/sql/columnar/ColumnAccessor.scala index b615eaa0dca0d..f615fb33a7c35 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/columnar/ColumnAccessor.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/columnar/ColumnAccessor.scala @@ -48,9 +48,9 @@ private[sql] abstract class BasicColumnAccessor[T <: DataType, JvmType]( protected def initialize() {} - def hasNext = buffer.hasRemaining + override def hasNext: Boolean = buffer.hasRemaining - def extractTo(row: MutableRow, ordinal: Int): Unit = { + override def extractTo(row: MutableRow, ordinal: Int): Unit = { extractSingle(row, ordinal) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/columnar/ColumnBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/columnar/ColumnBuilder.scala index d8d24a577347c..c881747751520 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/columnar/ColumnBuilder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/columnar/ColumnBuilder.scala @@ -58,7 +58,7 @@ private[sql] class BasicColumnBuilder[T <: DataType, JvmType]( override def initialize( initialSize: Int, columnName: String = "", - useCompression: Boolean = false) = { + useCompression: Boolean = false): Unit = { val size = if (initialSize == 0) DEFAULT_INITIAL_BUFFER_SIZE else initialSize this.columnName = columnName @@ -73,7 +73,7 @@ private[sql] class BasicColumnBuilder[T <: DataType, JvmType]( columnType.append(row, ordinal, buffer) } - override def build() = { + override def build(): ByteBuffer = { buffer.flip().asInstanceOf[ByteBuffer] } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/columnar/ColumnStats.scala b/sql/core/src/main/scala/org/apache/spark/sql/columnar/ColumnStats.scala index 04047b9c062be..87a6631da8300 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/columnar/ColumnStats.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/columnar/ColumnStats.scala @@ -76,7 +76,7 @@ private[sql] sealed trait ColumnStats extends Serializable { private[sql] class NoopColumnStats extends ColumnStats { override def gatherStats(row: Row, ordinal: Int): Unit = super.gatherStats(row, ordinal) - def collectedStatistics = Row(null, null, nullCount, count, 0L) + override def collectedStatistics: Row = Row(null, null, nullCount, count, 0L) } private[sql] class BooleanColumnStats extends ColumnStats { @@ -93,7 +93,7 @@ private[sql] class BooleanColumnStats extends ColumnStats { } } - def collectedStatistics = Row(lower, upper, nullCount, count, sizeInBytes) + 
override def collectedStatistics: Row = Row(lower, upper, nullCount, count, sizeInBytes) } private[sql] class ByteColumnStats extends ColumnStats { @@ -110,7 +110,7 @@ private[sql] class ByteColumnStats extends ColumnStats { } } - def collectedStatistics = Row(lower, upper, nullCount, count, sizeInBytes) + override def collectedStatistics: Row = Row(lower, upper, nullCount, count, sizeInBytes) } private[sql] class ShortColumnStats extends ColumnStats { @@ -127,7 +127,7 @@ private[sql] class ShortColumnStats extends ColumnStats { } } - def collectedStatistics = Row(lower, upper, nullCount, count, sizeInBytes) + override def collectedStatistics: Row = Row(lower, upper, nullCount, count, sizeInBytes) } private[sql] class LongColumnStats extends ColumnStats { @@ -144,7 +144,7 @@ private[sql] class LongColumnStats extends ColumnStats { } } - def collectedStatistics = Row(lower, upper, nullCount, count, sizeInBytes) + override def collectedStatistics: Row = Row(lower, upper, nullCount, count, sizeInBytes) } private[sql] class DoubleColumnStats extends ColumnStats { @@ -161,7 +161,7 @@ private[sql] class DoubleColumnStats extends ColumnStats { } } - def collectedStatistics = Row(lower, upper, nullCount, count, sizeInBytes) + override def collectedStatistics: Row = Row(lower, upper, nullCount, count, sizeInBytes) } private[sql] class FloatColumnStats extends ColumnStats { @@ -178,7 +178,7 @@ private[sql] class FloatColumnStats extends ColumnStats { } } - def collectedStatistics = Row(lower, upper, nullCount, count, sizeInBytes) + override def collectedStatistics: Row = Row(lower, upper, nullCount, count, sizeInBytes) } private[sql] class FixedDecimalColumnStats extends ColumnStats { @@ -212,7 +212,7 @@ private[sql] class IntColumnStats extends ColumnStats { } } - def collectedStatistics = Row(lower, upper, nullCount, count, sizeInBytes) + override def collectedStatistics: Row = Row(lower, upper, nullCount, count, sizeInBytes) } private[sql] class StringColumnStats extends ColumnStats { @@ -229,7 +229,7 @@ private[sql] class StringColumnStats extends ColumnStats { } } - def collectedStatistics = Row(lower, upper, nullCount, count, sizeInBytes) + override def collectedStatistics: Row = Row(lower, upper, nullCount, count, sizeInBytes) } private[sql] class DateColumnStats extends IntColumnStats @@ -248,7 +248,7 @@ private[sql] class TimestampColumnStats extends ColumnStats { } } - def collectedStatistics = Row(lower, upper, nullCount, count, sizeInBytes) + override def collectedStatistics: Row = Row(lower, upper, nullCount, count, sizeInBytes) } private[sql] class BinaryColumnStats extends ColumnStats { @@ -259,7 +259,7 @@ private[sql] class BinaryColumnStats extends ColumnStats { } } - def collectedStatistics = Row(null, null, nullCount, count, sizeInBytes) + override def collectedStatistics: Row = Row(null, null, nullCount, count, sizeInBytes) } private[sql] class GenericColumnStats extends ColumnStats { @@ -270,5 +270,5 @@ private[sql] class GenericColumnStats extends ColumnStats { } } - def collectedStatistics = Row(null, null, nullCount, count, sizeInBytes) + override def collectedStatistics: Row = Row(null, null, nullCount, count, sizeInBytes) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/columnar/ColumnType.scala b/sql/core/src/main/scala/org/apache/spark/sql/columnar/ColumnType.scala index 36ea1c77e0470..c47497e0662d9 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/columnar/ColumnType.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/columnar/ColumnType.scala @@ 
-98,7 +98,7 @@ private[sql] sealed abstract class ColumnType[T <: DataType, JvmType]( */ def clone(v: JvmType): JvmType = v - override def toString = getClass.getSimpleName.stripSuffix("$") + override def toString: String = getClass.getSimpleName.stripSuffix("$") } private[sql] abstract class NativeColumnType[T <: NativeType]( @@ -114,7 +114,7 @@ private[sql] abstract class NativeColumnType[T <: NativeType]( } private[sql] object INT extends NativeColumnType(IntegerType, 0, 4) { - def append(v: Int, buffer: ByteBuffer): Unit = { + override def append(v: Int, buffer: ByteBuffer): Unit = { buffer.putInt(v) } @@ -122,7 +122,7 @@ private[sql] object INT extends NativeColumnType(IntegerType, 0, 4) { buffer.putInt(row.getInt(ordinal)) } - def extract(buffer: ByteBuffer) = { + override def extract(buffer: ByteBuffer): Int = { buffer.getInt() } @@ -134,7 +134,7 @@ private[sql] object INT extends NativeColumnType(IntegerType, 0, 4) { row.setInt(ordinal, value) } - override def getField(row: Row, ordinal: Int) = row.getInt(ordinal) + override def getField(row: Row, ordinal: Int): Int = row.getInt(ordinal) override def copyField(from: Row, fromOrdinal: Int, to: MutableRow, toOrdinal: Int): Unit = { to.setInt(toOrdinal, from.getInt(fromOrdinal)) @@ -150,7 +150,7 @@ private[sql] object LONG extends NativeColumnType(LongType, 1, 8) { buffer.putLong(row.getLong(ordinal)) } - override def extract(buffer: ByteBuffer) = { + override def extract(buffer: ByteBuffer): Long = { buffer.getLong() } @@ -162,7 +162,7 @@ private[sql] object LONG extends NativeColumnType(LongType, 1, 8) { row.setLong(ordinal, value) } - override def getField(row: Row, ordinal: Int) = row.getLong(ordinal) + override def getField(row: Row, ordinal: Int): Long = row.getLong(ordinal) override def copyField(from: Row, fromOrdinal: Int, to: MutableRow, toOrdinal: Int): Unit = { to.setLong(toOrdinal, from.getLong(fromOrdinal)) @@ -178,7 +178,7 @@ private[sql] object FLOAT extends NativeColumnType(FloatType, 2, 4) { buffer.putFloat(row.getFloat(ordinal)) } - override def extract(buffer: ByteBuffer) = { + override def extract(buffer: ByteBuffer): Float = { buffer.getFloat() } @@ -190,7 +190,7 @@ private[sql] object FLOAT extends NativeColumnType(FloatType, 2, 4) { row.setFloat(ordinal, value) } - override def getField(row: Row, ordinal: Int) = row.getFloat(ordinal) + override def getField(row: Row, ordinal: Int): Float = row.getFloat(ordinal) override def copyField(from: Row, fromOrdinal: Int, to: MutableRow, toOrdinal: Int): Unit = { to.setFloat(toOrdinal, from.getFloat(fromOrdinal)) @@ -206,7 +206,7 @@ private[sql] object DOUBLE extends NativeColumnType(DoubleType, 3, 8) { buffer.putDouble(row.getDouble(ordinal)) } - override def extract(buffer: ByteBuffer) = { + override def extract(buffer: ByteBuffer): Double = { buffer.getDouble() } @@ -218,7 +218,7 @@ private[sql] object DOUBLE extends NativeColumnType(DoubleType, 3, 8) { row.setDouble(ordinal, value) } - override def getField(row: Row, ordinal: Int) = row.getDouble(ordinal) + override def getField(row: Row, ordinal: Int): Double = row.getDouble(ordinal) override def copyField(from: Row, fromOrdinal: Int, to: MutableRow, toOrdinal: Int): Unit = { to.setDouble(toOrdinal, from.getDouble(fromOrdinal)) @@ -234,7 +234,7 @@ private[sql] object BOOLEAN extends NativeColumnType(BooleanType, 4, 1) { buffer.put(if (row.getBoolean(ordinal)) 1: Byte else 0: Byte) } - override def extract(buffer: ByteBuffer) = buffer.get() == 1 + override def extract(buffer: ByteBuffer): Boolean = buffer.get() == 1 
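The ColumnType hunks above and below only add explicit result types; the append/extract contract itself is unchanged. As a rough, self-contained sketch of that contract — the object and method names here are invented for illustration and are not Spark's private[sql] ColumnType API — an int column writes values into and reads them back out of a ByteBuffer like this:

import java.nio.ByteBuffer

// Stand-in for the append/extract pattern used by INT, LONG, FLOAT, etc.
object IntColumnCodec {
  def append(v: Int, buffer: ByteBuffer): Unit = buffer.putInt(v)
  def extract(buffer: ByteBuffer): Int = buffer.getInt()
}

object ColumnCodecSketch {
  def main(args: Array[String]): Unit = {
    val buf = ByteBuffer.allocate(8)
    IntColumnCodec.append(42, buf)
    IntColumnCodec.append(7, buf)
    buf.flip()                            // switch the buffer from writing to reading
    println(IntColumnCodec.extract(buf))  // 42
    println(IntColumnCodec.extract(buf))  // 7
  }
}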
override def extract(buffer: ByteBuffer, row: MutableRow, ordinal: Int): Unit = { row.setBoolean(ordinal, buffer.get() == 1) @@ -244,7 +244,7 @@ private[sql] object BOOLEAN extends NativeColumnType(BooleanType, 4, 1) { row.setBoolean(ordinal, value) } - override def getField(row: Row, ordinal: Int) = row.getBoolean(ordinal) + override def getField(row: Row, ordinal: Int): Boolean = row.getBoolean(ordinal) override def copyField(from: Row, fromOrdinal: Int, to: MutableRow, toOrdinal: Int): Unit = { to.setBoolean(toOrdinal, from.getBoolean(fromOrdinal)) @@ -260,7 +260,7 @@ private[sql] object BYTE extends NativeColumnType(ByteType, 5, 1) { buffer.put(row.getByte(ordinal)) } - override def extract(buffer: ByteBuffer) = { + override def extract(buffer: ByteBuffer): Byte = { buffer.get() } @@ -272,7 +272,7 @@ private[sql] object BYTE extends NativeColumnType(ByteType, 5, 1) { row.setByte(ordinal, value) } - override def getField(row: Row, ordinal: Int) = row.getByte(ordinal) + override def getField(row: Row, ordinal: Int): Byte = row.getByte(ordinal) override def copyField(from: Row, fromOrdinal: Int, to: MutableRow, toOrdinal: Int): Unit = { to.setByte(toOrdinal, from.getByte(fromOrdinal)) @@ -288,7 +288,7 @@ private[sql] object SHORT extends NativeColumnType(ShortType, 6, 2) { buffer.putShort(row.getShort(ordinal)) } - override def extract(buffer: ByteBuffer) = { + override def extract(buffer: ByteBuffer): Short = { buffer.getShort() } @@ -300,7 +300,7 @@ private[sql] object SHORT extends NativeColumnType(ShortType, 6, 2) { row.setShort(ordinal, value) } - override def getField(row: Row, ordinal: Int) = row.getShort(ordinal) + override def getField(row: Row, ordinal: Int): Short = row.getShort(ordinal) override def copyField(from: Row, fromOrdinal: Int, to: MutableRow, toOrdinal: Int): Unit = { to.setShort(toOrdinal, from.getShort(fromOrdinal)) @@ -317,7 +317,7 @@ private[sql] object STRING extends NativeColumnType(StringType, 7, 8) { buffer.putInt(stringBytes.length).put(stringBytes, 0, stringBytes.length) } - override def extract(buffer: ByteBuffer) = { + override def extract(buffer: ByteBuffer): String = { val length = buffer.getInt() val stringBytes = new Array[Byte](length) buffer.get(stringBytes, 0, length) @@ -328,7 +328,7 @@ private[sql] object STRING extends NativeColumnType(StringType, 7, 8) { row.setString(ordinal, value) } - override def getField(row: Row, ordinal: Int) = row.getString(ordinal) + override def getField(row: Row, ordinal: Int): String = row.getString(ordinal) override def copyField(from: Row, fromOrdinal: Int, to: MutableRow, toOrdinal: Int): Unit = { to.setString(toOrdinal, from.getString(fromOrdinal)) @@ -336,7 +336,7 @@ private[sql] object STRING extends NativeColumnType(StringType, 7, 8) { } private[sql] object DATE extends NativeColumnType(DateType, 8, 4) { - override def extract(buffer: ByteBuffer) = { + override def extract(buffer: ByteBuffer): Int = { buffer.getInt } @@ -344,7 +344,7 @@ private[sql] object DATE extends NativeColumnType(DateType, 8, 4) { buffer.putInt(v) } - override def getField(row: Row, ordinal: Int) = { + override def getField(row: Row, ordinal: Int): Int = { row(ordinal).asInstanceOf[Int] } @@ -354,7 +354,7 @@ private[sql] object DATE extends NativeColumnType(DateType, 8, 4) { } private[sql] object TIMESTAMP extends NativeColumnType(TimestampType, 9, 12) { - override def extract(buffer: ByteBuffer) = { + override def extract(buffer: ByteBuffer): Timestamp = { val timestamp = new Timestamp(buffer.getLong()) 
timestamp.setNanos(buffer.getInt()) timestamp @@ -364,7 +364,7 @@ private[sql] object TIMESTAMP extends NativeColumnType(TimestampType, 9, 12) { buffer.putLong(v.getTime).putInt(v.getNanos) } - override def getField(row: Row, ordinal: Int) = { + override def getField(row: Row, ordinal: Int): Timestamp = { row(ordinal).asInstanceOf[Timestamp] } @@ -405,7 +405,7 @@ private[sql] sealed abstract class ByteArrayColumnType[T <: DataType]( defaultSize: Int) extends ColumnType[T, Array[Byte]](typeId, defaultSize) { - override def actualSize(row: Row, ordinal: Int) = { + override def actualSize(row: Row, ordinal: Int): Int = { getField(row, ordinal).length + 4 } @@ -413,7 +413,7 @@ private[sql] sealed abstract class ByteArrayColumnType[T <: DataType]( buffer.putInt(v.length).put(v, 0, v.length) } - override def extract(buffer: ByteBuffer) = { + override def extract(buffer: ByteBuffer): Array[Byte] = { val length = buffer.getInt() val bytes = new Array[Byte](length) buffer.get(bytes, 0, length) @@ -426,7 +426,9 @@ private[sql] object BINARY extends ByteArrayColumnType[BinaryType.type](11, 16) row(ordinal) = value } - override def getField(row: Row, ordinal: Int) = row(ordinal).asInstanceOf[Array[Byte]] + override def getField(row: Row, ordinal: Int): Array[Byte] = { + row(ordinal).asInstanceOf[Array[Byte]] + } } // Used to process generic objects (all types other than those listed above). Objects should be @@ -437,7 +439,9 @@ private[sql] object GENERIC extends ByteArrayColumnType[DataType](12, 16) { row(ordinal) = SparkSqlSerializer.deserialize[Any](value) } - override def getField(row: Row, ordinal: Int) = SparkSqlSerializer.serialize(row(ordinal)) + override def getField(row: Row, ordinal: Int): Array[Byte] = { + SparkSqlSerializer.serialize(row(ordinal)) + } } private[sql] object ColumnType { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/columnar/InMemoryColumnarTableScan.scala b/sql/core/src/main/scala/org/apache/spark/sql/columnar/InMemoryColumnarTableScan.scala index 387faee12b3cd..6eee0c86d6a1c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/columnar/InMemoryColumnarTableScan.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/columnar/InMemoryColumnarTableScan.scala @@ -19,6 +19,9 @@ package org.apache.spark.sql.columnar import java.nio.ByteBuffer +import org.apache.spark.Accumulator +import org.apache.spark.sql.catalyst.expressions + import scala.collection.mutable.ArrayBuffer import org.apache.spark.rdd.RDD @@ -77,20 +80,23 @@ private[sql] case class InMemoryRelation( _statistics } - override def statistics = if (_statistics == null) { - if (batchStats.value.isEmpty) { - // Underlying columnar RDD hasn't been materialized, no useful statistics information - // available, return the default statistics. - Statistics(sizeInBytes = child.sqlContext.conf.defaultSizeInBytes) + override def statistics: Statistics = { + if (_statistics == null) { + if (batchStats.value.isEmpty) { + // Underlying columnar RDD hasn't been materialized, no useful statistics information + // available, return the default statistics. + Statistics(sizeInBytes = child.sqlContext.conf.defaultSizeInBytes) + } else { + // Underlying columnar RDD has been materialized, required information has also been + // collected via the `batchStats` accumulator, compute the final statistics, + // and update `_statistics`. 
+ _statistics = Statistics(sizeInBytes = computeSizeInBytes) + _statistics + } } else { - // Underlying columnar RDD has been materialized, required information has also been collected - // via the `batchStats` accumulator, compute the final statistics, and update `_statistics`. - _statistics = Statistics(sizeInBytes = computeSizeInBytes) + // Pre-computed statistics _statistics } - } else { - // Pre-computed statistics - _statistics } // If the cached column buffers were not passed in, we calculate them in the constructor. @@ -99,7 +105,7 @@ private[sql] case class InMemoryRelation( buildBuffers() } - def recache() = { + def recache(): Unit = { _cachedColumnBuffers.unpersist() _cachedColumnBuffers = null buildBuffers() @@ -109,7 +115,7 @@ private[sql] case class InMemoryRelation( val output = child.output val cached = child.execute().mapPartitions { rowIterator => new Iterator[CachedBatch] { - def next() = { + def next(): CachedBatch = { val columnBuilders = output.map { attribute => val columnType = ColumnType(attribute.dataType) val initialBufferSize = columnType.defaultSize * batchSize @@ -144,7 +150,7 @@ private[sql] case class InMemoryRelation( CachedBatch(columnBuilders.map(_.build().array()), stats) } - def hasNext = rowIterator.hasNext + def hasNext: Boolean = rowIterator.hasNext } }.persist(storageLevel) @@ -158,9 +164,9 @@ private[sql] case class InMemoryRelation( _cachedColumnBuffers, statisticsToBePropagated) } - override def children = Seq.empty + override def children: Seq[LogicalPlan] = Seq.empty - override def newInstance() = { + override def newInstance(): this.type = { new InMemoryRelation( output.map(_.newInstance()), useCompression, @@ -172,7 +178,7 @@ private[sql] case class InMemoryRelation( statisticsToBePropagated).asInstanceOf[this.type] } - def cachedColumnBuffers = _cachedColumnBuffers + def cachedColumnBuffers: RDD[CachedBatch] = _cachedColumnBuffers override protected def otherCopyArgs: Seq[AnyRef] = Seq(_cachedColumnBuffers, statisticsToBePropagated) @@ -220,7 +226,7 @@ private[sql] case class InMemoryColumnarTableScan( case IsNotNull(a: Attribute) => statsFor(a).count - statsFor(a).nullCount > 0 } - val partitionFilters = { + val partitionFilters: Seq[Expression] = { predicates.flatMap { p => val filter = buildFilter.lift(p) val boundFilter = @@ -239,12 +245,12 @@ private[sql] case class InMemoryColumnarTableScan( } // Accumulators used for testing purposes - val readPartitions = sparkContext.accumulator(0) - val readBatches = sparkContext.accumulator(0) + val readPartitions: Accumulator[Int] = sparkContext.accumulator(0) + val readBatches: Accumulator[Int] = sparkContext.accumulator(0) private val inMemoryPartitionPruningEnabled = sqlContext.conf.inMemoryPartitionPruning - override def execute() = { + override def execute(): RDD[Row] = { readPartitions.setValue(0) readBatches.setValue(0) @@ -271,7 +277,7 @@ private[sql] case class InMemoryColumnarTableScan( val nextRow = new SpecificMutableRow(requestedColumnDataTypes) - def cachedBatchesToRows(cacheBatches: Iterator[CachedBatch]) = { + def cachedBatchesToRows(cacheBatches: Iterator[CachedBatch]): Iterator[Row] = { val rows = cacheBatches.flatMap { cachedBatch => // Build column accessors val columnAccessors = requestedColumnIndices.map { batchColumnIndex => @@ -283,7 +289,7 @@ private[sql] case class InMemoryColumnarTableScan( // Extract rows via column accessors new Iterator[Row] { private[this] val rowLen = nextRow.length - override def next() = { + override def next(): Row = { var i = 0 while (i < 
rowLen) { columnAccessors(i).extractTo(nextRow, i) @@ -292,7 +298,7 @@ private[sql] case class InMemoryColumnarTableScan( nextRow } - override def hasNext = columnAccessors(0).hasNext + override def hasNext: Boolean = columnAccessors(0).hasNext } } @@ -308,7 +314,7 @@ private[sql] case class InMemoryColumnarTableScan( if (inMemoryPartitionPruningEnabled) { cachedBatchIterator.filter { cachedBatch => if (!partitionFilter(cachedBatch.stats)) { - def statsString = relation.partitionStatistics.schema + def statsString: String = relation.partitionStatistics.schema .zip(cachedBatch.stats.toSeq) .map { case (a, s) => s"${a.name}: $s" } .mkString(", ") diff --git a/sql/core/src/main/scala/org/apache/spark/sql/columnar/NullableColumnAccessor.scala b/sql/core/src/main/scala/org/apache/spark/sql/columnar/NullableColumnAccessor.scala index 965782a40031b..4d35650d4b1eb 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/columnar/NullableColumnAccessor.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/columnar/NullableColumnAccessor.scala @@ -55,5 +55,5 @@ private[sql] trait NullableColumnAccessor extends ColumnAccessor { pos += 1 } - abstract override def hasNext = seenNulls < nullCount || super.hasNext + abstract override def hasNext: Boolean = seenNulls < nullCount || super.hasNext } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/columnar/compression/CompressibleColumnAccessor.scala b/sql/core/src/main/scala/org/apache/spark/sql/columnar/compression/CompressibleColumnAccessor.scala index 7dff9deac8dc0..d0b602a834dfe 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/columnar/compression/CompressibleColumnAccessor.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/columnar/compression/CompressibleColumnAccessor.scala @@ -26,12 +26,12 @@ private[sql] trait CompressibleColumnAccessor[T <: NativeType] extends ColumnAcc private var decoder: Decoder[T] = _ - abstract override protected def initialize() = { + abstract override protected def initialize(): Unit = { super.initialize() decoder = CompressionScheme(underlyingBuffer.getInt()).decoder(buffer, columnType) } - abstract override def hasNext = super.hasNext || decoder.hasNext + abstract override def hasNext: Boolean = super.hasNext || decoder.hasNext override def extractSingle(row: MutableRow, ordinal: Int): Unit = { decoder.next(row, ordinal) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/columnar/compression/CompressibleColumnBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/columnar/compression/CompressibleColumnBuilder.scala index aead768ecdf0a..b9cfc5df550d1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/columnar/compression/CompressibleColumnBuilder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/columnar/compression/CompressibleColumnBuilder.scala @@ -81,7 +81,7 @@ private[sql] trait CompressibleColumnBuilder[T <: NativeType] } } - override def build() = { + override def build(): ByteBuffer = { val nonNullBuffer = buildNonNulls() val typeId = nonNullBuffer.getInt() val encoder: Encoder[T] = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/columnar/compression/compressionSchemes.scala b/sql/core/src/main/scala/org/apache/spark/sql/columnar/compression/compressionSchemes.scala index 68a5b1de7691b..8727d71c48bb7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/columnar/compression/compressionSchemes.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/columnar/compression/compressionSchemes.scala @@ -33,22 +33,23 @@ import 
org.apache.spark.util.Utils private[sql] case object PassThrough extends CompressionScheme { override val typeId = 0 - override def supports(columnType: ColumnType[_, _]) = true + override def supports(columnType: ColumnType[_, _]): Boolean = true - override def encoder[T <: NativeType](columnType: NativeColumnType[T]) = { + override def encoder[T <: NativeType](columnType: NativeColumnType[T]): Encoder[T] = { new this.Encoder[T](columnType) } - override def decoder[T <: NativeType](buffer: ByteBuffer, columnType: NativeColumnType[T]) = { + override def decoder[T <: NativeType]( + buffer: ByteBuffer, columnType: NativeColumnType[T]): Decoder[T] = { new this.Decoder(buffer, columnType) } class Encoder[T <: NativeType](columnType: NativeColumnType[T]) extends compression.Encoder[T] { - override def uncompressedSize = 0 + override def uncompressedSize: Int = 0 - override def compressedSize = 0 + override def compressedSize: Int = 0 - override def compress(from: ByteBuffer, to: ByteBuffer) = { + override def compress(from: ByteBuffer, to: ByteBuffer): ByteBuffer = { // Writes compression type ID and copies raw contents to.putInt(PassThrough.typeId).put(from).rewind() to @@ -62,22 +63,23 @@ private[sql] case object PassThrough extends CompressionScheme { columnType.extract(buffer, row, ordinal) } - override def hasNext = buffer.hasRemaining + override def hasNext: Boolean = buffer.hasRemaining } } private[sql] case object RunLengthEncoding extends CompressionScheme { override val typeId = 1 - override def encoder[T <: NativeType](columnType: NativeColumnType[T]) = { + override def encoder[T <: NativeType](columnType: NativeColumnType[T]): Encoder[T] = { new this.Encoder[T](columnType) } - override def decoder[T <: NativeType](buffer: ByteBuffer, columnType: NativeColumnType[T]) = { + override def decoder[T <: NativeType]( + buffer: ByteBuffer, columnType: NativeColumnType[T]): Decoder[T] = { new this.Decoder(buffer, columnType) } - override def supports(columnType: ColumnType[_, _]) = columnType match { + override def supports(columnType: ColumnType[_, _]): Boolean = columnType match { case INT | LONG | SHORT | BYTE | STRING | BOOLEAN => true case _ => false } @@ -90,9 +92,9 @@ private[sql] case object RunLengthEncoding extends CompressionScheme { private val lastValue = new SpecificMutableRow(Seq(columnType.dataType)) private var lastRun = 0 - override def uncompressedSize = _uncompressedSize + override def uncompressedSize: Int = _uncompressedSize - override def compressedSize = _compressedSize + override def compressedSize: Int = _compressedSize override def gatherCompressibilityStats(row: Row, ordinal: Int): Unit = { val value = columnType.getField(row, ordinal) @@ -114,7 +116,7 @@ private[sql] case object RunLengthEncoding extends CompressionScheme { } } - override def compress(from: ByteBuffer, to: ByteBuffer) = { + override def compress(from: ByteBuffer, to: ByteBuffer): ByteBuffer = { to.putInt(RunLengthEncoding.typeId) if (from.hasRemaining) { @@ -169,7 +171,7 @@ private[sql] case object RunLengthEncoding extends CompressionScheme { columnType.setField(row, ordinal, currentValue) } - override def hasNext = valueCount < run || buffer.hasRemaining + override def hasNext: Boolean = valueCount < run || buffer.hasRemaining } } @@ -179,15 +181,16 @@ private[sql] case object DictionaryEncoding extends CompressionScheme { // 32K unique values allowed val MAX_DICT_SIZE = Short.MaxValue - override def decoder[T <: NativeType](buffer: ByteBuffer, columnType: NativeColumnType[T]) = { + override 
def decoder[T <: NativeType](buffer: ByteBuffer, columnType: NativeColumnType[T]) + : Decoder[T] = { new this.Decoder(buffer, columnType) } - override def encoder[T <: NativeType](columnType: NativeColumnType[T]) = { + override def encoder[T <: NativeType](columnType: NativeColumnType[T]): Encoder[T] = { new this.Encoder[T](columnType) } - override def supports(columnType: ColumnType[_, _]) = columnType match { + override def supports(columnType: ColumnType[_, _]): Boolean = columnType match { case INT | LONG | STRING => true case _ => false } @@ -237,7 +240,7 @@ private[sql] case object DictionaryEncoding extends CompressionScheme { } } - override def compress(from: ByteBuffer, to: ByteBuffer) = { + override def compress(from: ByteBuffer, to: ByteBuffer): ByteBuffer = { if (overflow) { throw new IllegalStateException( "Dictionary encoding should not be used because of dictionary overflow.") @@ -260,9 +263,9 @@ private[sql] case object DictionaryEncoding extends CompressionScheme { to } - override def uncompressedSize = _uncompressedSize + override def uncompressedSize: Int = _uncompressedSize - override def compressedSize = if (overflow) Int.MaxValue else dictionarySize + count * 2 + override def compressedSize: Int = if (overflow) Int.MaxValue else dictionarySize + count * 2 } class Decoder[T <: NativeType](buffer: ByteBuffer, columnType: NativeColumnType[T]) @@ -284,7 +287,7 @@ private[sql] case object DictionaryEncoding extends CompressionScheme { columnType.setField(row, ordinal, dictionary(buffer.getShort())) } - override def hasNext = buffer.hasRemaining + override def hasNext: Boolean = buffer.hasRemaining } } @@ -293,15 +296,16 @@ private[sql] case object BooleanBitSet extends CompressionScheme { val BITS_PER_LONG = 64 - override def decoder[T <: NativeType](buffer: ByteBuffer, columnType: NativeColumnType[T]) = { + override def decoder[T <: NativeType](buffer: ByteBuffer, columnType: NativeColumnType[T]) + : compression.Decoder[T] = { new this.Decoder(buffer).asInstanceOf[compression.Decoder[T]] } - override def encoder[T <: NativeType](columnType: NativeColumnType[T]) = { + override def encoder[T <: NativeType](columnType: NativeColumnType[T]): compression.Encoder[T] = { (new this.Encoder).asInstanceOf[compression.Encoder[T]] } - override def supports(columnType: ColumnType[_, _]) = columnType == BOOLEAN + override def supports(columnType: ColumnType[_, _]): Boolean = columnType == BOOLEAN class Encoder extends compression.Encoder[BooleanType.type] { private var _uncompressedSize = 0 @@ -310,7 +314,7 @@ private[sql] case object BooleanBitSet extends CompressionScheme { _uncompressedSize += BOOLEAN.defaultSize } - override def compress(from: ByteBuffer, to: ByteBuffer) = { + override def compress(from: ByteBuffer, to: ByteBuffer): ByteBuffer = { to.putInt(BooleanBitSet.typeId) // Total element count (1 byte per Boolean value) .putInt(from.remaining) @@ -347,9 +351,9 @@ private[sql] case object BooleanBitSet extends CompressionScheme { to } - override def uncompressedSize = _uncompressedSize + override def uncompressedSize: Int = _uncompressedSize - override def compressedSize = { + override def compressedSize: Int = { val extra = if (_uncompressedSize % BITS_PER_LONG == 0) 0 else 1 (_uncompressedSize / BITS_PER_LONG + extra) * 8 + 4 } @@ -380,22 +384,23 @@ private[sql] case object BooleanBitSet extends CompressionScheme { private[sql] case object IntDelta extends CompressionScheme { override def typeId: Int = 4 - override def decoder[T <: NativeType](buffer: ByteBuffer, 
columnType: NativeColumnType[T]) = { + override def decoder[T <: NativeType](buffer: ByteBuffer, columnType: NativeColumnType[T]) + : compression.Decoder[T] = { new Decoder(buffer, INT).asInstanceOf[compression.Decoder[T]] } - override def encoder[T <: NativeType](columnType: NativeColumnType[T]) = { + override def encoder[T <: NativeType](columnType: NativeColumnType[T]): compression.Encoder[T] = { (new Encoder).asInstanceOf[compression.Encoder[T]] } - override def supports(columnType: ColumnType[_, _]) = columnType == INT + override def supports(columnType: ColumnType[_, _]): Boolean = columnType == INT class Encoder extends compression.Encoder[IntegerType.type] { protected var _compressedSize: Int = 0 protected var _uncompressedSize: Int = 0 - override def compressedSize = _compressedSize - override def uncompressedSize = _uncompressedSize + override def compressedSize: Int = _compressedSize + override def uncompressedSize: Int = _uncompressedSize private var prevValue: Int = _ @@ -459,22 +464,23 @@ private[sql] case object IntDelta extends CompressionScheme { private[sql] case object LongDelta extends CompressionScheme { override def typeId: Int = 5 - override def decoder[T <: NativeType](buffer: ByteBuffer, columnType: NativeColumnType[T]) = { + override def decoder[T <: NativeType](buffer: ByteBuffer, columnType: NativeColumnType[T]) + : compression.Decoder[T] = { new Decoder(buffer, LONG).asInstanceOf[compression.Decoder[T]] } - override def encoder[T <: NativeType](columnType: NativeColumnType[T]) = { + override def encoder[T <: NativeType](columnType: NativeColumnType[T]): compression.Encoder[T] = { (new Encoder).asInstanceOf[compression.Encoder[T]] } - override def supports(columnType: ColumnType[_, _]) = columnType == LONG + override def supports(columnType: ColumnType[_, _]): Boolean = columnType == LONG class Encoder extends compression.Encoder[LongType.type] { protected var _compressedSize: Int = 0 protected var _uncompressedSize: Int = 0 - override def compressedSize = _compressedSize - override def uncompressedSize = _uncompressedSize + override def compressedSize: Int = _compressedSize + override def uncompressedSize: Int = _uncompressedSize private var prevValue: Long = _ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/Aggregate.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/Aggregate.scala index ad44a01d0e164..18b1ba4c5c4b9 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/Aggregate.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/Aggregate.scala @@ -21,6 +21,7 @@ import java.util.HashMap import org.apache.spark.annotation.DeveloperApi import org.apache.spark.SparkContext +import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.errors._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.physical._ @@ -45,7 +46,7 @@ case class Aggregate( child: SparkPlan) extends UnaryNode { - override def requiredChildDistribution = + override def requiredChildDistribution: List[Distribution] = { if (partial) { UnspecifiedDistribution :: Nil } else { @@ -55,8 +56,9 @@ case class Aggregate( ClusteredDistribution(groupingExpressions) :: Nil } } + } - override def output = aggregateExpressions.map(_.toAttribute) + override def output: Seq[Attribute] = aggregateExpressions.map(_.toAttribute) /** * An aggregate that needs to be computed for each row in a group. 
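The requiredChildDistribution changes in Aggregate and GeneratedAggregate only spell out the previously inferred return type; the rule they carry over is that a partial aggregate accepts any input layout, while a final aggregate needs all rows sharing a grouping key on the same partition (or everything in one partition when there is no grouping). A minimal standalone sketch of that rule, using simplified stand-in types rather than Catalyst's Distribution classes:

object AggregateDistributionSketch {
  sealed trait Distribution
  case object UnspecifiedDistribution extends Distribution
  case object AllTuples extends Distribution
  final case class ClusteredDistribution(keys: Seq[String]) extends Distribution

  // Partial aggregates tolerate any layout; final aggregates need co-located
  // grouping keys, or a single partition when there are no grouping keys.
  def requiredChildDistribution(partial: Boolean, groupingKeys: Seq[String]): Seq[Distribution] =
    if (partial) UnspecifiedDistribution :: Nil
    else if (groupingKeys.isEmpty) AllTuples :: Nil
    else ClusteredDistribution(groupingKeys) :: Nil
}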
@@ -119,7 +121,7 @@ case class Aggregate( } } - override def execute() = attachTree(this, "execute") { + override def execute(): RDD[Row] = attachTree(this, "execute") { if (groupingExpressions.isEmpty) { child.execute().mapPartitions { iter => val buffer = newAggregateBuffer() diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/Exchange.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/Exchange.scala index 7c0b72aab448e..437408d30bfd2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/Exchange.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/Exchange.scala @@ -19,11 +19,12 @@ package org.apache.spark.sql.execution import org.apache.spark.annotation.DeveloperApi import org.apache.spark.shuffle.sort.SortShuffleManager +import org.apache.spark.sql.catalyst.expressions import org.apache.spark.{SparkEnv, HashPartitioner, RangePartitioner, SparkConf} -import org.apache.spark.rdd.ShuffledRDD +import org.apache.spark.rdd.{RDD, ShuffledRDD} import org.apache.spark.sql.{SQLContext, Row} import org.apache.spark.sql.catalyst.errors.attachTree -import org.apache.spark.sql.catalyst.expressions.RowOrdering +import org.apache.spark.sql.catalyst.expressions.{Attribute, RowOrdering} import org.apache.spark.sql.catalyst.plans.physical._ import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.util.MutablePair @@ -34,9 +35,9 @@ import org.apache.spark.util.MutablePair @DeveloperApi case class Exchange(newPartitioning: Partitioning, child: SparkPlan) extends UnaryNode { - override def outputPartitioning = newPartitioning + override def outputPartitioning: Partitioning = newPartitioning - override def output = child.output + override def output: Seq[Attribute] = child.output /** We must copy rows when sort based shuffle is on */ protected def sortBasedShuffleOn = SparkEnv.get.shuffleManager.isInstanceOf[SortShuffleManager] @@ -44,7 +45,7 @@ case class Exchange(newPartitioning: Partitioning, child: SparkPlan) extends Una private val bypassMergeThreshold = child.sqlContext.sparkContext.conf.getInt("spark.shuffle.sort.bypassMergeThreshold", 200) - override def execute() = attachTree(this , "execute") { + override def execute(): RDD[Row] = attachTree(this , "execute") { newPartitioning match { case HashPartitioning(expressions, numPartitions) => // TODO: Eliminate redundant expressions in grouping key and value. @@ -123,13 +124,13 @@ case class Exchange(newPartitioning: Partitioning, child: SparkPlan) extends Una */ private[sql] case class AddExchange(sqlContext: SQLContext) extends Rule[SparkPlan] { // TODO: Determine the number of partitions. - def numPartitions = sqlContext.conf.numShufflePartitions + def numPartitions: Int = sqlContext.conf.numShufflePartitions def apply(plan: SparkPlan): SparkPlan = plan.transformUp { case operator: SparkPlan => // Check if every child's outputPartitioning satisfies the corresponding // required data distribution. - def meetsRequirements = + def meetsRequirements: Boolean = !operator.requiredChildDistribution.zip(operator.children).map { case (required, child) => val valid = child.outputPartitioning.satisfies(required) @@ -147,7 +148,7 @@ private[sql] case class AddExchange(sqlContext: SQLContext) extends Rule[SparkPl // datasets are both clustered by "a", but these two outputPartitionings are not // compatible. // TODO: ASSUMES TRANSITIVITY? 
- def compatible = + def compatible: Boolean = !operator.children .map(_.outputPartitioning) .sliding(2) @@ -158,7 +159,7 @@ private[sql] case class AddExchange(sqlContext: SQLContext) extends Rule[SparkPl // Check if the partitioning we want to ensure is the same as the child's output // partitioning. If so, we do not need to add the Exchange operator. - def addExchangeIfNecessary(partitioning: Partitioning, child: SparkPlan) = + def addExchangeIfNecessary(partitioning: Partitioning, child: SparkPlan): SparkPlan = if (child.outputPartitioning != partitioning) Exchange(partitioning, child) else child if (meetsRequirements && compatible) { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/ExistingRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/ExistingRDD.scala index 248dc1512b4d3..d8955725e59b1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/ExistingRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/ExistingRDD.scala @@ -26,6 +26,8 @@ import org.apache.spark.sql.catalyst.expressions.{Attribute, GenericMutableRow} import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Statistics} import org.apache.spark.sql.types.StructType +import scala.collection.immutable + /** * :: DeveloperApi :: */ @@ -58,17 +60,17 @@ object RDDConversions { case class LogicalRDD(output: Seq[Attribute], rdd: RDD[Row])(sqlContext: SQLContext) extends LogicalPlan with MultiInstanceRelation { - override def children = Nil + override def children: Seq[LogicalPlan] = Nil - override def newInstance() = + override def newInstance(): LogicalRDD.this.type = LogicalRDD(output.map(_.newInstance()), rdd)(sqlContext).asInstanceOf[this.type] - override def sameResult(plan: LogicalPlan) = plan match { + override def sameResult(plan: LogicalPlan): Boolean = plan match { case LogicalRDD(_, otherRDD) => rdd.id == otherRDD.id case _ => false } - @transient override lazy val statistics = Statistics( + @transient override lazy val statistics: Statistics = Statistics( // TODO: Instead of returning a default value here, find a way to return a meaningful size // estimate for RDDs. See PR 1238 for more discussions. sizeInBytes = BigInt(sqlContext.conf.defaultSizeInBytes) @@ -77,24 +79,24 @@ case class LogicalRDD(output: Seq[Attribute], rdd: RDD[Row])(sqlContext: SQLCont /** Physical plan node for scanning data from an RDD. */ case class PhysicalRDD(output: Seq[Attribute], rdd: RDD[Row]) extends LeafNode { - override def execute() = rdd + override def execute(): RDD[Row] = rdd } /** Logical plan node for scanning data from a local collection. */ case class LogicalLocalTable(output: Seq[Attribute], rows: Seq[Row])(sqlContext: SQLContext) extends LogicalPlan with MultiInstanceRelation { - override def children = Nil + override def children: Seq[LogicalPlan] = Nil - override def newInstance() = + override def newInstance(): this.type = LogicalLocalTable(output.map(_.newInstance()), rows)(sqlContext).asInstanceOf[this.type] - override def sameResult(plan: LogicalPlan) = plan match { + override def sameResult(plan: LogicalPlan): Boolean = plan match { case LogicalRDD(_, otherRDD) => rows == rows case _ => false } - @transient override lazy val statistics = Statistics( + @transient override lazy val statistics: Statistics = Statistics( // TODO: Improve the statistics estimation. // This is made small enough so it can be broadcasted. 
sizeInBytes = sqlContext.conf.autoBroadcastJoinThreshold - 1 diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/Expand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/Expand.scala index 95172420608f9..575849481faad 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/Expand.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/Expand.scala @@ -18,6 +18,7 @@ package org.apache.spark.sql.execution import org.apache.spark.annotation.DeveloperApi +import org.apache.spark.rdd.RDD import org.apache.spark.sql.SQLContext import org.apache.spark.sql.catalyst.errors._ import org.apache.spark.sql.catalyst.expressions._ @@ -42,7 +43,7 @@ case class Expand( // as UNKNOWN partitioning override def outputPartitioning: Partitioning = UnknownPartitioning(0) - override def execute() = attachTree(this, "execute") { + override def execute(): RDD[Row] = attachTree(this, "execute") { child.execute().mapPartitions { iter => // TODO Move out projection objects creation and transfer to // workers via closure. However we can't assume the Projection @@ -55,7 +56,7 @@ case class Expand( private[this] var idx = -1 // -1 means the initial state private[this] var input: Row = _ - override final def hasNext = (-1 < idx && idx < groups.length) || iter.hasNext + override final def hasNext: Boolean = (-1 < idx && idx < groups.length) || iter.hasNext override final def next(): Row = { if (idx <= 0) { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/Generate.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/Generate.scala index 38877c28de3a8..12271048bb39c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/Generate.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/Generate.scala @@ -18,6 +18,7 @@ package org.apache.spark.sql.execution import org.apache.spark.annotation.DeveloperApi +import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.expressions._ /** @@ -54,7 +55,7 @@ case class Generate( val boundGenerator = BindReferences.bindReference(generator, child.output) - override def execute() = { + override def execute(): RDD[Row] = { if (join) { child.execute().mapPartitions { iter => val nullValues = Seq.fill(generator.output.size)(Literal(null)) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/GeneratedAggregate.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/GeneratedAggregate.scala index 4abe26fe4afc6..89682d25ca7dc 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/GeneratedAggregate.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/GeneratedAggregate.scala @@ -18,6 +18,7 @@ package org.apache.spark.sql.execution import org.apache.spark.annotation.DeveloperApi +import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.trees._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.physical._ @@ -49,7 +50,7 @@ case class GeneratedAggregate( child: SparkPlan) extends UnaryNode { - override def requiredChildDistribution = + override def requiredChildDistribution: Seq[Distribution] = if (partial) { UnspecifiedDistribution :: Nil } else { @@ -60,9 +61,9 @@ case class GeneratedAggregate( } } - override def output = aggregateExpressions.map(_.toAttribute) + override def output: Seq[Attribute] = aggregateExpressions.map(_.toAttribute) - override def execute() = { + override def execute(): RDD[Row] = { val aggregatesToCompute = aggregateExpressions.flatMap { a => a.collect { 
case agg: AggregateExpression => agg} } @@ -271,9 +272,9 @@ case class GeneratedAggregate( private[this] val resultIterator = buffers.entrySet.iterator() private[this] val resultProjection = resultProjectionBuilder() - def hasNext = resultIterator.hasNext + def hasNext: Boolean = resultIterator.hasNext - def next() = { + def next(): Row = { val currentGroup = resultIterator.next() resultProjection(joinedRow(currentGroup.getKey, currentGroup.getValue)) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/LocalTableScan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/LocalTableScan.scala index d3a18b37d52b9..5bd699a2fa949 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/LocalTableScan.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/LocalTableScan.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql.execution +import org.apache.spark.rdd.RDD import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.ScalaReflection import org.apache.spark.sql.catalyst.expressions.Attribute @@ -29,11 +30,11 @@ case class LocalTableScan(output: Seq[Attribute], rows: Seq[Row]) extends LeafNo private lazy val rdd = sqlContext.sparkContext.parallelize(rows) - override def execute() = rdd + override def execute(): RDD[Row] = rdd - override def executeCollect() = + override def executeCollect(): Array[Row] = rows.map(ScalaReflection.convertRowToScala(_, schema)).toArray - override def executeTake(limit: Int) = + override def executeTake(limit: Int): Array[Row] = rows.map(ScalaReflection.convertRowToScala(_, schema)).take(limit).toArray } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala index 052766c20abc2..d239637cd4b4e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala @@ -67,6 +67,7 @@ abstract class SparkPlan extends QueryPlan[SparkPlan] with Logging with Serializ // TODO: Move to `DistributedPlan` /** Specifies how data is partitioned across different nodes in the cluster. */ def outputPartitioning: Partitioning = UnknownPartitioning(0) // TODO: WRONG WIDTH! + /** Specifies any partition requirements on the input data for this operator. 
*/ def requiredChildDistribution: Seq[Distribution] = Seq.fill(children.size)(UnspecifiedDistribution) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlSerializer.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlSerializer.scala index 30564e14fa896..c4534fd5f67e4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlSerializer.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlSerializer.scala @@ -74,7 +74,7 @@ private[execution] class KryoResourcePool(size: Int) new KryoSerializer(sparkConf) } - def newInstance() = ser.newInstance() + def newInstance(): SerializerInstance = ser.newInstance() } private[sql] object SparkSqlSerializer { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala index 5281c7502556a..2b581152e5f77 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala @@ -154,7 +154,7 @@ private[sql] abstract class SparkStrategies extends QueryPlanner[SparkPlan] { case _ => Nil } - def canBeCodeGened(aggs: Seq[AggregateExpression]) = !aggs.exists { + def canBeCodeGened(aggs: Seq[AggregateExpression]): Boolean = !aggs.exists { case _: Sum | _: Count | _: Max | _: CombineSetsAndCount => false // The generated set implementation is pretty limited ATM. case CollectHashSet(exprs) if exprs.size == 1 && @@ -162,7 +162,7 @@ private[sql] abstract class SparkStrategies extends QueryPlanner[SparkPlan] { case _ => true } - def allAggregates(exprs: Seq[Expression]) = + def allAggregates(exprs: Seq[Expression]): Seq[AggregateExpression] = exprs.flatMap(_.collect { case a: AggregateExpression => a }) } @@ -257,7 +257,7 @@ private[sql] abstract class SparkStrategies extends QueryPlanner[SparkPlan] { // Can we automate these 'pass through' operations? 
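allAggregates above is a plain collect-by-type over an expression tree: walk each expression and keep every AggregateExpression node found. A minimal self-contained sketch of the same pattern on a toy expression type — the case classes and the collect helper below are invented for the sketch, not Catalyst's TreeNode API:

object CollectAggregatesSketch {
  sealed trait Expr {
    def children: Seq[Expr]
    // Pre-order traversal keeping every node the partial function accepts.
    def collect[B](pf: PartialFunction[Expr, B]): Seq[B] =
      pf.lift(this).toSeq ++ children.flatMap(_.collect(pf))
  }
  final case class Col(name: String) extends Expr { def children: Seq[Expr] = Nil }
  final case class Add(l: Expr, r: Expr) extends Expr { def children: Seq[Expr] = Seq(l, r) }
  final case class Sum(child: Expr) extends Expr { def children: Seq[Expr] = Seq(child) }

  def allAggregates(exprs: Seq[Expr]): Seq[Sum] =
    exprs.flatMap(_.collect { case s: Sum => s })

  def main(args: Array[String]): Unit = {
    // One aggregate buried inside an arithmetic expression is still found.
    println(allAggregates(Seq(Add(Sum(Col("a")), Col("b")))))  // List(Sum(Col(a)))
  }
}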
object BasicOperators extends Strategy { - def numPartitions = self.numPartitions + def numPartitions: Int = self.numPartitions def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match { case r: RunnableCommand => ExecutedCommand(r) :: Nil diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicOperators.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicOperators.scala index 710268584cff1..20c9bc3e75542 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicOperators.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicOperators.scala @@ -24,7 +24,7 @@ import org.apache.spark.shuffle.sort.SortShuffleManager import org.apache.spark.sql.catalyst.ScalaReflection import org.apache.spark.sql.catalyst.errors._ import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.plans.physical.{ClusteredDistribution, OrderedDistribution, SinglePartition, UnspecifiedDistribution} +import org.apache.spark.sql.catalyst.plans.physical._ import org.apache.spark.util.MutablePair import org.apache.spark.util.collection.ExternalSorter @@ -33,11 +33,11 @@ import org.apache.spark.util.collection.ExternalSorter */ @DeveloperApi case class Project(projectList: Seq[NamedExpression], child: SparkPlan) extends UnaryNode { - override def output = projectList.map(_.toAttribute) + override def output: Seq[Attribute] = projectList.map(_.toAttribute) @transient lazy val buildProjection = newMutableProjection(projectList, child.output) - override def execute() = child.execute().mapPartitions { iter => + override def execute(): RDD[Row] = child.execute().mapPartitions { iter => val resuableProjection = buildProjection() iter.map(resuableProjection) } @@ -48,11 +48,11 @@ case class Project(projectList: Seq[NamedExpression], child: SparkPlan) extends */ @DeveloperApi case class Filter(condition: Expression, child: SparkPlan) extends UnaryNode { - override def output = child.output + override def output: Seq[Attribute] = child.output - @transient lazy val conditionEvaluator = newPredicate(condition, child.output) + @transient lazy val conditionEvaluator: (Row) => Boolean = newPredicate(condition, child.output) - override def execute() = child.execute().mapPartitions { iter => + override def execute(): RDD[Row] = child.execute().mapPartitions { iter => iter.filter(conditionEvaluator) } } @@ -64,10 +64,12 @@ case class Filter(condition: Expression, child: SparkPlan) extends UnaryNode { case class Sample(fraction: Double, withReplacement: Boolean, seed: Long, child: SparkPlan) extends UnaryNode { - override def output = child.output + override def output: Seq[Attribute] = child.output // TODO: How to pick seed? 
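The Sample operator below keeps delegating to RDD.sample(withReplacement, fraction, seed); only its declared return type changes in this diff. A small usage sketch of that public RDD method; the app name and local master are arbitrary choices made so the sketch can run, not anything this patch prescribes:

import org.apache.spark.{SparkConf, SparkContext}

object SampleSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("sample-sketch").setMaster("local[*]"))
    val nums = sc.parallelize(1 to 1000)
    // fraction is an expected proportion, not an exact count; a fixed seed
    // makes the selection reproducible
    val sampled = nums.sample(withReplacement = false, fraction = 0.1, seed = 42L)
    println(sampled.count())  // roughly 100
    sc.stop()
  }
}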
- override def execute() = child.execute().map(_.copy()).sample(withReplacement, fraction, seed) + override def execute(): RDD[Row] = { + child.execute().map(_.copy()).sample(withReplacement, fraction, seed) + } } /** @@ -76,8 +78,8 @@ case class Sample(fraction: Double, withReplacement: Boolean, seed: Long, child: @DeveloperApi case class Union(children: Seq[SparkPlan]) extends SparkPlan { // TODO: attributes output by union should be distinct for nullability purposes - override def output = children.head.output - override def execute() = sparkContext.union(children.map(_.execute())) + override def output: Seq[Attribute] = children.head.output + override def execute(): RDD[Row] = sparkContext.union(children.map(_.execute())) } /** @@ -97,12 +99,12 @@ case class Limit(limit: Int, child: SparkPlan) /** We must copy rows when sort based shuffle is on */ private def sortBasedShuffleOn = SparkEnv.get.shuffleManager.isInstanceOf[SortShuffleManager] - override def output = child.output - override def outputPartitioning = SinglePartition + override def output: Seq[Attribute] = child.output + override def outputPartitioning: Partitioning = SinglePartition override def executeCollect(): Array[Row] = child.executeTake(limit) - override def execute() = { + override def execute(): RDD[Row] = { val rdd: RDD[_ <: Product2[Boolean, Row]] = if (sortBasedShuffleOn) { child.execute().mapPartitions { iter => iter.take(limit).map(row => (false, row.copy())) @@ -129,20 +131,21 @@ case class Limit(limit: Int, child: SparkPlan) @DeveloperApi case class TakeOrdered(limit: Int, sortOrder: Seq[SortOrder], child: SparkPlan) extends UnaryNode { - override def output = child.output - override def outputPartitioning = SinglePartition + override def output: Seq[Attribute] = child.output + + override def outputPartitioning: Partitioning = SinglePartition - val ord = new RowOrdering(sortOrder, child.output) + private val ord: RowOrdering = new RowOrdering(sortOrder, child.output) - private def collectData() = child.execute().map(_.copy()).takeOrdered(limit)(ord) + private def collectData(): Array[Row] = child.execute().map(_.copy()).takeOrdered(limit)(ord) // TODO: Is this copying for no reason? - override def executeCollect() = + override def executeCollect(): Array[Row] = collectData().map(ScalaReflection.convertRowToScala(_, this.schema)) // TODO: Terminal split should be implemented differently from non-terminal split. // TODO: Pick num splits based on |limit|. 
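Limit and TakeOrdered above both call copy() before buffering rows, because an upstream iterator may reuse a single mutable row per partition. A stand-alone sketch of the failure mode, using a hypothetical MutableRecord rather than Spark's Row:

  // Hypothetical type; stands in for an iterator that reuses one mutable object.
  final class MutableRecord(var value: Int) {
    def copy(): MutableRecord = new MutableRecord(value)
  }

  val shared = new MutableRecord(0)
  def source: Iterator[MutableRecord] = Iterator(1, 2, 3).map { v => shared.value = v; shared }

  val safe  = source.map(_.copy()).toArray.map(_.value)   // Array(1, 2, 3)
  val wrong = source.toArray.map(_.value)                  // Array(3, 3, 3): three refs to one object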
- override def execute() = sparkContext.makeRDD(collectData(), 1) + override def execute(): RDD[Row] = sparkContext.makeRDD(collectData(), 1) } /** @@ -157,17 +160,17 @@ case class Sort( global: Boolean, child: SparkPlan) extends UnaryNode { - override def requiredChildDistribution = + override def requiredChildDistribution: Seq[Distribution] = if (global) OrderedDistribution(sortOrder) :: Nil else UnspecifiedDistribution :: Nil - override def execute() = attachTree(this, "sort") { + override def execute(): RDD[Row] = attachTree(this, "sort") { child.execute().mapPartitions( { iterator => val ordering = newOrdering(sortOrder, child.output) iterator.map(_.copy()).toArray.sorted(ordering).iterator }, preservesPartitioning = true) } - override def output = child.output + override def output: Seq[Attribute] = child.output } /** @@ -182,10 +185,11 @@ case class ExternalSort( global: Boolean, child: SparkPlan) extends UnaryNode { - override def requiredChildDistribution = + + override def requiredChildDistribution: Seq[Distribution] = if (global) OrderedDistribution(sortOrder) :: Nil else UnspecifiedDistribution :: Nil - override def execute() = attachTree(this, "sort") { + override def execute(): RDD[Row] = attachTree(this, "sort") { child.execute().mapPartitions( { iterator => val ordering = newOrdering(sortOrder, child.output) val sorter = new ExternalSorter[Row, Null, Row](ordering = Some(ordering)) @@ -194,7 +198,7 @@ case class ExternalSort( }, preservesPartitioning = true) } - override def output = child.output + override def output: Seq[Attribute] = child.output } /** @@ -206,12 +210,12 @@ case class ExternalSort( */ @DeveloperApi case class Distinct(partial: Boolean, child: SparkPlan) extends UnaryNode { - override def output = child.output + override def output: Seq[Attribute] = child.output - override def requiredChildDistribution = + override def requiredChildDistribution: Seq[Distribution] = if (partial) UnspecifiedDistribution :: Nil else ClusteredDistribution(child.output) :: Nil - override def execute() = { + override def execute(): RDD[Row] = { child.execute().mapPartitions { iter => val hashSet = new scala.collection.mutable.HashSet[Row]() @@ -236,9 +240,9 @@ case class Distinct(partial: Boolean, child: SparkPlan) extends UnaryNode { */ @DeveloperApi case class Except(left: SparkPlan, right: SparkPlan) extends BinaryNode { - override def output = left.output + override def output: Seq[Attribute] = left.output - override def execute() = { + override def execute(): RDD[Row] = { left.execute().map(_.copy()).subtract(right.execute().map(_.copy())) } } @@ -250,9 +254,9 @@ case class Except(left: SparkPlan, right: SparkPlan) extends BinaryNode { */ @DeveloperApi case class Intersect(left: SparkPlan, right: SparkPlan) extends BinaryNode { - override def output = children.head.output + override def output: Seq[Attribute] = children.head.output - override def execute() = { + override def execute(): RDD[Row] = { left.execute().map(_.copy()).intersection(right.execute().map(_.copy())) } } @@ -265,6 +269,7 @@ case class Intersect(left: SparkPlan, right: SparkPlan) extends BinaryNode { */ @DeveloperApi case class OutputFaker(output: Seq[Attribute], child: SparkPlan) extends SparkPlan { - def children = child :: Nil - def execute() = child.execute() + def children: Seq[SparkPlan] = child :: Nil + + def execute(): RDD[Row] = child.execute() } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala index a11232142d0fb..fad7a281dc1e2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala @@ -26,7 +26,6 @@ import org.apache.spark.sql.catalyst.errors.TreeNodeException import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Row, Attribute} import org.apache.spark.sql.catalyst.plans.logical import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan -import scala.collection.mutable.ArrayBuffer /** * A logical command that is executed for its side-effects. `RunnableCommand`s are @@ -54,9 +53,9 @@ case class ExecutedCommand(cmd: RunnableCommand) extends SparkPlan { */ protected[sql] lazy val sideEffectResult: Seq[Row] = cmd.run(sqlContext) - override def output = cmd.output + override def output: Seq[Attribute] = cmd.output - override def children = Nil + override def children: Seq[SparkPlan] = Nil override def executeCollect(): Array[Row] = sideEffectResult.toArray @@ -71,9 +70,10 @@ case class ExecutedCommand(cmd: RunnableCommand) extends SparkPlan { @DeveloperApi case class SetCommand( kv: Option[(String, Option[String])], - override val output: Seq[Attribute]) extends RunnableCommand with Logging { + override val output: Seq[Attribute]) + extends RunnableCommand with Logging { - override def run(sqlContext: SQLContext) = kv match { + override def run(sqlContext: SQLContext): Seq[Row] = kv match { // Configures the deprecated "mapred.reduce.tasks" property. case Some((SQLConf.Deprecated.MAPRED_REDUCE_TASKS, Some(value))) => logWarning( @@ -119,10 +119,11 @@ case class ExplainCommand( logicalPlan: LogicalPlan, override val output: Seq[Attribute] = Seq(AttributeReference("plan", StringType, nullable = false)()), - extended: Boolean = false) extends RunnableCommand { + extended: Boolean = false) + extends RunnableCommand { // Run through the optimizer to generate the physical plan. - override def run(sqlContext: SQLContext) = try { + override def run(sqlContext: SQLContext): Seq[Row] = try { // TODO in Hive, the "extended" ExplainCommand prints the AST as well, and detailed properties. 
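The command hunks above settle on an explicit Seq[Row] result for RunnableCommand.run. A hedged sketch of that shape, assuming the RunnableCommand trait and the imports already present in commands.scala; the command and its configuration key are hypothetical, not something this patch adds:

  case class SetDebugFlagCommand(enabled: Boolean) extends RunnableCommand {
    // Nothing to return, so the schema is empty, as with ClearCacheCommand below.
    override val output: Seq[Attribute] = Seq.empty

    override def run(sqlContext: SQLContext): Seq[Row] = {
      sqlContext.setConf("spark.sql.example.debug", enabled.toString)  // hypothetical key
      Seq.empty[Row]
    }
  }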
val queryExecution = sqlContext.executePlan(logicalPlan) val outputString = if (extended) queryExecution.toString else queryExecution.simpleString @@ -140,9 +141,10 @@ case class ExplainCommand( case class CacheTableCommand( tableName: String, plan: Option[LogicalPlan], - isLazy: Boolean) extends RunnableCommand { + isLazy: Boolean) + extends RunnableCommand { - override def run(sqlContext: SQLContext) = { + override def run(sqlContext: SQLContext): Seq[Row] = { plan.foreach { logicalPlan => sqlContext.registerDataFrameAsTable(DataFrame(sqlContext, logicalPlan), tableName) } @@ -166,7 +168,7 @@ case class CacheTableCommand( @DeveloperApi case class UncacheTableCommand(tableName: String) extends RunnableCommand { - override def run(sqlContext: SQLContext) = { + override def run(sqlContext: SQLContext): Seq[Row] = { sqlContext.table(tableName).unpersist(blocking = false) Seq.empty[Row] } @@ -181,7 +183,7 @@ case class UncacheTableCommand(tableName: String) extends RunnableCommand { @DeveloperApi case object ClearCacheCommand extends RunnableCommand { - override def run(sqlContext: SQLContext) = { + override def run(sqlContext: SQLContext): Seq[Row] = { sqlContext.clearCache() Seq.empty[Row] } @@ -196,9 +198,10 @@ case object ClearCacheCommand extends RunnableCommand { case class DescribeCommand( child: SparkPlan, override val output: Seq[Attribute], - isExtended: Boolean) extends RunnableCommand { + isExtended: Boolean) + extends RunnableCommand { - override def run(sqlContext: SQLContext) = { + override def run(sqlContext: SQLContext): Seq[Row] = { child.schema.fields.map { field => val cmtKey = "comment" val comment = if (field.metadata.contains(cmtKey)) field.metadata.getString(cmtKey) else "" @@ -220,7 +223,7 @@ case class DescribeCommand( case class ShowTablesCommand(databaseName: Option[String]) extends RunnableCommand { // The result of SHOW TABLES has two columns, tableName and isTemporary. - override val output = { + override val output: Seq[Attribute] = { val schema = StructType( StructField("tableName", StringType, false) :: StructField("isTemporary", BooleanType, false) :: Nil) @@ -228,7 +231,7 @@ case class ShowTablesCommand(databaseName: Option[String]) extends RunnableComma schema.toAttributes } - override def run(sqlContext: SQLContext) = { + override def run(sqlContext: SQLContext): Seq[Row] = { // Since we need to return a Seq of rows, we will call getTables directly // instead of calling tables in sqlContext. val rows = sqlContext.catalog.getTables(databaseName).map { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/debug/package.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/debug/package.scala index ffe388cfa9532..e916e68e58b5d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/debug/package.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/debug/package.scala @@ -17,11 +17,13 @@ package org.apache.spark.sql.execution +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.catalyst.expressions.Attribute + import scala.collection.mutable.HashSet -import org.apache.spark.{AccumulatorParam, Accumulator, SparkContext} +import org.apache.spark.{AccumulatorParam, Accumulator} import org.apache.spark.annotation.DeveloperApi -import org.apache.spark.SparkContext._ import org.apache.spark.sql.{SQLConf, SQLContext, DataFrame, Row} import org.apache.spark.sql.catalyst.trees.TreeNodeRef import org.apache.spark.sql.types._ @@ -43,7 +45,7 @@ package object debug { * Augments [[SQLContext]] with debug methods. 
*/ implicit class DebugSQLContext(sqlContext: SQLContext) { - def debug() = { + def debug(): Unit = { sqlContext.setConf(SQLConf.DATAFRAME_EAGER_ANALYSIS, "false") } } @@ -88,7 +90,7 @@ package object debug { } private[sql] case class DebugNode(child: SparkPlan) extends UnaryNode { - def output = child.output + def output: Seq[Attribute] = child.output implicit object SetAccumulatorParam extends AccumulatorParam[HashSet[String]] { def zero(initialValue: HashSet[String]): HashSet[String] = { @@ -109,10 +111,10 @@ package object debug { */ case class ColumnMetrics( elementTypes: Accumulator[HashSet[String]] = sparkContext.accumulator(HashSet.empty)) - val tupleCount = sparkContext.accumulator[Int](0) + val tupleCount: Accumulator[Int] = sparkContext.accumulator[Int](0) - val numColumns = child.output.size - val columnStats = Array.fill(child.output.size)(new ColumnMetrics()) + val numColumns: Int = child.output.size + val columnStats: Array[ColumnMetrics] = Array.fill(child.output.size)(new ColumnMetrics()) def dumpStats(): Unit = { println(s"== ${child.simpleString} ==") @@ -123,11 +125,11 @@ package object debug { } } - def execute() = { + def execute(): RDD[Row] = { child.execute().mapPartitions { iter => new Iterator[Row] { - def hasNext = iter.hasNext - def next() = { + def hasNext: Boolean = iter.hasNext + def next(): Row = { val currentRow = iter.next() tupleCount += 1 var i = 0 @@ -180,18 +182,18 @@ package object debug { private[sql] case class TypeCheck(child: SparkPlan) extends SparkPlan { import TypeCheck._ - override def nodeName = "" + override def nodeName: String = "" /* Only required when defining this class in a REPL. override def makeCopy(args: Array[Object]): this.type = TypeCheck(args(0).asInstanceOf[SparkPlan]).asInstanceOf[this.type] */ - def output = child.output + def output: Seq[Attribute] = child.output - def children = child :: Nil + def children: List[SparkPlan] = child :: Nil - def execute() = { + def execute(): RDD[Row] = { child.execute().map { row => try typeCheck(row, child.schema) catch { case e: Exception => diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastHashJoin.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastHashJoin.scala index 2dd22c020ef12..926f5e6c137ee 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastHashJoin.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastHashJoin.scala @@ -17,13 +17,15 @@ package org.apache.spark.sql.execution.joins +import org.apache.spark.rdd.RDD + import scala.concurrent._ import scala.concurrent.duration._ import scala.concurrent.ExecutionContext.Implicits.global import org.apache.spark.annotation.DeveloperApi import org.apache.spark.sql.catalyst.expressions.{Row, Expression} -import org.apache.spark.sql.catalyst.plans.physical.{Partitioning, UnspecifiedDistribution} +import org.apache.spark.sql.catalyst.plans.physical.{Distribution, Partitioning, UnspecifiedDistribution} import org.apache.spark.sql.execution.{BinaryNode, SparkPlan} /** @@ -42,7 +44,7 @@ case class BroadcastHashJoin( right: SparkPlan) extends BinaryNode with HashJoin { - val timeout = { + val timeout: Duration = { val timeoutValue = sqlContext.conf.broadcastTimeout if (timeoutValue < 0) { Duration.Inf @@ -53,7 +55,7 @@ case class BroadcastHashJoin( override def outputPartitioning: Partitioning = streamedPlan.outputPartitioning - override def requiredChildDistribution = + override def requiredChildDistribution: Seq[Distribution] 
= UnspecifiedDistribution :: UnspecifiedDistribution :: Nil @transient @@ -64,7 +66,7 @@ case class BroadcastHashJoin( sparkContext.broadcast(hashed) } - override def execute() = { + override def execute(): RDD[Row] = { val broadcastRelation = Await.result(broadcastFuture, timeout) streamedPlan.execute().mapPartitions { streamedIter => diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastLeftSemiJoinHash.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastLeftSemiJoinHash.scala index 2ab064fd0151e..3ef1e0d7fbdd4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastLeftSemiJoinHash.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastLeftSemiJoinHash.scala @@ -18,8 +18,8 @@ package org.apache.spark.sql.execution.joins import org.apache.spark.annotation.DeveloperApi -import org.apache.spark.sql.catalyst.expressions.{Expression, Row} -import org.apache.spark.sql.catalyst.plans.physical.ClusteredDistribution +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression, Row} import org.apache.spark.sql.execution.{BinaryNode, SparkPlan} /** @@ -34,11 +34,11 @@ case class BroadcastLeftSemiJoinHash( left: SparkPlan, right: SparkPlan) extends BinaryNode with HashJoin { - override val buildSide = BuildRight + override val buildSide: BuildSide = BuildRight - override def output = left.output + override def output: Seq[Attribute] = left.output - override def execute() = { + override def execute(): RDD[Row] = { val buildIter= buildPlan.execute().map(_.copy()).collect().toIterator val hashSet = new java.util.HashSet[Row]() var currentRow: Row = null diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastNestedLoopJoin.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastNestedLoopJoin.scala index 36aad13778bd2..83b1a83765153 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastNestedLoopJoin.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastNestedLoopJoin.scala @@ -18,6 +18,7 @@ package org.apache.spark.sql.execution.joins import org.apache.spark.annotation.DeveloperApi +import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.physical.Partitioning import org.apache.spark.sql.catalyst.plans.{FullOuter, JoinType, LeftOuter, RightOuter} @@ -44,7 +45,7 @@ case class BroadcastNestedLoopJoin( override def outputPartitioning: Partitioning = streamed.outputPartitioning - override def output = { + override def output: Seq[Attribute] = { joinType match { case LeftOuter => left.output ++ right.output.map(_.withNullability(true)) @@ -63,7 +64,7 @@ case class BroadcastNestedLoopJoin( .map(c => BindReferences.bindReference(c, left.output ++ right.output)) .getOrElse(Literal(true))) - override def execute() = { + override def execute(): RDD[Row] = { val broadcastedRelation = sparkContext.broadcast(broadcast.execute().map(_.copy()).collect().toIndexedSeq) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/CartesianProduct.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/CartesianProduct.scala index 76c14c02aab34..1cbc98354d673 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/CartesianProduct.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/CartesianProduct.scala @@ -18,7 +18,9 @@ package 
org.apache.spark.sql.execution.joins import org.apache.spark.annotation.DeveloperApi -import org.apache.spark.sql.catalyst.expressions.JoinedRow +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.Row +import org.apache.spark.sql.catalyst.expressions.{Attribute, JoinedRow} import org.apache.spark.sql.execution.{BinaryNode, SparkPlan} /** @@ -26,9 +28,9 @@ import org.apache.spark.sql.execution.{BinaryNode, SparkPlan} */ @DeveloperApi case class CartesianProduct(left: SparkPlan, right: SparkPlan) extends BinaryNode { - override def output = left.output ++ right.output + override def output: Seq[Attribute] = left.output ++ right.output - override def execute() = { + override def execute(): RDD[Row] = { val leftResults = left.execute().map(_.copy()) val rightResults = right.execute().map(_.copy()) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashJoin.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashJoin.scala index 4012d757d5f9a..851de1685509a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashJoin.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashJoin.scala @@ -41,7 +41,7 @@ trait HashJoin { case BuildRight => (rightKeys, leftKeys) } - override def output = left.output ++ right.output + override def output: Seq[Attribute] = left.output ++ right.output @transient protected lazy val buildSideKeyGenerator: Projection = newProjection(buildKeys, buildPlan.output) @@ -65,7 +65,7 @@ trait HashJoin { (currentMatchPosition != -1 && currentMatchPosition < currentHashMatches.size) || (streamIter.hasNext && fetchNext()) - override final def next() = { + override final def next(): Row = { val ret = buildSide match { case BuildRight => joinRow(currentStreamedRow, currentHashMatches(currentMatchPosition)) case BuildLeft => joinRow(currentHashMatches(currentMatchPosition), currentStreamedRow) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashOuterJoin.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashOuterJoin.scala index 59ef904272545..a396c0f5d56ee 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashOuterJoin.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashOuterJoin.scala @@ -19,6 +19,8 @@ package org.apache.spark.sql.execution.joins import java.util.{HashMap => JavaHashMap} +import org.apache.spark.rdd.RDD + import scala.collection.JavaConversions._ import org.apache.spark.annotation.DeveloperApi @@ -49,10 +51,10 @@ case class HashOuterJoin( case x => throw new Exception(s"HashOuterJoin should not take $x as the JoinType") } - override def requiredChildDistribution = + override def requiredChildDistribution: Seq[ClusteredDistribution] = ClusteredDistribution(leftKeys) :: ClusteredDistribution(rightKeys) :: Nil - override def output = { + override def output: Seq[Attribute] = { joinType match { case LeftOuter => left.output ++ right.output.map(_.withNullability(true)) @@ -78,12 +80,12 @@ case class HashOuterJoin( private[this] def leftOuterIterator( key: Row, joinedRow: JoinedRow, rightIter: Iterable[Row]): Iterator[Row] = { - val ret: Iterable[Row] = ( + val ret: Iterable[Row] = { if (!key.anyNull) { val temp = rightIter.collect { - case r if (boundCondition(joinedRow.withRight(r))) => joinedRow.copy + case r if boundCondition(joinedRow.withRight(r)) => joinedRow.copy() } - if (temp.size == 0) { + if (temp.size == 0) { joinedRow.withRight(rightNullRow).copy :: Nil } else { temp @@ 
-91,19 +93,19 @@ case class HashOuterJoin( } else { joinedRow.withRight(rightNullRow).copy :: Nil } - ) + } ret.iterator } private[this] def rightOuterIterator( key: Row, leftIter: Iterable[Row], joinedRow: JoinedRow): Iterator[Row] = { - val ret: Iterable[Row] = ( + val ret: Iterable[Row] = { if (!key.anyNull) { val temp = leftIter.collect { - case l if (boundCondition(joinedRow.withLeft(l))) => joinedRow.copy + case l if boundCondition(joinedRow.withLeft(l)) => joinedRow.copy } - if (temp.size == 0) { + if (temp.size == 0) { joinedRow.withLeft(leftNullRow).copy :: Nil } else { temp @@ -111,7 +113,7 @@ case class HashOuterJoin( } else { joinedRow.withLeft(leftNullRow).copy :: Nil } - ) + } ret.iterator } @@ -130,12 +132,12 @@ case class HashOuterJoin( // 1. For those matched (satisfy the join condition) records with both sides filled, // append them directly - case (r, idx) if (boundCondition(joinedRow.withRight(r)))=> { + case (r, idx) if boundCondition(joinedRow.withRight(r)) => matched = true // if the row satisfy the join condition, add its index into the matched set rightMatchedSet.add(idx) - joinedRow.copy - } + joinedRow.copy() + } ++ DUMMY_LIST.filter(_ => !matched).map( _ => { // 2. For those unmatched records in left, append additional records with empty right. @@ -143,22 +145,21 @@ case class HashOuterJoin( // as we don't know whether we need to append it until finish iterating all // of the records in right side. // If we didn't get any proper row, then append a single row with empty right. - joinedRow.withRight(rightNullRow).copy + joinedRow.withRight(rightNullRow).copy() }) } ++ rightIter.zipWithIndex.collect { // 3. For those unmatched records in right, append additional records with empty left. // Re-visiting the records in right, and append additional row with empty left, if its not // in the matched set. - case (r, idx) if (!rightMatchedSet.contains(idx)) => { - joinedRow(leftNullRow, r).copy - } + case (r, idx) if !rightMatchedSet.contains(idx) => + joinedRow(leftNullRow, r).copy() } } else { leftIter.iterator.map[Row] { l => - joinedRow(l, rightNullRow).copy + joinedRow(l, rightNullRow).copy() } ++ rightIter.iterator.map[Row] { r => - joinedRow(leftNullRow, r).copy + joinedRow(leftNullRow, r).copy() } } } @@ -182,13 +183,13 @@ case class HashOuterJoin( hashTable } - override def execute() = { + override def execute(): RDD[Row] = { val joinedRow = new JoinedRow() left.execute().zipPartitions(right.execute()) { (leftIter, rightIter) => // TODO this probably can be replaced by external sort (sort merged join?) 
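HashOuterJoin builds a hash table from one side and streams the other, padding unmatched rows with nulls. A generic left-outer sketch over plain Scala collections, with a hypothetical helper that is not Spark's implementation, to show the shape:

  def leftOuter[K, L, R](left: Seq[(K, L)], right: Seq[(K, R)]): Seq[(L, Option[R])] = {
    val table: Map[K, Seq[(K, R)]] = right.groupBy(_._1)     // the "build" side
    left.flatMap { case (k, l) =>                            // the "streamed" side
      table.get(k) match {
        case Some(matches) => matches.map { case (_, r) => (l, Some(r)) }  // one row per match
        case None          => Seq((l, None))                 // unmatched: pad with None (null row)
      }
    }
  }

  leftOuter(Seq(1 -> "a", 2 -> "b"), Seq(1 -> "x"))   // Seq(("a", Some("x")), ("b", None))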
joinType match { - case LeftOuter => { + case LeftOuter => val rightHashTable = buildHashTable(rightIter, newProjection(rightKeys, right.output)) val keyGenerator = newProjection(leftKeys, left.output) leftIter.flatMap( currentRow => { @@ -196,8 +197,8 @@ case class HashOuterJoin( joinedRow.withLeft(currentRow) leftOuterIterator(rowKey, joinedRow, rightHashTable.getOrElse(rowKey, EMPTY_LIST)) }) - } - case RightOuter => { + + case RightOuter => val leftHashTable = buildHashTable(leftIter, newProjection(leftKeys, left.output)) val keyGenerator = newProjection(rightKeys, right.output) rightIter.flatMap ( currentRow => { @@ -205,8 +206,8 @@ case class HashOuterJoin( joinedRow.withRight(currentRow) rightOuterIterator(rowKey, leftHashTable.getOrElse(rowKey, EMPTY_LIST), joinedRow) }) - } - case FullOuter => { + + case FullOuter => val leftHashTable = buildHashTable(leftIter, newProjection(leftKeys, left.output)) val rightHashTable = buildHashTable(rightIter, newProjection(rightKeys, right.output)) (leftHashTable.keySet ++ rightHashTable.keySet).iterator.flatMap { key => @@ -214,7 +215,7 @@ case class HashOuterJoin( leftHashTable.getOrElse(key, EMPTY_LIST), rightHashTable.getOrElse(key, EMPTY_LIST), joinedRow) } - } + case x => throw new Exception(s"HashOuterJoin should not take $x as the JoinType") } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala index 38b8993b03f82..2fa1cf5add3b5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala @@ -38,7 +38,7 @@ private[joins] sealed trait HashedRelation { private[joins] final class GeneralHashedRelation(hashTable: JavaHashMap[Row, CompactBuffer[Row]]) extends HashedRelation with Serializable { - override def get(key: Row) = hashTable.get(key) + override def get(key: Row): CompactBuffer[Row] = hashTable.get(key) } @@ -49,7 +49,7 @@ private[joins] final class GeneralHashedRelation(hashTable: JavaHashMap[Row, Com private[joins] final class UniqueKeyHashedRelation(hashTable: JavaHashMap[Row, Row]) extends HashedRelation with Serializable { - override def get(key: Row) = { + override def get(key: Row): CompactBuffer[Row] = { val v = hashTable.get(key) if (v eq null) null else CompactBuffer(v) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/LeftSemiJoinBNL.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/LeftSemiJoinBNL.scala index 60003d1900d85..1fa7e7bd0406c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/LeftSemiJoinBNL.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/LeftSemiJoinBNL.scala @@ -18,6 +18,7 @@ package org.apache.spark.sql.execution.joins import org.apache.spark.annotation.DeveloperApi +import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.physical.Partitioning import org.apache.spark.sql.execution.{BinaryNode, SparkPlan} @@ -35,12 +36,13 @@ case class LeftSemiJoinBNL( override def outputPartitioning: Partitioning = streamed.outputPartitioning - override def output = left.output + override def output: Seq[Attribute] = left.output /** The Streamed Relation */ - override def left = streamed + override def left: SparkPlan = streamed + /** The Broadcast relation */ - override def right = broadcast + override def right: 
SparkPlan = broadcast @transient private lazy val boundCondition = InterpretedPredicate( @@ -48,7 +50,7 @@ case class LeftSemiJoinBNL( .map(c => BindReferences.bindReference(c, left.output ++ right.output)) .getOrElse(Literal(true))) - override def execute() = { + override def execute(): RDD[Row] = { val broadcastedRelation = sparkContext.broadcast(broadcast.execute().map(_.copy()).collect().toIndexedSeq) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/LeftSemiJoinHash.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/LeftSemiJoinHash.scala index ea7babf3be948..a04f2a63b5a55 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/LeftSemiJoinHash.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/LeftSemiJoinHash.scala @@ -18,7 +18,8 @@ package org.apache.spark.sql.execution.joins import org.apache.spark.annotation.DeveloperApi -import org.apache.spark.sql.catalyst.expressions.{Expression, Row} +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression, Row} import org.apache.spark.sql.catalyst.plans.physical.ClusteredDistribution import org.apache.spark.sql.execution.{BinaryNode, SparkPlan} @@ -34,14 +35,14 @@ case class LeftSemiJoinHash( left: SparkPlan, right: SparkPlan) extends BinaryNode with HashJoin { - override val buildSide = BuildRight + override val buildSide: BuildSide = BuildRight - override def requiredChildDistribution = + override def requiredChildDistribution: Seq[ClusteredDistribution] = ClusteredDistribution(leftKeys) :: ClusteredDistribution(rightKeys) :: Nil - override def output = left.output + override def output: Seq[Attribute] = left.output - override def execute() = { + override def execute(): RDD[Row] = { buildPlan.execute().zipPartitions(streamedPlan.execute()) { (buildIter, streamIter) => val hashSet = new java.util.HashSet[Row]() var currentRow: Row = null diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/ShuffledHashJoin.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/ShuffledHashJoin.scala index 418c1c23e5546..a6cd8337c1c3e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/ShuffledHashJoin.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/ShuffledHashJoin.scala @@ -18,6 +18,8 @@ package org.apache.spark.sql.execution.joins import org.apache.spark.annotation.DeveloperApi +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.expressions.Expression import org.apache.spark.sql.catalyst.plans.physical.{ClusteredDistribution, Partitioning} import org.apache.spark.sql.execution.{BinaryNode, SparkPlan} @@ -38,10 +40,10 @@ case class ShuffledHashJoin( override def outputPartitioning: Partitioning = left.outputPartitioning - override def requiredChildDistribution = + override def requiredChildDistribution: Seq[ClusteredDistribution] = ClusteredDistribution(leftKeys) :: ClusteredDistribution(rightKeys) :: Nil - override def execute() = { + override def execute(): RDD[Row] = { buildPlan.execute().zipPartitions(streamedPlan.execute()) { (buildIter, streamIter) => val hashed = HashedRelation(buildIter, buildSideKeyGenerator) hashJoin(streamIter, hashed) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/pythonUdfs.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/pythonUdfs.scala index 33632b8e82ff9..5b308d88d4cdf 100644 --- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/pythonUdfs.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/pythonUdfs.scala @@ -19,6 +19,8 @@ package org.apache.spark.sql.execution import java.util.{List => JList, Map => JMap} +import org.apache.spark.rdd.RDD + import scala.collection.JavaConversions._ import scala.collection.JavaConverters._ @@ -48,11 +50,13 @@ private[spark] case class PythonUDF( dataType: DataType, children: Seq[Expression]) extends Expression with SparkLogging { - override def toString = s"PythonUDF#$name(${children.mkString(",")})" + override def toString: String = s"PythonUDF#$name(${children.mkString(",")})" def nullable: Boolean = true - override def eval(input: Row) = sys.error("PythonUDFs can not be directly evaluated.") + override def eval(input: Row): PythonUDF.this.EvaluatedType = { + sys.error("PythonUDFs can not be directly evaluated.") + } } /** @@ -63,7 +67,7 @@ private[spark] case class PythonUDF( * multiple child operators. */ private[spark] object ExtractPythonUdfs extends Rule[LogicalPlan] { - def apply(plan: LogicalPlan) = plan transform { + def apply(plan: LogicalPlan): LogicalPlan = plan transform { // Skip EvaluatePython nodes. case p: EvaluatePython => p @@ -107,7 +111,7 @@ private[spark] object ExtractPythonUdfs extends Rule[LogicalPlan] { } object EvaluatePython { - def apply(udf: PythonUDF, child: LogicalPlan) = + def apply(udf: PythonUDF, child: LogicalPlan): EvaluatePython = new EvaluatePython(udf, child, AttributeReference("pythonUDF", udf.dataType)()) /** @@ -205,10 +209,10 @@ case class EvaluatePython( resultAttribute: AttributeReference) extends logical.UnaryNode { - def output = child.output :+ resultAttribute + def output: Seq[Attribute] = child.output :+ resultAttribute // References should not include the produced attribute. - override def references = udf.references + override def references: AttributeSet = udf.references } /** @@ -219,9 +223,10 @@ case class EvaluatePython( @DeveloperApi case class BatchPythonEvaluation(udf: PythonUDF, output: Seq[Attribute], child: SparkPlan) extends SparkPlan { - def children = child :: Nil - def execute() = { + def children: Seq[SparkPlan] = child :: Nil + + def execute(): RDD[Row] = { // TODO: Clean up after ourselves? val childResults = child.execute().map(_.copy()).cache() diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala index 87304ce2496b4..76f8593180e85 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.jdbc import java.sql.{Connection, DriverManager, ResultSet, ResultSetMetaData, SQLException} +import org.apache.commons.lang.StringEscapeUtils.escapeSql import org.apache.spark.{Logging, Partition, SparkContext, TaskContext} import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.expressions.{Row, SpecificMutableRow} @@ -226,16 +227,24 @@ private[sql] class JDBCRDD( if (sb.length == 0) "1" else sb.substring(1) } + /** + * Converts value to SQL expression. + */ + private def compileValue(value: Any): Any = value match { + case stringValue: String => s"'${escapeSql(stringValue)}'" + case _ => value + } + /** * Turns a single Filter into a String representing a SQL expression. * Returns null for an unhandled filter. 
*/ private def compileFilter(f: Filter): String = f match { - case EqualTo(attr, value) => s"$attr = $value" - case LessThan(attr, value) => s"$attr < $value" - case GreaterThan(attr, value) => s"$attr > $value" - case LessThanOrEqual(attr, value) => s"$attr <= $value" - case GreaterThanOrEqual(attr, value) => s"$attr >= $value" + case EqualTo(attr, value) => s"$attr = ${compileValue(value)}" + case LessThan(attr, value) => s"$attr < ${compileValue(value)}" + case GreaterThan(attr, value) => s"$attr > ${compileValue(value)}" + case LessThanOrEqual(attr, value) => s"$attr <= ${compileValue(value)}" + case GreaterThanOrEqual(attr, value) => s"$attr >= ${compileValue(value)}" case _ => null } @@ -306,7 +315,8 @@ private[sql] class JDBCRDD( /** * Runs the SQL query against the JDBC driver. */ - override def compute(thePart: Partition, context: TaskContext) = new Iterator[Row] { + override def compute(thePart: Partition, context: TaskContext): Iterator[Row] = new Iterator[Row] + { var closed = false var finished = false var gotNext = false diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRelation.scala index 1778d39c42e2b..df687e6da9bea 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRelation.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRelation.scala @@ -17,6 +17,10 @@ package org.apache.spark.sql.jdbc +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.catalyst.expressions.Row +import org.apache.spark.sql.types.StructType + import scala.collection.mutable.ArrayBuffer import java.sql.DriverManager @@ -122,9 +126,9 @@ private[sql] case class JDBCRelation( extends BaseRelation with PrunedFilteredScan { - override val schema = JDBCRDD.resolveTable(url, table) + override val schema: StructType = JDBCRDD.resolveTable(url, table) - override def buildScan(requiredColumns: Array[String], filters: Array[Filter]) = { + override def buildScan(requiredColumns: Array[String], filters: Array[Filter]): RDD[Row] = { val driver: String = DriverManager.getDriver(url).getClass.getCanonicalName JDBCRDD.scanTable( sqlContext.sparkContext, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/json/JSONRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/json/JSONRelation.scala index b645199ded18c..f4c99b4b56606 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/json/JSONRelation.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/json/JSONRelation.scala @@ -20,6 +20,8 @@ package org.apache.spark.sql.json import java.io.IOException import org.apache.hadoop.fs.Path +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.catalyst.expressions.Row import org.apache.spark.sql.{SaveMode, DataFrame, SQLContext} import org.apache.spark.sql.sources._ @@ -66,9 +68,23 @@ private[sql] class DefaultSource mode match { case SaveMode.Append => sys.error(s"Append mode is not supported by ${this.getClass.getCanonicalName}") - case SaveMode.Overwrite => - fs.delete(filesystemPath, true) + case SaveMode.Overwrite => { + var success: Boolean = false + try { + success = fs.delete(filesystemPath, true) + } catch { + case e: IOException => + throw new IOException( + s"Unable to clear output directory ${filesystemPath.toString} prior" + + s" to writing to JSON table:\n${e.toString}") + } + if (!success) { + throw new IOException( + s"Unable to clear output directory ${filesystemPath.toString} prior" + + s" to writing to JSON table.") + } true + } case 
SaveMode.ErrorIfExists => sys.error(s"path $path already exists.") case SaveMode.Ignore => false @@ -104,21 +120,29 @@ private[sql] case class JSONRelation( samplingRatio, sqlContext.conf.columnNameOfCorruptRecord))) - override def buildScan() = + override def buildScan(): RDD[Row] = JsonRDD.jsonStringToRow(baseRDD, schema, sqlContext.conf.columnNameOfCorruptRecord) - override def insert(data: DataFrame, overwrite: Boolean) = { + override def insert(data: DataFrame, overwrite: Boolean): Unit = { val filesystemPath = new Path(path) val fs = filesystemPath.getFileSystem(sqlContext.sparkContext.hadoopConfiguration) if (overwrite) { - try { - fs.delete(filesystemPath, true) - } catch { - case e: IOException => + if (fs.exists(filesystemPath)) { + var success: Boolean = false + try { + success = fs.delete(filesystemPath, true) + } catch { + case e: IOException => + throw new IOException( + s"Unable to clear output directory ${filesystemPath.toString} prior" + + s" to writing to JSON table:\n${e.toString}") + } + if (!success) { throw new IOException( s"Unable to clear output directory ${filesystemPath.toString} prior" - + s" to INSERT OVERWRITE a JSON table:\n${e.toString}") + + s" to writing to JSON table.") + } } // Write the data. data.toJSON.saveAsTextFile(path) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetConverter.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetConverter.scala index 7d62f3728f036..43ca359b51735 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetConverter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetConverter.scala @@ -127,6 +127,12 @@ private[sql] object CatalystConverter { parent.updateByte(fieldIndex, value.asInstanceOf[ByteType.JvmType]) } } + case DateType => { + new CatalystPrimitiveConverter(parent, fieldIndex) { + override def addInt(value: Int): Unit = + parent.updateDate(fieldIndex, value.asInstanceOf[DateType.JvmType]) + } + } case d: DecimalType => { new CatalystPrimitiveConverter(parent, fieldIndex) { override def addBinary(value: Binary): Unit = @@ -192,6 +198,9 @@ private[parquet] abstract class CatalystConverter extends GroupConverter { protected[parquet] def updateInt(fieldIndex: Int, value: Int): Unit = updateField(fieldIndex, value) + protected[parquet] def updateDate(fieldIndex: Int, value: Int): Unit = + updateField(fieldIndex, value) + protected[parquet] def updateLong(fieldIndex: Int, value: Long): Unit = updateField(fieldIndex, value) @@ -388,6 +397,9 @@ private[parquet] class CatalystPrimitiveRowConverter( override protected[parquet] def updateInt(fieldIndex: Int, value: Int): Unit = current.setInt(fieldIndex, value) + override protected[parquet] def updateDate(fieldIndex: Int, value: Int): Unit = + current.update(fieldIndex, value) + override protected[parquet] def updateLong(fieldIndex: Int, value: Long): Unit = current.setLong(fieldIndex, value) @@ -488,7 +500,7 @@ private[parquet] object CatalystTimestampConverter { // Also we use NanoTime and Int96Values from parquet-examples. // We utilize jodd to convert between NanoTime and Timestamp val parquetTsCalendar = new ThreadLocal[Calendar] - def getCalendar = { + def getCalendar: Calendar = { // this is a cache for the calendar instance. 
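The JSON save/insert hunks above replace a bare delete with a guarded one: only delete when the path exists, and treat both an IOException and a false return from FileSystem.delete as a failure to clear the directory. A stand-alone sketch of that guard against the Hadoop FileSystem API, with a hypothetical helper name:

  import java.io.IOException
  import org.apache.hadoop.conf.Configuration
  import org.apache.hadoop.fs.{FileSystem, Path}

  def clearOutputDir(path: Path, conf: Configuration): Unit = {
    val fs: FileSystem = path.getFileSystem(conf)
    if (fs.exists(path)) {
      val deleted =
        try fs.delete(path, true)                  // recursive delete; returns false on failure
        catch {
          case e: IOException =>
            throw new IOException(s"Unable to clear output directory $path:\n${e.toString}")
        }
      if (!deleted) {
        throw new IOException(s"Unable to clear output directory $path.")
      }
    }
  }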
if (parquetTsCalendar.get == null) { parquetTsCalendar.set(Calendar.getInstance(TimeZone.getTimeZone("GMT"))) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetRelation.scala index fd161bae128ad..fcb9513ab66f6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetRelation.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetRelation.scala @@ -71,16 +71,22 @@ private[sql] case class ParquetRelation( sqlContext.conf.isParquetINT96AsTimestamp) lazy val attributeMap = AttributeMap(output.map(o => o -> o)) - override def newInstance() = ParquetRelation(path, conf, sqlContext).asInstanceOf[this.type] + override def newInstance(): this.type = { + ParquetRelation(path, conf, sqlContext).asInstanceOf[this.type] + } // Equals must also take into account the output attributes so that we can distinguish between // different instances of the same relation, - override def equals(other: Any) = other match { + override def equals(other: Any): Boolean = other match { case p: ParquetRelation => p.path == path && p.output == output case _ => false } + override def hashCode: Int = { + com.google.common.base.Objects.hashCode(path, output) + } + // TODO: Use data from the footers. override lazy val statistics = Statistics(sizeInBytes = sqlContext.conf.defaultSizeInBytes) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableOperations.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableOperations.scala index 5e0be7a98cc17..6dda47d14e842 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableOperations.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableOperations.scala @@ -152,8 +152,8 @@ private[sql] case class ParquetTableScan( if (primitiveRow) { new Iterator[Row] { - def hasNext = iter.hasNext - def next() = { + def hasNext: Boolean = iter.hasNext + def next(): Row = { // We are using CatalystPrimitiveRowConverter and it returns a SpecificMutableRow. val row = iter.next()._2.asInstanceOf[SpecificMutableRow] @@ -171,8 +171,8 @@ private[sql] case class ParquetTableScan( // Create a mutable row since we need to fill in values from partition columns. val mutableRow = new GenericMutableRow(outputSize) new Iterator[Row] { - def hasNext = iter.hasNext - def next() = { + def hasNext: Boolean = iter.hasNext + def next(): Row = { // We are using CatalystGroupConverter and it returns a GenericRow. // Since GenericRow is not mutable, we just cast it to a Row. val row = iter.next()._2.asInstanceOf[Row] @@ -255,7 +255,7 @@ private[sql] case class InsertIntoParquetTable( /** * Inserts all rows into the Parquet file. */ - override def execute() = { + override def execute(): RDD[Row] = { // TODO: currently we do not check whether the "schema"s are compatible // That means if one first creates a table and then INSERTs data with // and incompatible schema the execution will fail. It would be nice @@ -302,7 +302,7 @@ private[sql] case class InsertIntoParquetTable( childRdd } - override def output = child.output + override def output: Seq[Attribute] = child.output /** * Stores the given Row RDD as a Hadoop file. 
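ParquetRelation above, and ParquetRelation2 and LogicalRelation later in this patch, pair a custom equals with a hashCode built from the same fields, so hash-based plan caches keep treating equal relations as equal. A minimal sketch of the pairing with Guava's Objects.hashCode; the class is hypothetical, not from the patch:

  import com.google.common.base.Objects

  class CachedRelation(val path: String, val output: Seq[String]) {
    override def equals(other: Any): Boolean = other match {
      case r: CachedRelation => r.path == path && r.output == output
      case _ => false
    }
    // equals and hashCode must agree on the fields they compare, or hash-based
    // collections will treat equal relations as distinct entries.
    override def hashCode(): Int = Objects.hashCode(path, output)
  }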
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableSupport.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableSupport.scala index 19bfba34b8f4a..5a1b15490d273 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableSupport.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableSupport.scala @@ -212,6 +212,7 @@ private[parquet] class RowWriteSupport extends WriteSupport[Row] with Logging { case DoubleType => writer.addDouble(value.asInstanceOf[Double]) case FloatType => writer.addFloat(value.asInstanceOf[Float]) case BooleanType => writer.addBoolean(value.asInstanceOf[Boolean]) + case DateType => writer.addInteger(value.asInstanceOf[Int]) case d: DecimalType => if (d.precisionInfo == None || d.precisionInfo.get.precision > 18) { sys.error(s"Unsupported datatype $d, cannot write to consumer") @@ -358,6 +359,7 @@ private[parquet] class MutableRowWriteSupport extends RowWriteSupport { case DoubleType => writer.addDouble(record.getDouble(index)) case FloatType => writer.addFloat(record.getFloat(index)) case BooleanType => writer.addBoolean(record.getBoolean(index)) + case DateType => writer.addInteger(record.getInt(index)) case TimestampType => writeTimestamp(record(index).asInstanceOf[java.sql.Timestamp]) case d: DecimalType => if (d.precisionInfo == None || d.precisionInfo.get.precision > 18) { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTest.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTest.scala index d6ea6679c5966..9d17516e0ef7d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTest.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTest.scala @@ -23,7 +23,6 @@ import scala.reflect.ClassTag import scala.reflect.runtime.universe.TypeTag import scala.util.Try -import org.apache.spark.sql.catalyst.util import org.apache.spark.sql.{DataFrame, SQLContext, SaveMode} import org.apache.spark.util.Utils @@ -67,8 +66,9 @@ private[sql] trait ParquetTest { * @todo Probably this method should be moved to a more general place */ protected def withTempPath(f: File => Unit): Unit = { - val file = util.getTempFilePath("parquetTest").getCanonicalFile - try f(file) finally if (file.exists()) Utils.deleteRecursively(file) + val path = Utils.createTempDir() + path.delete() + try f(path) finally Utils.deleteRecursively(path) } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTypes.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTypes.scala index 5209581fa8357..da668f068613b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTypes.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTypes.scala @@ -64,6 +64,8 @@ private[parquet] object ParquetTypesConverter extends Logging { case ParquetPrimitiveTypeName.BOOLEAN => BooleanType case ParquetPrimitiveTypeName.DOUBLE => DoubleType case ParquetPrimitiveTypeName.FLOAT => FloatType + case ParquetPrimitiveTypeName.INT32 + if originalType == ParquetOriginalType.DATE => DateType case ParquetPrimitiveTypeName.INT32 => IntegerType case ParquetPrimitiveTypeName.INT64 => LongType case ParquetPrimitiveTypeName.INT96 if int96AsTimestamp => TimestampType @@ -222,6 +224,8 @@ private[parquet] object ParquetTypesConverter extends Logging { // There is no type for Byte or Short so we promote them to INT32. 
case ShortType => Some(ParquetTypeInfo(ParquetPrimitiveTypeName.INT32)) case ByteType => Some(ParquetTypeInfo(ParquetPrimitiveTypeName.INT32)) + case DateType => Some(ParquetTypeInfo( + ParquetPrimitiveTypeName.INT32, Some(ParquetOriginalType.DATE))) case LongType => Some(ParquetTypeInfo(ParquetPrimitiveTypeName.INT64)) case TimestampType => Some(ParquetTypeInfo(ParquetPrimitiveTypeName.INT96)) case DecimalType.Fixed(precision, scale) if precision <= 18 => diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala index 234e6bb8443af..410600b0529d3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.parquet import java.io.IOException import java.lang.{Double => JDouble, Float => JFloat, Long => JLong} import java.math.{BigDecimal => JBigDecimal} +import java.net.URI import java.text.SimpleDateFormat import java.util.{Date, List => JList} @@ -180,7 +181,7 @@ private[sql] case class ParquetRelation2( private val defaultPartitionName = parameters.getOrElse( ParquetRelation2.DEFAULT_PARTITION_NAME, "__HIVE_DEFAULT_PARTITION__") - override def equals(other: Any) = other match { + override def equals(other: Any): Boolean = other match { case relation: ParquetRelation2 => // If schema merging is required, we don't compare the actual schemas since they may evolve. val schemaEquality = if (shouldMergeSchemas) { @@ -197,6 +198,23 @@ private[sql] case class ParquetRelation2( case _ => false } + override def hashCode(): Int = { + if (shouldMergeSchemas) { + com.google.common.base.Objects.hashCode( + shouldMergeSchemas: java.lang.Boolean, + paths.toSet, + maybeMetastoreSchema, + maybePartitionSpec) + } else { + com.google.common.base.Objects.hashCode( + shouldMergeSchemas: java.lang.Boolean, + schema, + paths.toSet, + maybeMetastoreSchema, + maybePartitionSpec) + } + } + private[sql] def sparkContext = sqlContext.sparkContext private class MetadataCache { @@ -244,11 +262,10 @@ private[sql] case class ParquetRelation2( * Refreshes `FileStatus`es, footers, partition spec, and table schema. */ def refresh(): Unit = { - val fs = FileSystem.get(sparkContext.hadoopConfiguration) - // Support either reading a collection of raw Parquet part-files, or a collection of folders // containing Parquet files (e.g. partitioned Parquet table). val baseStatuses = paths.distinct.map { p => + val fs = FileSystem.get(URI.create(p), sparkContext.hadoopConfiguration) val qualified = fs.makeQualified(new Path(p)) if (!fs.exists(qualified) && maybeSchema.isDefined) { @@ -262,6 +279,7 @@ private[sql] case class ParquetRelation2( // Lists `FileStatus`es of all leaf nodes (files) under all base directories. 
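The DateType hunks in the Parquet converter, write support, and type mapping carry a date as an Int count of days since the Unix epoch and persist it as Parquet INT32 with the DATE original type. A small conversion sketch with java.time, using illustrative helpers rather than the ones Spark uses internally:

  import java.time.LocalDate

  def toDays(date: LocalDate): Int   = date.toEpochDay.toInt        // days since 1970-01-01
  def fromDays(days: Int): LocalDate = LocalDate.ofEpochDay(days.toLong)

  val days = toDays(LocalDate.of(2015, 3, 17))    // 16511
  assert(fromDays(days) == LocalDate.of(2015, 3, 17))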
val leaves = baseStatuses.flatMap { f => + val fs = FileSystem.get(f.getPath.toUri, sparkContext.hadoopConfiguration) SparkHadoopUtil.get.listLeafStatuses(fs, f.getPath).filter { f => isSummaryFile(f.getPath) || !(f.getPath.getName.startsWith("_") || f.getPath.getName.startsWith(".")) @@ -369,19 +387,19 @@ private[sql] case class ParquetRelation2( @transient private val metadataCache = new MetadataCache metadataCache.refresh() - def partitionSpec = metadataCache.partitionSpec + def partitionSpec: PartitionSpec = metadataCache.partitionSpec - def partitionColumns = metadataCache.partitionSpec.partitionColumns + def partitionColumns: StructType = metadataCache.partitionSpec.partitionColumns - def partitions = metadataCache.partitionSpec.partitions + def partitions: Seq[Partition] = metadataCache.partitionSpec.partitions - def isPartitioned = partitionColumns.nonEmpty + def isPartitioned: Boolean = partitionColumns.nonEmpty private def partitionKeysIncludedInDataSchema = metadataCache.partitionKeysIncludedInParquetSchema private def parquetSchema = metadataCache.parquetSchema - override def schema = metadataCache.schema + override def schema: StructType = metadataCache.schema private def isSummaryFile(file: Path): Boolean = { file.getName == ParquetFileWriter.PARQUET_COMMON_METADATA_FILE || @@ -424,8 +442,10 @@ private[sql] case class ParquetRelation2( .foreach(ParquetInputFormat.setFilterPredicate(jobConf, _)) if (isPartitioned) { - def percentRead = selectedPartitions.size.toDouble / partitions.size.toDouble * 100 - logInfo(s"Reading $percentRead% of partitions") + logInfo { + val percentRead = selectedPartitions.size.toDouble / partitions.size.toDouble * 100 + s"Reading $percentRead% of partitions" + } } val requiredColumns = output.map(_.name) @@ -591,13 +611,22 @@ private[sql] case class ParquetRelation2( val destinationPath = new Path(paths.head) if (overwrite) { - try { - destinationPath.getFileSystem(conf).delete(destinationPath, true) - } catch { - case e: IOException => + val fs = destinationPath.getFileSystem(conf) + if (fs.exists(destinationPath)) { + var success: Boolean = false + try { + success = fs.delete(destinationPath, true) + } catch { + case e: IOException => + throw new IOException( + s"Unable to clear output directory ${destinationPath.toString} prior" + + s" to writing to Parquet table:\n${e.toString}") + } + if (!success) { throw new IOException( s"Unable to clear output directory ${destinationPath.toString} prior" + - s" to writing to Parquet file:\n${e.toString}") + s" to writing to Parquet table.") + } } } @@ -652,7 +681,7 @@ private[sql] case class ParquetRelation2( } } -private[sql] object ParquetRelation2 { +private[sql] object ParquetRelation2 extends Logging { // Whether we should merge schemas collected from all Parquet part-files. val MERGE_SCHEMA = "mergeSchema" @@ -672,7 +701,26 @@ private[sql] object ParquetRelation2 { .getKeyValueMetaData .toMap .get(RowReadSupport.SPARK_METADATA_KEY) - .map(DataType.fromJson(_).asInstanceOf[StructType]) + .flatMap { serializedSchema => + // Don't throw even if we failed to parse the serialized Spark schema. Just fallback to + // whatever is available. 
+ Try(DataType.fromJson(serializedSchema)) + .recover { case _: Throwable => + logInfo( + s"Serialized Spark schema in Parquet key-value metadata is not in JSON format, " + + "falling back to the deprecated DataType.fromCaseClassString parser.") + DataType.fromCaseClassString(serializedSchema) + } + .recover { case cause: Throwable => + logWarning( + s"""Failed to parse serialized Spark schema in Parquet key-value metadata: + |\t$serializedSchema + """.stripMargin, + cause) + } + .map(_.asInstanceOf[StructType]) + .toOption + } maybeSparkSchema.getOrElse { // Falls back to Parquet schema if Spark SQL schema is absent. @@ -702,7 +750,7 @@ private[sql] object ParquetRelation2 { private[parquet] def mergeMetastoreParquetSchema( metastoreSchema: StructType, parquetSchema: StructType): StructType = { - def schemaConflictMessage = + def schemaConflictMessage: String = s"""Converting Hive Metastore Parquet, but detected conflicting schemas. Metastore schema: |${metastoreSchema.prettyJson} | diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/timestamp/NanoTime.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/timestamp/NanoTime.scala index e24475292ceaf..70bcca7526aae 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/timestamp/NanoTime.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/timestamp/NanoTime.scala @@ -26,7 +26,7 @@ private[parquet] class NanoTime extends Serializable { private var julianDay = 0 private var timeOfDayNanos = 0L - def set(julianDay: Int, timeOfDayNanos: Long) = { + def set(julianDay: Int, timeOfDayNanos: Long): this.type = { this.julianDay = julianDay this.timeOfDayNanos = timeOfDayNanos this @@ -45,11 +45,11 @@ private[parquet] class NanoTime extends Serializable { Binary.fromByteBuffer(buf) } - def writeValue(recordConsumer: RecordConsumer) { + def writeValue(recordConsumer: RecordConsumer): Unit = { recordConsumer.addBinary(toBinary) } - override def toString = + override def toString: String = "NanoTime{julianDay=" + julianDay + ", timeOfDayNanos=" + timeOfDayNanos + "}" } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/LogicalRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/LogicalRelation.scala index 12b59ba20bb10..f374abffdd505 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/sources/LogicalRelation.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/LogicalRelation.scala @@ -30,24 +30,28 @@ private[sql] case class LogicalRelation(relation: BaseRelation) override val output: Seq[AttributeReference] = relation.schema.toAttributes // Logical Relations are distinct if they have different output for the sake of transformations. 
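The schema-deserialization hunk above chains Try(...).recover(...).toOption so that a malformed serialized schema degrades to a fallback parser and finally to None instead of failing the read. The same shape in isolation, with hypothetical parsers and only the standard-library Try:

  import scala.util.Try

  def parsePrimary(s: String): Int = s.toInt                        // strict format, may throw
  def parseLegacy(s: String): Int  = s.trim.stripPrefix("#").toInt  // older format, may also throw

  def parse(s: String): Option[Int] =
    Try(parsePrimary(s))
      .recover { case _: NumberFormatException => parseLegacy(s) }  // second chance
      .toOption                                                     // give up quietly on failure

  parse("42")     // Some(42)
  parse("#42")    // Some(42)
  parse("oops")   // None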
- override def equals(other: Any) = other match { + override def equals(other: Any): Boolean = other match { case l @ LogicalRelation(otherRelation) => relation == otherRelation && output == l.output case _ => false } - override def sameResult(otherPlan: LogicalPlan) = otherPlan match { + override def hashCode: Int = { + com.google.common.base.Objects.hashCode(relation, output) + } + + override def sameResult(otherPlan: LogicalPlan): Boolean = otherPlan match { case LogicalRelation(otherRelation) => relation == otherRelation case _ => false } - @transient override lazy val statistics = Statistics( + @transient override lazy val statistics: Statistics = Statistics( sizeInBytes = BigInt(relation.sizeInBytes) ) /** Used to lookup original attribute capitalization */ - val attributeMap = AttributeMap(output.map(o => (o, o))) + val attributeMap: AttributeMap[AttributeReference] = AttributeMap(output.map(o => (o, o))) - def newInstance() = LogicalRelation(relation).asInstanceOf[this.type] + def newInstance(): this.type = LogicalRelation(relation).asInstanceOf[this.type] - override def simpleString = s"Relation[${output.mkString(",")}] $relation" + override def simpleString: String = s"Relation[${output.mkString(",")}] $relation" } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/commands.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/commands.scala index 0e540dad81283..9bbe06e59ba30 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/sources/commands.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/commands.scala @@ -27,7 +27,7 @@ private[sql] case class InsertIntoDataSource( overwrite: Boolean) extends RunnableCommand { - override def run(sqlContext: SQLContext) = { + override def run(sqlContext: SQLContext): Seq[Row] = { val relation = logicalRelation.relation.asInstanceOf[InsertableRelation] val data = DataFrame(sqlContext, query) // Apply the schema of the existing table to the new data. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/ddl.scala index 76754a6ce4617..d2e807d3a69b6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/sources/ddl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/ddl.scala @@ -34,7 +34,8 @@ import org.apache.spark.util.Utils * A parser for foreign DDL commands. */ private[sql] class DDLParser( - parseQuery: String => LogicalPlan) extends AbstractSparkSQLParser with Logging { + parseQuery: String => LogicalPlan) + extends AbstractSparkSQLParser with DataTypeParser with Logging { def apply(input: String, exceptionOnError: Boolean): Option[LogicalPlan] = { try { @@ -46,14 +47,6 @@ private[sql] class DDLParser( } } - def parseType(input: String): DataType = { - lexical.initialize(reservedWords) - phrase(dataType)(new lexical.Scanner(input)) match { - case Success(r, x) => r - case x => throw new DDLException(s"Unsupported dataType: $x") - } - } - // Keyword is a convention with AbstractSparkSQLParser, which will scan all of the `Keyword` // properties via reflection the class in runtime for constructing the SqlLexical object protected val CREATE = Keyword("CREATE") @@ -70,24 +63,6 @@ private[sql] class DDLParser( protected val COMMENT = Keyword("COMMENT") protected val REFRESH = Keyword("REFRESH") - // Data types. 
- protected val STRING = Keyword("STRING") - protected val BINARY = Keyword("BINARY") - protected val BOOLEAN = Keyword("BOOLEAN") - protected val TINYINT = Keyword("TINYINT") - protected val SMALLINT = Keyword("SMALLINT") - protected val INT = Keyword("INT") - protected val BIGINT = Keyword("BIGINT") - protected val FLOAT = Keyword("FLOAT") - protected val DOUBLE = Keyword("DOUBLE") - protected val DECIMAL = Keyword("DECIMAL") - protected val DATE = Keyword("DATE") - protected val TIMESTAMP = Keyword("TIMESTAMP") - protected val VARCHAR = Keyword("VARCHAR") - protected val ARRAY = Keyword("ARRAY") - protected val MAP = Keyword("MAP") - protected val STRUCT = Keyword("STRUCT") - protected lazy val ddl: Parser[LogicalPlan] = createTable | describeTable | refreshTable protected def start: Parser[LogicalPlan] = ddl @@ -189,58 +164,9 @@ private[sql] class DDLParser( new MetadataBuilder().putString(COMMENT.str.toLowerCase, comment).build() case None => Metadata.empty } - StructField(columnName, typ, nullable = true, meta) - } - - protected lazy val primitiveType: Parser[DataType] = - STRING ^^^ StringType | - BINARY ^^^ BinaryType | - BOOLEAN ^^^ BooleanType | - TINYINT ^^^ ByteType | - SMALLINT ^^^ ShortType | - INT ^^^ IntegerType | - BIGINT ^^^ LongType | - FLOAT ^^^ FloatType | - DOUBLE ^^^ DoubleType | - fixedDecimalType | // decimal with precision/scale - DECIMAL ^^^ DecimalType.Unlimited | // decimal with no precision/scale - DATE ^^^ DateType | - TIMESTAMP ^^^ TimestampType | - VARCHAR ~ "(" ~ numericLit ~ ")" ^^^ StringType - - protected lazy val fixedDecimalType: Parser[DataType] = - (DECIMAL ~ "(" ~> numericLit) ~ ("," ~> numericLit <~ ")") ^^ { - case precision ~ scale => DecimalType(precision.toInt, scale.toInt) - } - - protected lazy val arrayType: Parser[DataType] = - ARRAY ~> "<" ~> dataType <~ ">" ^^ { - case tpe => ArrayType(tpe) - } - protected lazy val mapType: Parser[DataType] = - MAP ~> "<" ~> dataType ~ "," ~ dataType <~ ">" ^^ { - case t1 ~ _ ~ t2 => MapType(t1, t2) - } - - protected lazy val structField: Parser[StructField] = - ident ~ ":" ~ dataType ^^ { - case fieldName ~ _ ~ tpe => StructField(fieldName, tpe, nullable = true) + StructField(columnName, typ, nullable = true, meta) } - - protected lazy val structType: Parser[DataType] = - (STRUCT ~> "<" ~> repsep(structField, ",") <~ ">" ^^ { - case fields => StructType(fields) - }) | - (STRUCT ~> "<>" ^^ { - case fields => StructType(Nil) - }) - - private[sql] lazy val dataType: Parser[DataType] = - arrayType | - mapType | - structType | - primitiveType } private[sql] object ResolvedDataSource { @@ -362,7 +288,7 @@ private[sql] case class CreateTableUsingAsSelect( mode: SaveMode, options: Map[String, String], child: LogicalPlan) extends UnaryNode { - override def output = Seq.empty[Attribute] + override def output: Seq[Attribute] = Seq.empty[Attribute] // TODO: Override resolved after we support databaseName. 
// override lazy val resolved = databaseName != None && childrenResolved } @@ -373,7 +299,7 @@ private[sql] case class CreateTempTableUsing( provider: String, options: Map[String, String]) extends RunnableCommand { - def run(sqlContext: SQLContext) = { + def run(sqlContext: SQLContext): Seq[Row] = { val resolved = ResolvedDataSource(sqlContext, userSpecifiedSchema, provider, options) sqlContext.registerDataFrameAsTable( DataFrame(sqlContext, LogicalRelation(resolved.relation)), tableName) @@ -388,7 +314,7 @@ private[sql] case class CreateTempTableUsingAsSelect( options: Map[String, String], query: LogicalPlan) extends RunnableCommand { - def run(sqlContext: SQLContext) = { + def run(sqlContext: SQLContext): Seq[Row] = { val df = DataFrame(sqlContext, query) val resolved = ResolvedDataSource(sqlContext, provider, mode, options, df) sqlContext.registerDataFrameAsTable( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/rules.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/rules.scala index cfa58f1442218..5a78001117d1b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/sources/rules.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/rules.scala @@ -53,7 +53,7 @@ private[sql] object PreInsertCastAndRename extends Rule[LogicalPlan] { def castAndRenameChildOutput( insertInto: InsertIntoTable, expectedOutput: Seq[Attribute], - child: LogicalPlan) = { + child: LogicalPlan): InsertIntoTable = { val newChildOutput = expectedOutput.zip(child.output).map { case (expected, actual) => val needCast = !expected.dataType.sameType(actual.dataType) @@ -79,7 +79,7 @@ private[sql] object PreInsertCastAndRename extends Rule[LogicalPlan] { * A rule to do various checks before inserting into or writing to a data source table. */ private[sql] case class PreWriteCheck(catalog: Catalog) extends (LogicalPlan => Unit) { - def failAnalysis(msg: String) = { throw new AnalysisException(msg) } + def failAnalysis(msg: String): Unit = { throw new AnalysisException(msg) } def apply(plan: LogicalPlan): Unit = { plan.foreach { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/test/TestSQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/test/TestSQLContext.scala index 4e1ec38bd0158..356a6100d2cf5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/test/TestSQLContext.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/test/TestSQLContext.scala @@ -24,16 +24,22 @@ import org.apache.spark.sql.{DataFrame, SQLConf, SQLContext} import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan /** A SQLContext that can be used for local testing. */ -object TestSQLContext +class LocalSQLContext extends SQLContext( new SparkContext( "local[2]", "TestSQLContext", new SparkConf().set("spark.sql.testkey", "true"))) { - /** Fewer partitions to speed up testing. */ - protected[sql] override lazy val conf: SQLConf = new SQLConf { - override def numShufflePartitions: Int = this.getConf(SQLConf.SHUFFLE_PARTITIONS, "5").toInt + override protected[sql] def createSession(): SQLSession = { + new this.SQLSession() + } + + protected[sql] class SQLSession extends super.SQLSession { + protected[sql] override lazy val conf: SQLConf = new SQLConf { + /** Fewer partitions to speed up testing. 
*/ + override def numShufflePartitions: Int = this.getConf(SQLConf.SHUFFLE_PARTITIONS, "5").toInt + } } /** @@ -45,3 +51,6 @@ object TestSQLContext } } + +object TestSQLContext extends LocalSQLContext + diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala index 3036fbc05d021..a53ae97d6243a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala @@ -17,6 +17,8 @@ package org.apache.spark.sql +import org.apache.spark.sql.catalyst.expressions.NamedExpression +import org.apache.spark.sql.catalyst.plans.logical.{Project, NoRelation} import org.apache.spark.sql.functions._ import org.apache.spark.sql.test.TestSQLContext import org.apache.spark.sql.test.TestSQLContext.implicits._ @@ -311,7 +313,9 @@ class ColumnExpressionSuite extends QueryTest { } test("lift alias out of cast") { - assert(col("1234").as("name").cast("int").expr === col("1234").cast("int").as("name").expr) + compareExpressions( + col("1234").as("name").cast("int").expr, + col("1234").cast("int").as("name").expr) } test("columns can be compared") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 4dedcd365f6cc..a3c0076e16d6c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -36,6 +36,37 @@ class SQLQuerySuite extends QueryTest with BeforeAndAfterAll { import org.apache.spark.sql.test.TestSQLContext.implicits._ val sqlCtx = TestSQLContext + test("self join with aliases") { + Seq(1,2,3).map(i => (i, i.toString)).toDF("int", "str").registerTempTable("df") + + checkAnswer( + sql( + """ + |SELECT x.str, COUNT(*) + |FROM df x JOIN df y ON x.str = y.str + |GROUP BY x.str + """.stripMargin), + Row("1", 1) :: Row("2", 1) :: Row("3", 1) :: Nil) + } + + test("self join with alias in agg") { + Seq(1,2,3) + .map(i => (i, i.toString)) + .toDF("int", "str") + .groupBy("str") + .agg($"str", count("str").as("strCount")) + .registerTempTable("df") + + checkAnswer( + sql( + """ + |SELECT x.str, SUM(x.strCount) + |FROM df x JOIN df y ON x.str = y.str + |GROUP BY x.str + """.stripMargin), + Row("1", 1) :: Row("2", 1) :: Row("3", 1) :: Nil) + } + test("SPARK-4625 support SORT BY in SimpleSQLParser & DSL") { checkAnswer( sql("SELECT a FROM testData2 SORT BY a"), diff --git a/sql/core/src/test/scala/org/apache/spark/sql/UserDefinedTypeSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/UserDefinedTypeSuite.scala index 23f424c0bfc7c..fe618e0e8e767 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/UserDefinedTypeSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/UserDefinedTypeSuite.scala @@ -19,6 +19,8 @@ package org.apache.spark.sql import java.io.File +import org.apache.spark.util.Utils + import scala.beans.{BeanInfo, BeanProperty} import org.apache.spark.rdd.RDD @@ -98,13 +100,13 @@ class UserDefinedTypeSuite extends QueryTest { test("UDTs with Parquet") { - val tempDir = File.createTempFile("parquet", "test") + val tempDir = Utils.createTempDir() tempDir.delete() pointsRDD.saveAsParquetFile(tempDir.getCanonicalPath) } test("Repartition UDTs with Parquet") { - val tempDir = File.createTempFile("parquet", "test") + val tempDir = Utils.createTempDir() tempDir.delete() 
pointsRDD.repartition(1).saveAsParquetFile(tempDir.getCanonicalPath) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala index cd737c0b62767..5eb6ab2e92e8b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala @@ -24,6 +24,7 @@ import java.util.{Calendar, GregorianCalendar} import org.apache.spark.sql.test._ import org.scalatest.{FunSuite, BeforeAndAfter} import TestSQLContext._ +import TestSQLContext.implicits._ class JDBCSuite extends FunSuite with BeforeAndAfter { val url = "jdbc:h2:mem:testdb0" @@ -38,7 +39,7 @@ class JDBCSuite extends FunSuite with BeforeAndAfter { conn.prepareStatement("create table test.people (name TEXT(32) NOT NULL, theid INTEGER NOT NULL)").executeUpdate() conn.prepareStatement("insert into test.people values ('fred', 1)").executeUpdate() conn.prepareStatement("insert into test.people values ('mary', 2)").executeUpdate() - conn.prepareStatement("insert into test.people values ('joe', 3)").executeUpdate() + conn.prepareStatement("insert into test.people values ('joe ''foo'' \"bar\"', 3)").executeUpdate() conn.commit() sql( @@ -129,13 +130,20 @@ class JDBCSuite extends FunSuite with BeforeAndAfter { assert(sql("SELECT * FROM foobar WHERE THEID < 1").collect().size == 0) assert(sql("SELECT * FROM foobar WHERE THEID != 2").collect().size == 2) assert(sql("SELECT * FROM foobar WHERE THEID = 1").collect().size == 1) + assert(sql("SELECT * FROM foobar WHERE NAME = 'fred'").collect().size == 1) + assert(sql("SELECT * FROM foobar WHERE NAME > 'fred'").collect().size == 2) + assert(sql("SELECT * FROM foobar WHERE NAME != 'fred'").collect().size == 2) + } + + test("SELECT * WHERE (quoted strings)") { + assert(sql("select * from foobar").where('NAME === "joe 'foo' \"bar\"").collect().size == 1) } test("SELECT first field") { val names = sql("SELECT NAME FROM foobar").collect().map(x => x.getString(0)).sortWith(_ < _) assert(names.size == 3) assert(names(0).equals("fred")) - assert(names(1).equals("joe")) + assert(names(1).equals("joe 'foo' \"bar\"")) assert(names(2).equals("mary")) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/json/JsonSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/json/JsonSuite.scala index 320b80d80e997..706c966ee05f5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/json/JsonSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/json/JsonSuite.scala @@ -22,7 +22,6 @@ import java.sql.{Date, Timestamp} import org.scalactic.Tolerance._ import org.apache.spark.sql.TestData._ -import org.apache.spark.sql.catalyst.util._ import org.apache.spark.sql.functions._ import org.apache.spark.sql.json.JsonRDD.{compatibleType, enforceCorrectType} import org.apache.spark.sql.sources.LogicalRelation @@ -31,6 +30,7 @@ import org.apache.spark.sql.test.TestSQLContext._ import org.apache.spark.sql.test.TestSQLContext.implicits._ import org.apache.spark.sql.types._ import org.apache.spark.sql.{QueryTest, Row, SQLConf} +import org.apache.spark.util.Utils class JsonSuite extends QueryTest { import org.apache.spark.sql.json.TestJsonData._ @@ -554,8 +554,9 @@ class JsonSuite extends QueryTest { } test("jsonFile should be based on JSONRelation") { - val file = getTempFilePath("json") - val path = file.toString + val dir = Utils.createTempDir() + dir.delete() + val path = dir.getCanonicalPath sparkContext.parallelize(1 to 100).map(i => s"""{"a": 1, 
"b": "str$i"}""").saveAsTextFile(path) val jsonDF = jsonFile(path, 0.49) @@ -580,8 +581,9 @@ class JsonSuite extends QueryTest { } test("Loading a JSON dataset from a text file") { - val file = getTempFilePath("json") - val path = file.toString + val dir = Utils.createTempDir() + dir.delete() + val path = dir.getCanonicalPath primitiveFieldAndType.map(record => record.replaceAll("\n", " ")).saveAsTextFile(path) val jsonDF = jsonFile(path) @@ -611,8 +613,9 @@ class JsonSuite extends QueryTest { } test("Loading a JSON dataset from a text file with SQL") { - val file = getTempFilePath("json") - val path = file.toString + val dir = Utils.createTempDir() + dir.delete() + val path = dir.getCanonicalPath primitiveFieldAndType.map(record => record.replaceAll("\n", " ")).saveAsTextFile(path) sql( @@ -637,8 +640,9 @@ class JsonSuite extends QueryTest { } test("Applying schemas") { - val file = getTempFilePath("json") - val path = file.toString + val dir = Utils.createTempDir() + dir.delete() + val path = dir.getCanonicalPath primitiveFieldAndType.map(record => record.replaceAll("\n", " ")).saveAsTextFile(path) val schema = StructType( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetIOSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetIOSuite.scala index 5065ee842d97a..b0fb81ec52db0 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetIOSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetIOSuite.scala @@ -28,8 +28,8 @@ import parquet.example.data.simple.SimpleGroup import parquet.example.data.{Group, GroupWriter} import parquet.hadoop.api.WriteSupport import parquet.hadoop.api.WriteSupport.WriteContext -import parquet.hadoop.metadata.CompressionCodecName -import parquet.hadoop.{ParquetFileWriter, ParquetWriter} +import parquet.hadoop.metadata.{ParquetMetadata, FileMetaData, CompressionCodecName} +import parquet.hadoop.{Footer, ParquetFileWriter, ParquetWriter} import parquet.io.api.RecordConsumer import parquet.schema.{MessageType, MessageTypeParser} @@ -38,7 +38,7 @@ import org.apache.spark.sql.catalyst.expressions.Row import org.apache.spark.sql.test.TestSQLContext import org.apache.spark.sql.test.TestSQLContext._ import org.apache.spark.sql.test.TestSQLContext.implicits._ -import org.apache.spark.sql.types.DecimalType +import org.apache.spark.sql.types._ import org.apache.spark.sql.{DataFrame, QueryTest, SQLConf, SaveMode} // Write support class for nested groups: ParquetWriter initializes GroupWriteSupport @@ -135,6 +135,21 @@ class ParquetIOSuiteBase extends QueryTest with ParquetTest { } } + test("date type") { + def makeDateRDD(): DataFrame = + sparkContext + .parallelize(0 to 1000) + .map(i => Tuple1(DateUtils.toJavaDate(i))) + .toDF() + .select($"_1") + + withTempPath { dir => + val data = makeDateRDD() + data.saveAsParquetFile(dir.getCanonicalPath) + checkAnswer(parquetFile(dir.getCanonicalPath), data.collect().toSeq) + } + } + test("map") { val data = (1 to 4).map(i => Tuple1(Map(i -> s"val_$i"))) checkParquetFile(data) @@ -330,7 +345,46 @@ class ParquetIOSuiteBase extends QueryTest with ParquetTest { } } + test("SPARK-6315 regression test") { + // Spark 1.1 and prior versions write Spark schema as case class string into Parquet metadata. + // This has been deprecated by JSON format since 1.2. Notice that, 1.3 further refactored data + // types API, and made StructType.fields an array. 
This makes the result of StructType.toString + // different from prior versions: there's no "Seq" wrapping the fields part in the string now. + val sparkSchema = + "StructType(Seq(StructField(a,BooleanType,false),StructField(b,IntegerType,false)))" + + // The Parquet schema is intentionally made different from the Spark schema. Because the new + // Parquet data source simply falls back to the Parquet schema once it fails to parse the Spark + // schema. By making these two different, we are able to assert the old style case class string + // is parsed successfully. + val parquetSchema = MessageTypeParser.parseMessageType( + """message root { + | required int32 c; + |} + """.stripMargin) + + withTempPath { location => + val extraMetadata = Map(RowReadSupport.SPARK_METADATA_KEY -> sparkSchema.toString) + val fileMetadata = new FileMetaData(parquetSchema, extraMetadata, "Spark") + val path = new Path(location.getCanonicalPath) + + ParquetFileWriter.writeMetadataFile( + sparkContext.hadoopConfiguration, + path, + new Footer(path, new ParquetMetadata(fileMetadata, Nil)) :: Nil) + + assertResult(parquetFile(path.toString).schema) { + StructType( + StructField("a", BooleanType, nullable = false) :: + StructField("b", IntegerType, nullable = false) :: + Nil) + } + } + } + test("SPARK-6352 DirectParquetOutputCommitter") { + // Write to a parquet file and let it fail. + // _temporary should be missing if direct output committer works. try { configuration.set("spark.sql.parquet.output.committer.class", "org.apache.spark.sql.parquet.DirectParquetOutputCommitter") @@ -360,6 +414,18 @@ class ParquetDataSourceOnIOSuite extends ParquetIOSuiteBase with BeforeAndAfterA override protected def afterAll(): Unit = { sqlContext.setConf(SQLConf.PARQUET_USE_DATA_SOURCE_API, originalConf.toString) } + + test("SPARK-6330 regression test") { + // In 1.3.0, save to fs other than file: without configuring core-site.xml would get: + // IllegalArgumentException: Wrong FS: hdfs://..., expected: file:/// + intercept[java.io.FileNotFoundException] { + sqlContext.parquetFile("file:///nonexistent") + } + val errorMessage = intercept[Throwable] { + sqlContext.parquetFile("hdfs://nonexistent") + }.toString + assert(errorMessage.contains("UnknownHostException")) + } } class ParquetDataSourceOffIOSuite extends ParquetIOSuiteBase with BeforeAndAfterAll { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetSchemaSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetSchemaSuite.scala index ad880e2bc3679..321832cd43211 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetSchemaSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetSchemaSuite.scala @@ -57,7 +57,7 @@ class ParquetSchemaSuite extends FunSuite with ParquetTest { |} """.stripMargin) - testSchema[(Byte, Short, Int, Long)]( + testSchema[(Byte, Short, Int, Long, java.sql.Date)]( "logical integral types", """ |message root { @@ -65,6 +65,7 @@ class ParquetSchemaSuite extends FunSuite with ParquetTest { | required int32 _2 (INT_16); | required int32 _3 (INT_32); | required int64 _4 (INT_64); + | optional int32 _5 (DATE); |} """.stripMargin) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala index 60355414a40fa..20a23b3bd6aa9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala +++ 
b/sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala @@ -17,12 +17,11 @@ package org.apache.spark.sql.sources -import java.io.File +import java.io.{IOException, File} import org.apache.spark.sql.AnalysisException import org.scalatest.BeforeAndAfterAll -import org.apache.spark.sql.catalyst.util import org.apache.spark.util.Utils class CreateTableAsSelectSuite extends DataSourceTest with BeforeAndAfterAll { @@ -32,7 +31,7 @@ class CreateTableAsSelectSuite extends DataSourceTest with BeforeAndAfterAll { var path: File = null override def beforeAll(): Unit = { - path = util.getTempFilePath("jsonCTAS").getCanonicalFile + path = Utils.createTempDir() val rdd = sparkContext.parallelize((1 to 10).map(i => s"""{"a":$i, "b":"str${i}"}""")) jsonRDD(rdd).registerTempTable("jt") } @@ -42,7 +41,7 @@ class CreateTableAsSelectSuite extends DataSourceTest with BeforeAndAfterAll { } after { - if (path.exists()) Utils.deleteRecursively(path) + Utils.deleteRecursively(path) } test("CREATE TEMPORARY TABLE AS SELECT") { @@ -63,6 +62,29 @@ class CreateTableAsSelectSuite extends DataSourceTest with BeforeAndAfterAll { dropTempTable("jsonTable") } + test("CREATE TEMPORARY TABLE AS SELECT based on the file without write permission") { + val childPath = new File(path.toString, "child") + path.mkdir() + childPath.createNewFile() + path.setWritable(false) + + val e = intercept[IOException] { + sql( + s""" + |CREATE TEMPORARY TABLE jsonTable + |USING org.apache.spark.sql.json.DefaultSource + |OPTIONS ( + | path '${path.toString}' + |) AS + |SELECT a, b FROM jt + """.stripMargin) + sql("SELECT a, b FROM jsonTable").collect() + } + assert(e.getMessage().contains("Unable to clear output directory")) + + path.setWritable(true) + } + test("create a table, drop it and create another one with the same name") { sql( s""" diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala index b5b16f9546691..80efe9728fbc2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala @@ -22,7 +22,6 @@ import java.io.File import org.scalatest.BeforeAndAfterAll import org.apache.spark.sql.{AnalysisException, Row} -import org.apache.spark.sql.catalyst.util import org.apache.spark.util.Utils class InsertSuite extends DataSourceTest with BeforeAndAfterAll { @@ -32,7 +31,7 @@ class InsertSuite extends DataSourceTest with BeforeAndAfterAll { var path: File = null override def beforeAll: Unit = { - path = util.getTempFilePath("jsonCTAS").getCanonicalFile + path = Utils.createTempDir() val rdd = sparkContext.parallelize((1 to 10).map(i => s"""{"a":$i, "b":"str${i}"}""")) jsonRDD(rdd).registerTempTable("jt") sql( @@ -48,7 +47,7 @@ class InsertSuite extends DataSourceTest with BeforeAndAfterAll { override def afterAll: Unit = { dropTempTable("jsonTable") dropTempTable("jt") - if (path.exists()) Utils.deleteRecursively(path) + Utils.deleteRecursively(path) } test("Simple INSERT OVERWRITE a JSONRelation") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/SaveLoadSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/SaveLoadSuite.scala index 607488ccfdd6a..43bc8eb2d11a7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/SaveLoadSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/SaveLoadSuite.scala @@ -21,7 +21,6 @@ import java.io.File import 
org.scalatest.BeforeAndAfterAll -import org.apache.spark.sql.catalyst.util import org.apache.spark.sql.{SaveMode, SQLConf, DataFrame} import org.apache.spark.sql.types._ import org.apache.spark.util.Utils @@ -39,7 +38,8 @@ class SaveLoadSuite extends DataSourceTest with BeforeAndAfterAll { override def beforeAll(): Unit = { originalDefaultSource = conf.defaultDataSourceName - path = util.getTempFilePath("datasource").getCanonicalFile + path = Utils.createTempDir() + path.delete() val rdd = sparkContext.parallelize((1 to 10).map(i => s"""{"a":$i, "b":"str${i}"}""")) df = jsonRDD(rdd) @@ -52,7 +52,7 @@ class SaveLoadSuite extends DataSourceTest with BeforeAndAfterAll { after { conf.setConf(SQLConf.DEFAULT_DATA_SOURCE_NAME, originalDefaultSource) - if (path.exists()) Utils.deleteRecursively(path) + Utils.deleteRecursively(path) } def checkLoad(): Unit = { diff --git a/sql/hive-thriftserver/pom.xml b/sql/hive-thriftserver/pom.xml index f466a3c0b5dc2..a96b1ffc26966 100644 --- a/sql/hive-thriftserver/pom.xml +++ b/sql/hive-thriftserver/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.10 - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT ../../pom.xml diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala index 401e97b162dea..6272cdedb3e48 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala @@ -194,28 +194,29 @@ private[hive] object SparkSQLCLIDriver { val currentDB = ReflectionUtils.invokeStatic(classOf[CliDriver], "getFormattedDb", classOf[HiveConf] -> conf, classOf[CliSessionState] -> sessionState) - def promptWithCurrentDB = s"$prompt$currentDB" - def continuedPromptWithDBSpaces = continuedPrompt + ReflectionUtils.invokeStatic( + def promptWithCurrentDB: String = s"$prompt$currentDB" + def continuedPromptWithDBSpaces: String = continuedPrompt + ReflectionUtils.invokeStatic( classOf[CliDriver], "spacesForString", classOf[String] -> currentDB) var currentPrompt = promptWithCurrentDB var line = reader.readLine(currentPrompt + "> ") while (line != null) { - if (prefix.nonEmpty) { - prefix += '\n' - } + if (!line.startsWith("--")) { + if (prefix.nonEmpty) { + prefix += '\n' + } - if (line.trim().endsWith(";") && !line.trim().endsWith("\\;")) { - line = prefix + line - ret = cli.processLine(line, true) - prefix = "" - currentPrompt = promptWithCurrentDB - } else { - prefix = prefix + line - currentPrompt = continuedPromptWithDBSpaces + if (line.trim().endsWith(";") && !line.trim().endsWith("\\;")) { + line = prefix + line + ret = cli.processLine(line, true) + prefix = "" + currentPrompt = promptWithCurrentDB + } else { + prefix = prefix + line + currentPrompt = continuedPromptWithDBSpaces + } } - line = reader.readLine(currentPrompt + "> ") } diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLSessionManager.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLSessionManager.scala deleted file mode 100644 index 89e9ede7261c9..0000000000000 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLSessionManager.scala +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.hive.thriftserver - -import java.util.concurrent.Executors - -import org.apache.commons.logging.Log -import org.apache.hadoop.hive.conf.HiveConf -import org.apache.hadoop.hive.conf.HiveConf.ConfVars -import org.apache.hive.service.cli.session.SessionManager - -import org.apache.spark.sql.hive.HiveContext -import org.apache.spark.sql.hive.thriftserver.ReflectionUtils._ -import org.apache.spark.sql.hive.thriftserver.server.SparkSQLOperationManager -import org.apache.hive.service.cli.SessionHandle - -private[hive] class SparkSQLSessionManager(hiveContext: HiveContext) - extends SessionManager - with ReflectedCompositeService { - - private lazy val sparkSqlOperationManager = new SparkSQLOperationManager(hiveContext) - - override def init(hiveConf: HiveConf) { - setSuperField(this, "hiveConf", hiveConf) - - val backgroundPoolSize = hiveConf.getIntVar(ConfVars.HIVE_SERVER2_ASYNC_EXEC_THREADS) - setSuperField(this, "backgroundOperationPool", Executors.newFixedThreadPool(backgroundPoolSize)) - getAncestorField[Log](this, 3, "LOG").info( - s"HiveServer2: Async execution pool size $backgroundPoolSize") - - setSuperField(this, "operationManager", sparkSqlOperationManager) - addService(sparkSqlOperationManager) - - initCompositeService(hiveConf) - } - - override def closeSession(sessionHandle: SessionHandle) { - super.closeSession(sessionHandle) - sparkSqlOperationManager.sessionToActivePool -= sessionHandle - } -} diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala index 8bca4b33b3ad1..75738fa22b572 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala @@ -29,7 +29,7 @@ import org.apache.hadoop.hive.conf.HiveConf.ConfVars import org.scalatest.{BeforeAndAfterAll, FunSuite} import org.apache.spark.Logging -import org.apache.spark.sql.catalyst.util.getTempFilePath +import org.apache.spark.util.Utils class CliSuite extends FunSuite with BeforeAndAfterAll with Logging { def runCliWithin( @@ -38,8 +38,10 @@ class CliSuite extends FunSuite with BeforeAndAfterAll with Logging { queriesAndExpectedAnswers: (String, String)*) { val (queries, expectedAnswers) = queriesAndExpectedAnswers.unzip - val warehousePath = getTempFilePath("warehouse") - val metastorePath = getTempFilePath("metastore") + val warehousePath = Utils.createTempDir() + warehousePath.delete() + val metastorePath = Utils.createTempDir() + metastorePath.delete() val cliScript = "../../bin/spark-sql".split("/").mkString(File.separator) val command = { diff --git 
a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala index d783d487b5c60..bf20acecb1f32 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala @@ -37,7 +37,6 @@ import org.apache.thrift.transport.TSocket import org.scalatest.{BeforeAndAfterAll, FunSuite} import org.apache.spark.Logging -import org.apache.spark.sql.catalyst.util import org.apache.spark.sql.hive.HiveShim import org.apache.spark.util.Utils @@ -195,6 +194,146 @@ class HiveThriftBinaryServerSuite extends HiveThriftJdbcTest { } } } + + test("test multiple session") { + import org.apache.spark.sql.SQLConf + var defaultV1: String = null + var defaultV2: String = null + + withMultipleConnectionJdbcStatement( + // create table + { statement => + + val queries = Seq( + "DROP TABLE IF EXISTS test_map", + "CREATE TABLE test_map(key INT, value STRING)", + s"LOAD DATA LOCAL INPATH '${TestData.smallKv}' OVERWRITE INTO TABLE test_map", + "CACHE TABLE test_table AS SELECT key FROM test_map ORDER BY key DESC") + + queries.foreach(statement.execute) + + val rs1 = statement.executeQuery("SELECT key FROM test_table ORDER BY KEY DESC") + val buf1 = new collection.mutable.ArrayBuffer[Int]() + while (rs1.next()) { + buf1 += rs1.getInt(1) + } + rs1.close() + + val rs2 = statement.executeQuery("SELECT key FROM test_map ORDER BY KEY DESC") + val buf2 = new collection.mutable.ArrayBuffer[Int]() + while (rs2.next()) { + buf2 += rs2.getInt(1) + } + rs2.close() + + assert(buf1 === buf2) + }, + + // first session, we get the default value of the session status + { statement => + + val rs1 = statement.executeQuery(s"SET ${SQLConf.SHUFFLE_PARTITIONS}") + rs1.next() + defaultV1 = rs1.getString(1) + assert(defaultV1 != "200") + rs1.close() + + val rs2 = statement.executeQuery("SET hive.cli.print.header") + rs2.next() + + defaultV2 = rs2.getString(1) + assert(defaultV1 != "true") + rs2.close() + }, + + // second session, we update the session status + { statement => + + val queries = Seq( + s"SET ${SQLConf.SHUFFLE_PARTITIONS}=291", + "SET hive.cli.print.header=true" + ) + + queries.map(statement.execute) + val rs1 = statement.executeQuery(s"SET ${SQLConf.SHUFFLE_PARTITIONS}") + rs1.next() + assert("spark.sql.shuffle.partitions=291" === rs1.getString(1)) + rs1.close() + + val rs2 = statement.executeQuery("SET hive.cli.print.header") + rs2.next() + assert("hive.cli.print.header=true" === rs2.getString(1)) + rs2.close() + }, + + // third session, we get the latest session status, supposed to be the + // default value + { statement => + + val rs1 = statement.executeQuery(s"SET ${SQLConf.SHUFFLE_PARTITIONS}") + rs1.next() + assert(defaultV1 === rs1.getString(1)) + rs1.close() + + val rs2 = statement.executeQuery("SET hive.cli.print.header") + rs2.next() + assert(defaultV2 === rs2.getString(1)) + rs2.close() + }, + + // accessing the cached data in another session + { statement => + + val rs1 = statement.executeQuery("SELECT key FROM test_table ORDER BY KEY DESC") + val buf1 = new collection.mutable.ArrayBuffer[Int]() + while (rs1.next()) { + buf1 += rs1.getInt(1) + } + rs1.close() + + val rs2 = statement.executeQuery("SELECT key FROM test_map ORDER BY KEY DESC") + val buf2 = new collection.mutable.ArrayBuffer[Int]() + while 
(rs2.next()) { + buf2 += rs2.getInt(1) + } + rs2.close() + + assert(buf1 === buf2) + statement.executeQuery("UNCACHE TABLE test_table") + + // TODO need to figure out how to determine if the data loaded from cache + val rs3 = statement.executeQuery("SELECT key FROM test_map ORDER BY KEY DESC") + val buf3 = new collection.mutable.ArrayBuffer[Int]() + while (rs3.next()) { + buf3 += rs3.getInt(1) + } + rs3.close() + + assert(buf1 === buf3) + }, + + // accessing the uncached table + { statement => + + // TODO need to figure out how to determine if the data loaded from cache + val rs1 = statement.executeQuery("SELECT key FROM test_table ORDER BY KEY DESC") + val buf1 = new collection.mutable.ArrayBuffer[Int]() + while (rs1.next()) { + buf1 += rs1.getInt(1) + } + rs1.close() + + val rs2 = statement.executeQuery("SELECT key FROM test_map ORDER BY KEY DESC") + val buf2 = new collection.mutable.ArrayBuffer[Int]() + while (rs2.next()) { + buf2 += rs2.getInt(1) + } + rs2.close() + + assert(buf1 === buf2) + } + ) + } } class HiveThriftHttpServerSuite extends HiveThriftJdbcTest { @@ -245,15 +384,22 @@ abstract class HiveThriftJdbcTest extends HiveThriftServer2Test { s"jdbc:hive2://localhost:$serverPort/" } - protected def withJdbcStatement(f: Statement => Unit): Unit = { - val connection = DriverManager.getConnection(jdbcUri, user, "") - val statement = connection.createStatement() - - try f(statement) finally { - statement.close() - connection.close() + def withMultipleConnectionJdbcStatement(fs: (Statement => Unit)*) { + val user = System.getProperty("user.name") + val connections = fs.map { _ => DriverManager.getConnection(jdbcUri, user, "") } + val statements = connections.map(_.createStatement()) + + try { + statements.zip(fs).map { case (s, f) => f(s) } + } finally { + statements.map(_.close()) + connections.map(_.close()) } } + + def withJdbcStatement(f: Statement => Unit) { + withMultipleConnectionJdbcStatement(f) + } } abstract class HiveThriftServer2Test extends FunSuite with BeforeAndAfterAll with Logging { @@ -300,8 +446,10 @@ abstract class HiveThriftServer2Test extends FunSuite with BeforeAndAfterAll wit } private def startThriftServer(port: Int, attempt: Int) = { - warehousePath = util.getTempFilePath("warehouse") - metastorePath = util.getTempFilePath("metastore") + warehousePath = Utils.createTempDir() + warehousePath.delete() + metastorePath = Utils.createTempDir() + metastorePath.delete() logPath = null logTailingProcess = null diff --git a/sql/hive-thriftserver/v0.12.0/src/main/scala/org/apache/spark/sql/hive/thriftserver/Shim12.scala b/sql/hive-thriftserver/v0.12.0/src/main/scala/org/apache/spark/sql/hive/thriftserver/Shim12.scala index 13116b40bb259..95a6e86d0546d 100644 --- a/sql/hive-thriftserver/v0.12.0/src/main/scala/org/apache/spark/sql/hive/thriftserver/Shim12.scala +++ b/sql/hive-thriftserver/v0.12.0/src/main/scala/org/apache/spark/sql/hive/thriftserver/Shim12.scala @@ -18,8 +18,15 @@ package org.apache.spark.sql.hive.thriftserver import java.sql.{Date, Timestamp} +import java.util.concurrent.Executors import java.util.{ArrayList => JArrayList, Map => JMap} +import org.apache.commons.logging.Log +import org.apache.hadoop.hive.conf.HiveConf +import org.apache.hadoop.hive.conf.HiveConf.ConfVars +import org.apache.hive.service.cli.thrift.TProtocolVersion +import org.apache.spark.sql.hive.thriftserver.server.SparkSQLOperationManager + import scala.collection.JavaConversions._ import scala.collection.mutable.{ArrayBuffer, Map => SMap} @@ -29,7 +36,7 @@ import 
org.apache.hadoop.hive.shims.ShimLoader import org.apache.hadoop.security.UserGroupInformation import org.apache.hive.service.cli._ import org.apache.hive.service.cli.operation.ExecuteStatementOperation -import org.apache.hive.service.cli.session.HiveSession +import org.apache.hive.service.cli.session.{SessionManager, HiveSession} import org.apache.spark.Logging import org.apache.spark.sql.{DataFrame, SQLConf, Row => SparkRow} @@ -220,3 +227,42 @@ private[hive] class SparkExecuteStatementOperation( setState(OperationState.FINISHED) } } + +private[hive] class SparkSQLSessionManager(hiveContext: HiveContext) + extends SessionManager + with ReflectedCompositeService { + + private lazy val sparkSqlOperationManager = new SparkSQLOperationManager(hiveContext) + + override def init(hiveConf: HiveConf) { + setSuperField(this, "hiveConf", hiveConf) + + val backgroundPoolSize = hiveConf.getIntVar(ConfVars.HIVE_SERVER2_ASYNC_EXEC_THREADS) + setSuperField(this, "backgroundOperationPool", Executors.newFixedThreadPool(backgroundPoolSize)) + getAncestorField[Log](this, 3, "LOG").info( + s"HiveServer2: Async execution pool size $backgroundPoolSize") + + setSuperField(this, "operationManager", sparkSqlOperationManager) + addService(sparkSqlOperationManager) + + initCompositeService(hiveConf) + } + + override def openSession( + username: String, + passwd: String, + sessionConf: java.util.Map[String, String], + withImpersonation: Boolean, + delegationToken: String): SessionHandle = { + hiveContext.openSession() + + super.openSession(username, passwd, sessionConf, withImpersonation, delegationToken) + } + + override def closeSession(sessionHandle: SessionHandle) { + super.closeSession(sessionHandle) + sparkSqlOperationManager.sessionToActivePool -= sessionHandle + + hiveContext.detachSession() + } +} diff --git a/sql/hive-thriftserver/v0.13.1/src/main/scala/org/apache/spark/sql/hive/thriftserver/Shim13.scala b/sql/hive-thriftserver/v0.13.1/src/main/scala/org/apache/spark/sql/hive/thriftserver/Shim13.scala index 9b8faeff94eab..178eb1af7cdcd 100644 --- a/sql/hive-thriftserver/v0.13.1/src/main/scala/org/apache/spark/sql/hive/thriftserver/Shim13.scala +++ b/sql/hive-thriftserver/v0.13.1/src/main/scala/org/apache/spark/sql/hive/thriftserver/Shim13.scala @@ -18,8 +18,15 @@ package org.apache.spark.sql.hive.thriftserver import java.sql.{Date, Timestamp} +import java.util.concurrent.Executors import java.util.{ArrayList => JArrayList, List => JList, Map => JMap} +import org.apache.commons.logging.Log +import org.apache.hadoop.hive.conf.HiveConf +import org.apache.hadoop.hive.conf.HiveConf.ConfVars +import org.apache.hive.service.cli.thrift.TProtocolVersion +import org.apache.spark.sql.hive.thriftserver.server.SparkSQLOperationManager + import scala.collection.JavaConversions._ import scala.collection.mutable.{ArrayBuffer, Map => SMap} @@ -27,7 +34,7 @@ import org.apache.hadoop.hive.metastore.api.FieldSchema import org.apache.hadoop.security.UserGroupInformation import org.apache.hive.service.cli._ import org.apache.hive.service.cli.operation.ExecuteStatementOperation -import org.apache.hive.service.cli.session.HiveSession +import org.apache.hive.service.cli.session.{SessionManager, HiveSession} import org.apache.spark.Logging import org.apache.spark.sql.{DataFrame, Row => SparkRow, SQLConf} @@ -191,3 +198,43 @@ private[hive] class SparkExecuteStatementOperation( setState(OperationState.FINISHED) } } + +private[hive] class SparkSQLSessionManager(hiveContext: HiveContext) + extends SessionManager + with 
ReflectedCompositeService { + + private lazy val sparkSqlOperationManager = new SparkSQLOperationManager(hiveContext) + + override def init(hiveConf: HiveConf) { + setSuperField(this, "hiveConf", hiveConf) + + val backgroundPoolSize = hiveConf.getIntVar(ConfVars.HIVE_SERVER2_ASYNC_EXEC_THREADS) + setSuperField(this, "backgroundOperationPool", Executors.newFixedThreadPool(backgroundPoolSize)) + getAncestorField[Log](this, 3, "LOG").info( + s"HiveServer2: Async execution pool size $backgroundPoolSize") + + setSuperField(this, "operationManager", sparkSqlOperationManager) + addService(sparkSqlOperationManager) + + initCompositeService(hiveConf) + } + + override def openSession( + protocol: TProtocolVersion, + username: String, + passwd: String, + sessionConf: java.util.Map[String, String], + withImpersonation: Boolean, + delegationToken: String): SessionHandle = { + hiveContext.openSession() + + super.openSession(protocol, username, passwd, sessionConf, withImpersonation, delegationToken) + } + + override def closeSession(sessionHandle: SessionHandle) { + super.closeSession(sessionHandle) + sparkSqlOperationManager.sessionToActivePool -= sessionHandle + + hiveContext.detachSession() + } +} diff --git a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala index 6126ce7130426..2ae9d018e1b1b 100644 --- a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala +++ b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala @@ -236,7 +236,11 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { // timestamp in array, the output format of Hive contains double quotes, while // Spark SQL doesn't - "udf_sort_array" + "udf_sort_array", + + // It has a bug and it has been fixed by + // https://issues.apache.org/jira/browse/HIVE-7673 (in Hive 0.14 and trunk). 
+ "input46" ) ++ HiveShim.compatibilityBlackList /** @@ -726,6 +730,7 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { "select_unquote_and", "select_unquote_not", "select_unquote_or", + "semicolon", "semijoin", "serde_regex", "serde_reported_schema", @@ -795,6 +800,7 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { "udaf_covar_pop", "udaf_covar_samp", "udaf_histogram_numeric", + "udaf_number_format", "udf2", "udf5", "udf6", diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index 0e3f4eb98cbf7..a9816f6c38cd2 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.10 - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT ../../pom.xml diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala index c439dfe0a71f8..c06c2e396bbc1 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala @@ -49,10 +49,6 @@ import org.apache.spark.sql.types._ class HiveContext(sc: SparkContext) extends SQLContext(sc) { self => - protected[sql] override lazy val conf: SQLConf = new SQLConf { - override def dialect: String = getConf(SQLConf.DIALECT, "hiveql") - } - /** * When true, enables an experimental feature where metastore tables that use the parquet SerDe * are automatically converted to use the Spark SQL parquet table scan, instead of the Hive @@ -187,7 +183,7 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) { // Circular buffer to hold what hive prints to STDOUT and ERR. Only printed when failures occur. @transient - protected lazy val outputBuffer = new java.io.OutputStream { + protected lazy val outputBuffer = new java.io.OutputStream { var pos: Int = 0 var buffer = new Array[Int](10240) def write(i: Int): Unit = { @@ -195,7 +191,7 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) { pos = (pos + 1) % buffer.size } - override def toString = { + override def toString: String = { val (end, start) = buffer.splitAt(pos) val input = new java.io.InputStream { val iterator = (start ++ end).iterator @@ -214,33 +210,9 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) { } } - /** - * SQLConf and HiveConf contracts: - * - * 1. reuse existing started SessionState if any - * 2. when the Hive session is first initialized, params in HiveConf will get picked up by the - * SQLConf. Additionally, any properties set by set() or a SET command inside sql() will be - * set in the SQLConf *as well as* in the HiveConf. 
- */ - @transient protected[hive] lazy val sessionState: SessionState = { - var state = SessionState.get() - if (state == null) { - state = new SessionState(new HiveConf(classOf[SessionState])) - SessionState.start(state) - } - if (state.out == null) { - state.out = new PrintStream(outputBuffer, true, "UTF-8") - } - if (state.err == null) { - state.err = new PrintStream(outputBuffer, true, "UTF-8") - } - state - } + protected[hive] def sessionState = tlSession.get().asInstanceOf[this.SQLSession].sessionState - @transient protected[hive] lazy val hiveconf: HiveConf = { - setConf(sessionState.getConf.getAllProperties) - sessionState.getConf - } + protected[hive] def hiveconf = tlSession.get().asInstanceOf[this.SQLSession].hiveconf override def setConf(key: String, value: String): Unit = { super.setConf(key, value) @@ -255,7 +227,7 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) { @transient override protected[sql] lazy val functionRegistry = new HiveFunctionRegistry with OverrideFunctionRegistry { - def caseSensitive = false + def caseSensitive: Boolean = false } /* An analyzer that uses the Hive metastore. */ @@ -272,6 +244,44 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) { Nil } + override protected[sql] def createSession(): SQLSession = { + new this.SQLSession() + } + + protected[hive] class SQLSession extends super.SQLSession { + protected[sql] override lazy val conf: SQLConf = new SQLConf { + override def dialect: String = getConf(SQLConf.DIALECT, "hiveql") + } + + protected[hive] lazy val hiveconf: HiveConf = { + setConf(sessionState.getConf.getAllProperties) + sessionState.getConf + } + + /** + * SQLConf and HiveConf contracts: + * + * 1. reuse existing started SessionState if any + * 2. when the Hive session is first initialized, params in HiveConf will get picked up by the + * SQLConf. Additionally, any properties set by set() or a SET command inside sql() will be + * set in the SQLConf *as well as* in the HiveConf. + */ + protected[hive] lazy val sessionState: SessionState = { + var state = SessionState.get() + if (state == null) { + state = new SessionState(new HiveConf(classOf[SessionState])) + SessionState.start(state) + } + if (state.out == null) { + state.out = new PrintStream(outputBuffer, true, "UTF-8") + } + if (state.err == null) { + state.err = new PrintStream(outputBuffer, true, "UTF-8") + } + state + } + } + /** * Runs the specified SQL query using Hive. */ diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala index fe86bd206a71c..4c5eb48661f7d 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala @@ -58,7 +58,7 @@ private[hive] class HiveMetastoreCatalog(hive: HiveContext) extends Catalog with // TODO: Use this everywhere instead of tuples or databaseName, tableName,. /** A fully qualified identifier for a table (i.e., database.tableName) */ case class QualifiedTableName(database: String, name: String) { - def toLowerCase = QualifiedTableName(database.toLowerCase, name.toLowerCase) + def toLowerCase: QualifiedTableName = QualifiedTableName(database.toLowerCase, name.toLowerCase) } /** A cache of Spark SQL data source tables that have been accessed. 
*/ @@ -629,7 +629,8 @@ private[hive] class HiveMetastoreCatalog(hive: HiveContext) extends Catalog with castChildOutput(p, table, child) } - def castChildOutput(p: InsertIntoTable, table: MetastoreRelation, child: LogicalPlan) = { + def castChildOutput(p: InsertIntoTable, table: MetastoreRelation, child: LogicalPlan) + : LogicalPlan = { val childOutputDataTypes = child.output.map(_.dataType) val tableOutputDataTypes = (table.attributes ++ table.partitionKeys).take(child.output.length).map(_.dataType) @@ -667,7 +668,7 @@ private[hive] class HiveMetastoreCatalog(hive: HiveContext) extends Catalog with */ override def unregisterTable(tableIdentifier: Seq[String]): Unit = ??? - override def unregisterAllTables() = {} + override def unregisterAllTables(): Unit = {} } /** @@ -682,10 +683,10 @@ private[hive] case class InsertIntoHiveTable( overwrite: Boolean) extends LogicalPlan { - override def children = child :: Nil - override def output = child.output + override def children: Seq[LogicalPlan] = child :: Nil + override def output: Seq[Attribute] = child.output - override lazy val resolved = childrenResolved && child.output.zip(table.output).forall { + override lazy val resolved: Boolean = childrenResolved && child.output.zip(table.output).forall { case (childAttr, tableAttr) => childAttr.dataType.sameType(tableAttr.dataType) } } @@ -704,13 +705,13 @@ private[hive] case class MetastoreRelation // org.apache.hadoop.hive.ql.metadata.Partition will cause a NotSerializableException // which indicates the SerDe we used is not Serializable. - @transient val hiveQlTable = new Table(table) + @transient val hiveQlTable: Table = new Table(table) - @transient val hiveQlPartitions = partitions.map { p => + @transient val hiveQlPartitions: Seq[Partition] = partitions.map { p => new Partition(hiveQlTable, p) } - @transient override lazy val statistics = Statistics( + @transient override lazy val statistics: Statistics = Statistics( sizeInBytes = { val totalSize = hiveQlTable.getParameters.get(HiveShim.getStatsSetupConstTotalSize) val rawDataSize = hiveQlTable.getParameters.get(HiveShim.getStatsSetupConstRawDataSize) @@ -754,9 +755,9 @@ private[hive] case class MetastoreRelation ) implicit class SchemaAttribute(f: FieldSchema) { - def toAttribute = AttributeReference( + def toAttribute: AttributeReference = AttributeReference( f.getName, - sqlContext.ddlParser.parseType(f.getType), + HiveMetastoreTypes.toDataType(f.getType), // Since data can be dumped in randomly with no validation, everything is nullable. 
nullable = true )(qualifiers = Seq(alias.getOrElse(tableName))) @@ -779,11 +780,7 @@ private[hive] case class MetastoreRelation private[hive] object HiveMetastoreTypes { - protected val ddlParser = new DDLParser(HiveQl.parseSql(_)) - - def toDataType(metastoreType: String): DataType = synchronized { - ddlParser.parseType(metastoreType) - } + def toDataType(metastoreType: String): DataType = DataTypeParser(metastoreType) def toMetastoreType(dt: DataType): String = dt match { case ArrayType(elementType, _) => s"array<${toMetastoreType(elementType)}>" diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala index ced99cd082614..51775eb4cd6a0 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala @@ -196,8 +196,8 @@ private[hive] object HiveQl { * Right now this function only checks the name, type, text and children of the node * for equality. */ - def checkEquals(other: ASTNode) { - def check(field: String, f: ASTNode => Any) = if (f(n) != f(other)) { + def checkEquals(other: ASTNode): Unit = { + def check(field: String, f: ASTNode => Any): Unit = if (f(n) != f(other)) { sys.error(s"$field does not match for trees. " + s"'${f(n)}' != '${f(other)}' left: ${dumpTree(n)}, right: ${dumpTree(other)}") } @@ -209,7 +209,7 @@ private[hive] object HiveQl { val leftChildren = nilIfEmpty(n.getChildren).asInstanceOf[Seq[ASTNode]] val rightChildren = nilIfEmpty(other.getChildren).asInstanceOf[Seq[ASTNode]] leftChildren zip rightChildren foreach { - case (l,r) => l checkEquals r + case (l, r) => l checkEquals r } } } @@ -269,7 +269,7 @@ private[hive] object HiveQl { } /** Creates LogicalPlan for a given VIEW */ - def createPlanForView(view: Table, alias: Option[String]) = alias match { + def createPlanForView(view: Table, alias: Option[String]): Subquery = alias match { // because hive use things like `_c0` to build the expanded text // currently we cannot support view from "create view v1(c1) as ..." case None => Subquery(view.getTableName, createPlan(view.getViewExpandedText)) @@ -323,7 +323,7 @@ private[hive] object HiveQl { clauses } - def getClause(clauseName: String, nodeList: Seq[Node]) = + def getClause(clauseName: String, nodeList: Seq[Node]): Node = getClauseOption(clauseName, nodeList).getOrElse(sys.error( s"Expected clause $clauseName missing from ${nodeList.map(dumpTree(_)).mkString("\n")}")) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala index e63cea60457d9..5f7e897295117 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala @@ -58,9 +58,9 @@ private[hive] trait HiveStrategies { @Experimental object ParquetConversion extends Strategy { implicit class LogicalPlanHacks(s: DataFrame) { - def lowerCase = DataFrame(s.sqlContext, s.logicalPlan) + def lowerCase: DataFrame = DataFrame(s.sqlContext, s.logicalPlan) - def addPartitioningAttributes(attrs: Seq[Attribute]) = { + def addPartitioningAttributes(attrs: Seq[Attribute]): DataFrame = { // Don't add the partitioning key if its already present in the data. 
if (attrs.map(_.name).toSet.subsetOf(s.logicalPlan.output.map(_.name).toSet)) { s @@ -75,7 +75,7 @@ private[hive] trait HiveStrategies { } implicit class PhysicalPlanHacks(originalPlan: SparkPlan) { - def fakeOutput(newOutput: Seq[Attribute]) = + def fakeOutput(newOutput: Seq[Attribute]): OutputFaker = OutputFaker( originalPlan.output.map(a => newOutput.find(a.name.toLowerCase == _.name.toLowerCase) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala index f22c9eaeedc7d..af309c0c6ce2c 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala @@ -175,7 +175,7 @@ class HadoopTableReader( relation.partitionKeys.contains(attr) } - def fillPartitionKeys(rawPartValues: Array[String], row: MutableRow) = { + def fillPartitionKeys(rawPartValues: Array[String], row: MutableRow): Unit = { partitionKeyAttrs.foreach { case (attr, ordinal) => val partOrdinal = relation.partitionKeys.indexOf(attr) row(ordinal) = Cast(Literal(rawPartValues(partOrdinal)), attr.dataType).eval(null) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateTableAsSelect.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateTableAsSelect.scala index a0c91cbc4e86f..fade9e5852eaa 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateTableAsSelect.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateTableAsSelect.scala @@ -45,7 +45,7 @@ case class CreateTableAsSelect( allowExisting: Boolean, desc: Option[CreateTableDesc]) extends RunnableCommand { - override def run(sqlContext: SQLContext) = { + override def run(sqlContext: SQLContext): Seq[Row] = { val hiveContext = sqlContext.asInstanceOf[HiveContext] lazy val metastoreRelation: MetastoreRelation = { // Create Hive Table diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/DescribeHiveTableCommand.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/DescribeHiveTableCommand.scala index d0510aa342796..6fce69b58b85e 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/DescribeHiveTableCommand.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/DescribeHiveTableCommand.scala @@ -37,7 +37,7 @@ case class DescribeHiveTableCommand( override val output: Seq[Attribute], isExtended: Boolean) extends RunnableCommand { - override def run(sqlContext: SQLContext) = { + override def run(sqlContext: SQLContext): Seq[Row] = { // Trying to mimic the format of Hive's output. But not exactly the same. 
var results: Seq[(String, String, String)] = Nil diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveNativeCommand.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveNativeCommand.scala index 9636da206087f..60a9bb630d0d9 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveNativeCommand.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveNativeCommand.scala @@ -26,9 +26,9 @@ import org.apache.spark.sql.types.StringType private[hive] case class HiveNativeCommand(sql: String) extends RunnableCommand { - override def output = + override def output: Seq[AttributeReference] = Seq(AttributeReference("result", StringType, nullable = false)()) - override def run(sqlContext: SQLContext) = + override def run(sqlContext: SQLContext): Seq[Row] = sqlContext.asInstanceOf[HiveContext].runSqlHive(sql).map(Row(_)) } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScan.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScan.scala index 5b3cf2861e8ef..0a5f19eee7105 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScan.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScan.scala @@ -26,6 +26,7 @@ import org.apache.hadoop.hive.serde2.objectinspector._ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils +import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.execution._ import org.apache.spark.sql.hive._ @@ -128,11 +129,11 @@ case class HiveTableScan( } } - override def execute() = if (!relation.hiveQlTable.isPartitioned) { + override def execute(): RDD[Row] = if (!relation.hiveQlTable.isPartitioned) { hadoopReader.makeRDDForTable(relation.hiveQlTable) } else { hadoopReader.makeRDDForPartitionedTable(prunePartitions(relation.hiveQlPartitions)) } - override def output = attributes + override def output: Seq[Attribute] = attributes } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala index ba5c8e028a151..da53d30354551 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala @@ -33,7 +33,7 @@ import org.apache.hadoop.hive.serde2.objectinspector._ import org.apache.hadoop.mapred.{FileOutputCommitter, FileOutputFormat, JobConf} import org.apache.spark.rdd.RDD -import org.apache.spark.sql.catalyst.expressions.Row +import org.apache.spark.sql.catalyst.expressions.{Attribute, Row} import org.apache.spark.sql.execution.{UnaryNode, SparkPlan} import org.apache.spark.sql.hive._ import org.apache.spark.sql.hive.{ ShimFileSinkDesc => FileSinkDesc} @@ -58,7 +58,7 @@ case class InsertIntoHiveTable( serializer } - def output = child.output + def output: Seq[Attribute] = child.output def saveAsHiveFile( rdd: RDD[Row], diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/ScriptTransformation.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/ScriptTransformation.scala index 0c9aee33985bc..8efed7f0299bf 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/ScriptTransformation.scala +++ 
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/ScriptTransformation.scala @@ -27,6 +27,7 @@ import org.apache.hadoop.hive.serde.serdeConstants import org.apache.hadoop.hive.serde2.AbstractSerDe import org.apache.hadoop.hive.serde2.objectinspector._ +import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.logical.ScriptInputOutputSchema import org.apache.spark.sql.execution._ @@ -51,9 +52,9 @@ case class ScriptTransformation( ioschema: HiveScriptIOSchema)(@transient sc: HiveContext) extends UnaryNode { - override def otherCopyArgs = sc :: Nil + override def otherCopyArgs: Seq[HiveContext] = sc :: Nil - def execute() = { + def execute(): RDD[Row] = { child.execute().mapPartitions { iter => val cmd = List("/bin/bash", "-c", script) val builder = new ProcessBuilder(cmd) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala index 63ad145a6a980..4345ffbf30f77 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala @@ -38,7 +38,7 @@ import org.apache.spark.sql.types.StructType private[hive] case class AnalyzeTable(tableName: String) extends RunnableCommand { - override def run(sqlContext: SQLContext) = { + override def run(sqlContext: SQLContext): Seq[Row] = { sqlContext.asInstanceOf[HiveContext].analyze(tableName) Seq.empty[Row] } @@ -52,7 +52,7 @@ case class DropTable( tableName: String, ifExists: Boolean) extends RunnableCommand { - override def run(sqlContext: SQLContext) = { + override def run(sqlContext: SQLContext): Seq[Row] = { val hiveContext = sqlContext.asInstanceOf[HiveContext] val ifExistsClause = if (ifExists) "IF EXISTS " else "" try { @@ -75,7 +75,7 @@ case class DropTable( private[hive] case class AddJar(path: String) extends RunnableCommand { - override def run(sqlContext: SQLContext) = { + override def run(sqlContext: SQLContext): Seq[Row] = { val hiveContext = sqlContext.asInstanceOf[HiveContext] hiveContext.runSqlHive(s"ADD JAR $path") hiveContext.sparkContext.addJar(path) @@ -86,7 +86,7 @@ case class AddJar(path: String) extends RunnableCommand { private[hive] case class AddFile(path: String) extends RunnableCommand { - override def run(sqlContext: SQLContext) = { + override def run(sqlContext: SQLContext): Seq[Row] = { val hiveContext = sqlContext.asInstanceOf[HiveContext] hiveContext.runSqlHive(s"ADD FILE $path") hiveContext.sparkContext.addFile(path) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala index 34c21c11761ae..bfe43373d9534 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala @@ -45,7 +45,7 @@ import scala.collection.JavaConversions._ private[hive] abstract class HiveFunctionRegistry extends analysis.FunctionRegistry with HiveInspectors { - def getFunctionInfo(name: String) = FunctionRegistry.getFunctionInfo(name) + def getFunctionInfo(name: String): FunctionInfo = FunctionRegistry.getFunctionInfo(name) def lookupFunction(name: String, children: Seq[Expression]): Expression = { // We only look it up to see if it exists, but do not include it in the HiveUDF since it is @@ -78,7 +78,7 @@ private[hive] case class HiveSimpleUdf(funcWrapper: HiveFunctionWrapper, childre type 
EvaluatedType = Any type UDFType = UDF - def nullable = true + override def nullable: Boolean = true @transient lazy val function = funcWrapper.createFunction[UDFType]() @@ -96,7 +96,7 @@ private[hive] case class HiveSimpleUdf(funcWrapper: HiveFunctionWrapper, childre udfType != null && udfType.deterministic() } - override def foldable = isUDFDeterministic && children.forall(_.foldable) + override def foldable: Boolean = isUDFDeterministic && children.forall(_.foldable) // Create parameter converters @transient @@ -110,7 +110,7 @@ private[hive] case class HiveSimpleUdf(funcWrapper: HiveFunctionWrapper, childre method.getGenericReturnType(), ObjectInspectorOptions.JAVA) @transient - protected lazy val cached = new Array[AnyRef](children.length) + protected lazy val cached: Array[AnyRef] = new Array[AnyRef](children.length) // TODO: Finish input output types. override def eval(input: Row): Any = { @@ -120,17 +120,19 @@ private[hive] case class HiveSimpleUdf(funcWrapper: HiveFunctionWrapper, childre returnInspector) } - override def toString = s"$nodeName#${funcWrapper.functionClassName}(${children.mkString(",")})" + override def toString: String = { + s"$nodeName#${funcWrapper.functionClassName}(${children.mkString(",")})" + } } // Adapter from Catalyst ExpressionResult to Hive DeferredObject private[hive] class DeferredObjectAdapter(oi: ObjectInspector) extends DeferredObject with HiveInspectors { private var func: () => Any = _ - def set(func: () => Any) { + def set(func: () => Any): Unit = { this.func = func } - override def prepare(i: Int) = {} + override def prepare(i: Int): Unit = {} override def get(): AnyRef = wrap(func(), oi) } @@ -139,7 +141,7 @@ private[hive] case class HiveGenericUdf(funcWrapper: HiveFunctionWrapper, childr type UDFType = GenericUDF type EvaluatedType = Any - def nullable = true + override def nullable: Boolean = true @transient lazy val function = funcWrapper.createFunction[UDFType]() @@ -158,7 +160,7 @@ private[hive] case class HiveGenericUdf(funcWrapper: HiveFunctionWrapper, childr (udfType != null && udfType.deterministic()) } - override def foldable = + override def foldable: Boolean = isUDFDeterministic && returnInspector.isInstanceOf[ConstantObjectInspector] @transient @@ -182,7 +184,9 @@ private[hive] case class HiveGenericUdf(funcWrapper: HiveFunctionWrapper, childr unwrap(function.evaluate(deferedObjects), returnInspector) } - override def toString = s"$nodeName#${funcWrapper.functionClassName}(${children.mkString(",")})" + override def toString: String = { + s"$nodeName#${funcWrapper.functionClassName}(${children.mkString(",")})" + } } private[hive] case class HiveGenericUdaf( @@ -209,9 +213,11 @@ private[hive] case class HiveGenericUdaf( def nullable: Boolean = true - override def toString = s"$nodeName#${funcWrapper.functionClassName}(${children.mkString(",")})" + override def toString: String = { + s"$nodeName#${funcWrapper.functionClassName}(${children.mkString(",")})" + } - def newInstance() = new HiveUdafFunction(funcWrapper, children, this) + def newInstance(): HiveUdafFunction = new HiveUdafFunction(funcWrapper, children, this) } /** It is used as a wrapper for the hive functions which uses UDAF interface */ @@ -240,10 +246,11 @@ private[hive] case class HiveUdaf( def nullable: Boolean = true - override def toString = s"$nodeName#${funcWrapper.functionClassName}(${children.mkString(",")})" + override def toString: String = { + s"$nodeName#${funcWrapper.functionClassName}(${children.mkString(",")})" + } - def newInstance() = - new 
HiveUdafFunction(funcWrapper, children, this, true) + def newInstance(): HiveUdafFunction = new HiveUdafFunction(funcWrapper, children, this, true) } /** @@ -314,25 +321,29 @@ private[hive] case class HiveGenericUdtf( collected += unwrap(input, outputInspector).asInstanceOf[Row] } - def collectRows() = { + def collectRows(): Seq[Row] = { val toCollect = collected collected = new ArrayBuffer[Row] toCollect } } - override def toString = s"$nodeName#${funcWrapper.functionClassName}(${children.mkString(",")})" + override def toString: String = { + s"$nodeName#${funcWrapper.functionClassName}(${children.mkString(",")})" + } } /** * Resolve Udtfs Alias. */ private[spark] object ResolveUdtfsAlias extends Rule[LogicalPlan] { - def apply(plan: LogicalPlan) = plan transform { + def apply(plan: LogicalPlan): LogicalPlan = plan transform { case p @ Project(projectList, _) if projectList.exists(_.isInstanceOf[MultiAlias]) && projectList.size != 1 => throw new TreeNodeException(p, "only single Generator supported for SELECT clause") + case Project(Seq(Alias(udtf @ HiveGenericUdtf(_, _, _), name)), child) => + Generate(udtf.copy(aliasNames = Seq(name)), join = false, outer = false, None, child) case Project(Seq(MultiAlias(udtf @ HiveGenericUdtf(_, _, _), names)), child) => Generate(udtf.copy(aliasNames = names), join = false, outer = false, None, child) } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveWriterContainers.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveWriterContainers.scala index f136e43acc8f2..ba2bf67aed684 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveWriterContainers.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveWriterContainers.scala @@ -222,7 +222,7 @@ private[spark] class SparkHiveDynamicPartitionWriterContainer( s"/$col=$colString" }.mkString - def newWriter = { + def newWriter(): FileSinkOperator.RecordWriter = { val newFileSinkDesc = new FileSinkDesc( fileSinkConf.getDirName + dynamicPartPath, fileSinkConf.getTableInfo, @@ -246,6 +246,6 @@ private[spark] class SparkHiveDynamicPartitionWriterContainer( Reporter.NULL) } - writers.getOrElseUpdate(dynamicPartPath, newWriter) + writers.getOrElseUpdate(dynamicPartPath, newWriter()) } } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala index a2d99f1f4b28d..dc61e9d2e3522 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala @@ -30,7 +30,6 @@ import org.apache.hadoop.hive.serde2.avro.AvroSerDe import org.apache.spark.sql.SQLConf import org.apache.spark.sql.catalyst.analysis._ import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan -import org.apache.spark.sql.catalyst.util._ import org.apache.spark.sql.execution.CacheTableCommand import org.apache.spark.sql.hive._ import org.apache.spark.sql.hive.execution.HiveNativeCommand @@ -69,22 +68,19 @@ class TestHiveContext(sc: SparkContext) extends HiveContext(sc) { hiveconf.set("hive.plan.serialization.format", "javaXML") - lazy val warehousePath = getTempFilePath("sparkHiveWarehouse").getCanonicalPath - lazy val metastorePath = getTempFilePath("sparkHiveMetastore").getCanonicalPath + lazy val warehousePath = Utils.createTempDir() + lazy val metastorePath = Utils.createTempDir() /** Sets up the system initially or after a RESET command */ protected def configure(): Unit = { + warehousePath.delete() + 
metastorePath.delete() setConf("javax.jdo.option.ConnectionURL", s"jdbc:derby:;databaseName=$metastorePath;create=true") - setConf("hive.metastore.warehouse.dir", warehousePath) - Utils.registerShutdownDeleteDir(new File(warehousePath)) - Utils.registerShutdownDeleteDir(new File(metastorePath)) + setConf("hive.metastore.warehouse.dir", warehousePath.toString) } - val testTempDir = File.createTempFile("testTempFiles", "spark.hive.tmp") - testTempDir.delete() - testTempDir.mkdir() - Utils.registerShutdownDeleteDir(testTempDir) + val testTempDir = Utils.createTempDir() // For some hive test case which contain ${system:test.tmp.dir} System.setProperty("test.tmp.dir", testTempDir.getCanonicalPath) @@ -102,10 +98,16 @@ class TestHiveContext(sc: SparkContext) extends HiveContext(sc) { override def executePlan(plan: LogicalPlan): this.QueryExecution = new this.QueryExecution(plan) - /** Fewer partitions to speed up testing. */ - protected[sql] override lazy val conf: SQLConf = new SQLConf { - override def numShufflePartitions: Int = getConf(SQLConf.SHUFFLE_PARTITIONS, "5").toInt - override def dialect: String = getConf(SQLConf.DIALECT, "hiveql") + override protected[sql] def createSession(): SQLSession = { + new this.SQLSession() + } + + protected[hive] class SQLSession extends super.SQLSession { + /** Fewer partitions to speed up testing. */ + protected[sql] override lazy val conf: SQLConf = new SQLConf { + override def numShufflePartitions: Int = getConf(SQLConf.SHUFFLE_PARTITIONS, "5").toInt + override def dialect: String = getConf(SQLConf.DIALECT, "hiveql") + } } /** @@ -153,8 +155,8 @@ class TestHiveContext(sc: SparkContext) extends HiveContext(sc) { protected[hive] class HiveQLQueryExecution(hql: String) extends this.QueryExecution(HiveQl.parseSql(hql)) { - def hiveExec() = runSqlHive(hql) - override def toString = hql + "\n" + super.toString + def hiveExec(): Seq[String] = runSqlHive(hql) + override def toString: String = hql + "\n" + super.toString } /** @@ -184,7 +186,9 @@ class TestHiveContext(sc: SparkContext) extends HiveContext(sc) { case class TestTable(name: String, commands: (()=>Unit)*) protected[hive] implicit class SqlCmd(sql: String) { - def cmd = () => new HiveQLQueryExecution(sql).stringResult(): Unit + def cmd: () => Unit = { + () => new HiveQLQueryExecution(sql).stringResult(): Unit + } } /** @@ -192,7 +196,10 @@ class TestHiveContext(sc: SparkContext) extends HiveContext(sc) { * demand when a query are run against it. */ lazy val testTables = new mutable.HashMap[String, TestTable]() - def registerTestTable(testTable: TestTable) = testTables += (testTable.name -> testTable) + + def registerTestTable(testTable: TestTable): Unit = { + testTables += (testTable.name -> testTable) + } // The test tables that are defined in the Hive QTestUtil. 
// /itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java diff --git a/sql/hive/src/test/resources/golden/create table as with db name within backticks-0-a253b1ed35dbf503d1b8902dacbe23ac b/sql/hive/src/test/resources/golden/create table as with db name within backticks-0-a253b1ed35dbf503d1b8902dacbe23ac new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/sql/hive/src/test/resources/golden/create table as with db name within backticks-1-61dc640dfeaff471f3d2b730f9cbf959 b/sql/hive/src/test/resources/golden/create table as with db name within backticks-1-61dc640dfeaff471f3d2b730f9cbf959 new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/sql/hive/src/test/resources/golden/create table as with db name within backticks-2-ce780d068b8d24786e639e361101a0c7 b/sql/hive/src/test/resources/golden/create table as with db name within backticks-2-ce780d068b8d24786e639e361101a0c7 new file mode 100644 index 0000000000000..7aae61e5eb82f --- /dev/null +++ b/sql/hive/src/test/resources/golden/create table as with db name within backticks-2-ce780d068b8d24786e639e361101a0c7 @@ -0,0 +1,500 @@ +238 val_238 +86 val_86 +311 val_311 +27 val_27 +165 val_165 +409 val_409 +255 val_255 +278 val_278 +98 val_98 +484 val_484 +265 val_265 +193 val_193 +401 val_401 +150 val_150 +273 val_273 +224 val_224 +369 val_369 +66 val_66 +128 val_128 +213 val_213 +146 val_146 +406 val_406 +429 val_429 +374 val_374 +152 val_152 +469 val_469 +145 val_145 +495 val_495 +37 val_37 +327 val_327 +281 val_281 +277 val_277 +209 val_209 +15 val_15 +82 val_82 +403 val_403 +166 val_166 +417 val_417 +430 val_430 +252 val_252 +292 val_292 +219 val_219 +287 val_287 +153 val_153 +193 val_193 +338 val_338 +446 val_446 +459 val_459 +394 val_394 +237 val_237 +482 val_482 +174 val_174 +413 val_413 +494 val_494 +207 val_207 +199 val_199 +466 val_466 +208 val_208 +174 val_174 +399 val_399 +396 val_396 +247 val_247 +417 val_417 +489 val_489 +162 val_162 +377 val_377 +397 val_397 +309 val_309 +365 val_365 +266 val_266 +439 val_439 +342 val_342 +367 val_367 +325 val_325 +167 val_167 +195 val_195 +475 val_475 +17 val_17 +113 val_113 +155 val_155 +203 val_203 +339 val_339 +0 val_0 +455 val_455 +128 val_128 +311 val_311 +316 val_316 +57 val_57 +302 val_302 +205 val_205 +149 val_149 +438 val_438 +345 val_345 +129 val_129 +170 val_170 +20 val_20 +489 val_489 +157 val_157 +378 val_378 +221 val_221 +92 val_92 +111 val_111 +47 val_47 +72 val_72 +4 val_4 +280 val_280 +35 val_35 +427 val_427 +277 val_277 +208 val_208 +356 val_356 +399 val_399 +169 val_169 +382 val_382 +498 val_498 +125 val_125 +386 val_386 +437 val_437 +469 val_469 +192 val_192 +286 val_286 +187 val_187 +176 val_176 +54 val_54 +459 val_459 +51 val_51 +138 val_138 +103 val_103 +239 val_239 +213 val_213 +216 val_216 +430 val_430 +278 val_278 +176 val_176 +289 val_289 +221 val_221 +65 val_65 +318 val_318 +332 val_332 +311 val_311 +275 val_275 +137 val_137 +241 val_241 +83 val_83 +333 val_333 +180 val_180 +284 val_284 +12 val_12 +230 val_230 +181 val_181 +67 val_67 +260 val_260 +404 val_404 +384 val_384 +489 val_489 +353 val_353 +373 val_373 +272 val_272 +138 val_138 +217 val_217 +84 val_84 +348 val_348 +466 val_466 +58 val_58 +8 val_8 +411 val_411 +230 val_230 +208 val_208 +348 val_348 +24 val_24 +463 val_463 +431 val_431 +179 val_179 +172 val_172 +42 val_42 +129 val_129 +158 val_158 +119 val_119 +496 val_496 +0 val_0 +322 val_322 +197 val_197 +468 val_468 +393 val_393 +454 val_454 +100 val_100 +298 val_298 +199 val_199 +191 val_191 +418 val_418 +96 val_96 +26 
val_26 +165 val_165 +327 val_327 +230 val_230 +205 val_205 +120 val_120 +131 val_131 +51 val_51 +404 val_404 +43 val_43 +436 val_436 +156 val_156 +469 val_469 +468 val_468 +308 val_308 +95 val_95 +196 val_196 +288 val_288 +481 val_481 +457 val_457 +98 val_98 +282 val_282 +197 val_197 +187 val_187 +318 val_318 +318 val_318 +409 val_409 +470 val_470 +137 val_137 +369 val_369 +316 val_316 +169 val_169 +413 val_413 +85 val_85 +77 val_77 +0 val_0 +490 val_490 +87 val_87 +364 val_364 +179 val_179 +118 val_118 +134 val_134 +395 val_395 +282 val_282 +138 val_138 +238 val_238 +419 val_419 +15 val_15 +118 val_118 +72 val_72 +90 val_90 +307 val_307 +19 val_19 +435 val_435 +10 val_10 +277 val_277 +273 val_273 +306 val_306 +224 val_224 +309 val_309 +389 val_389 +327 val_327 +242 val_242 +369 val_369 +392 val_392 +272 val_272 +331 val_331 +401 val_401 +242 val_242 +452 val_452 +177 val_177 +226 val_226 +5 val_5 +497 val_497 +402 val_402 +396 val_396 +317 val_317 +395 val_395 +58 val_58 +35 val_35 +336 val_336 +95 val_95 +11 val_11 +168 val_168 +34 val_34 +229 val_229 +233 val_233 +143 val_143 +472 val_472 +322 val_322 +498 val_498 +160 val_160 +195 val_195 +42 val_42 +321 val_321 +430 val_430 +119 val_119 +489 val_489 +458 val_458 +78 val_78 +76 val_76 +41 val_41 +223 val_223 +492 val_492 +149 val_149 +449 val_449 +218 val_218 +228 val_228 +138 val_138 +453 val_453 +30 val_30 +209 val_209 +64 val_64 +468 val_468 +76 val_76 +74 val_74 +342 val_342 +69 val_69 +230 val_230 +33 val_33 +368 val_368 +103 val_103 +296 val_296 +113 val_113 +216 val_216 +367 val_367 +344 val_344 +167 val_167 +274 val_274 +219 val_219 +239 val_239 +485 val_485 +116 val_116 +223 val_223 +256 val_256 +263 val_263 +70 val_70 +487 val_487 +480 val_480 +401 val_401 +288 val_288 +191 val_191 +5 val_5 +244 val_244 +438 val_438 +128 val_128 +467 val_467 +432 val_432 +202 val_202 +316 val_316 +229 val_229 +469 val_469 +463 val_463 +280 val_280 +2 val_2 +35 val_35 +283 val_283 +331 val_331 +235 val_235 +80 val_80 +44 val_44 +193 val_193 +321 val_321 +335 val_335 +104 val_104 +466 val_466 +366 val_366 +175 val_175 +403 val_403 +483 val_483 +53 val_53 +105 val_105 +257 val_257 +406 val_406 +409 val_409 +190 val_190 +406 val_406 +401 val_401 +114 val_114 +258 val_258 +90 val_90 +203 val_203 +262 val_262 +348 val_348 +424 val_424 +12 val_12 +396 val_396 +201 val_201 +217 val_217 +164 val_164 +431 val_431 +454 val_454 +478 val_478 +298 val_298 +125 val_125 +431 val_431 +164 val_164 +424 val_424 +187 val_187 +382 val_382 +5 val_5 +70 val_70 +397 val_397 +480 val_480 +291 val_291 +24 val_24 +351 val_351 +255 val_255 +104 val_104 +70 val_70 +163 val_163 +438 val_438 +119 val_119 +414 val_414 +200 val_200 +491 val_491 +237 val_237 +439 val_439 +360 val_360 +248 val_248 +479 val_479 +305 val_305 +417 val_417 +199 val_199 +444 val_444 +120 val_120 +429 val_429 +169 val_169 +443 val_443 +323 val_323 +325 val_325 +277 val_277 +230 val_230 +478 val_478 +178 val_178 +468 val_468 +310 val_310 +317 val_317 +333 val_333 +493 val_493 +460 val_460 +207 val_207 +249 val_249 +265 val_265 +480 val_480 +83 val_83 +136 val_136 +353 val_353 +172 val_172 +214 val_214 +462 val_462 +233 val_233 +406 val_406 +133 val_133 +175 val_175 +189 val_189 +454 val_454 +375 val_375 +401 val_401 +421 val_421 +407 val_407 +384 val_384 +256 val_256 +26 val_26 +134 val_134 +67 val_67 +384 val_384 +379 val_379 +18 val_18 +462 val_462 +492 val_492 +100 val_100 +298 val_298 +9 val_9 +341 val_341 +498 val_498 +146 val_146 +458 val_458 +362 val_362 +186 val_186 +285 val_285 +348 val_348 
+167 val_167 +18 val_18 +273 val_273 +183 val_183 +281 val_281 +344 val_344 +97 val_97 +469 val_469 +315 val_315 +84 val_84 +28 val_28 +37 val_37 +448 val_448 +152 val_152 +348 val_348 +307 val_307 +194 val_194 +414 val_414 +477 val_477 +222 val_222 +126 val_126 +90 val_90 +169 val_169 +403 val_403 +400 val_400 +200 val_200 +97 val_97 diff --git a/sql/hive/src/test/resources/golden/create table as with db name within backticks-3-afd6e46b6a289c3c24a8eec75a94043c b/sql/hive/src/test/resources/golden/create table as with db name within backticks-3-afd6e46b6a289c3c24a8eec75a94043c new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/sql/hive/src/test/resources/golden/semicolon-0-f104632770dc96b81f00ccdac51fe5a8 b/sql/hive/src/test/resources/golden/semicolon-0-f104632770dc96b81f00ccdac51fe5a8 new file mode 100644 index 0000000000000..1b79f38e25b24 --- /dev/null +++ b/sql/hive/src/test/resources/golden/semicolon-0-f104632770dc96b81f00ccdac51fe5a8 @@ -0,0 +1 @@ +500 diff --git a/sql/hive/src/test/resources/golden/udaf_number_format-0-eff4ef3c207d14d5121368f294697964 b/sql/hive/src/test/resources/golden/udaf_number_format-0-eff4ef3c207d14d5121368f294697964 new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/sql/hive/src/test/resources/golden/udaf_number_format-1-4a03c4328565c60ca99689239f07fb16 b/sql/hive/src/test/resources/golden/udaf_number_format-1-4a03c4328565c60ca99689239f07fb16 new file mode 100644 index 0000000000000..c6f275a0db131 --- /dev/null +++ b/sql/hive/src/test/resources/golden/udaf_number_format-1-4a03c4328565c60ca99689239f07fb16 @@ -0,0 +1 @@ +0.0 NULL NULL NULL diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/catalyst/plans/PlanTest.scala b/sql/hive/src/test/scala/org/apache/spark/sql/catalyst/plans/PlanTest.scala index 44ee5ab5975fb..98f1c0e69e29d 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/catalyst/plans/PlanTest.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/catalyst/plans/PlanTest.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.catalyst.plans -import org.apache.spark.sql.catalyst.expressions.{AttributeReference, ExprId} +import org.apache.spark.sql.catalyst.expressions.{Alias, AttributeReference, ExprId} import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.util._ import org.scalatest.FunSuite @@ -38,6 +38,8 @@ class PlanTest extends FunSuite { plan transformAllExpressions { case a: AttributeReference => AttributeReference(a.name, a.dataType, a.nullable)(exprId = ExprId(0)) + case a: Alias => + Alias(a.child, a.name)(exprId = ExprId(0)) } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertIntoHiveTableSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertIntoHiveTableSuite.scala index d4b175fa443a4..381cd2a29123e 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertIntoHiveTableSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertIntoHiveTableSuite.scala @@ -21,12 +21,11 @@ import java.io.File import org.scalatest.BeforeAndAfter -import com.google.common.io.Files - import org.apache.spark.sql.execution.QueryExecutionException import org.apache.spark.sql.{QueryTest, _} import org.apache.spark.sql.hive.test.TestHive import org.apache.spark.sql.types._ +import org.apache.spark.util.Utils /* Implicits */ import org.apache.spark.sql.hive.test.TestHive._ @@ -112,7 +111,7 @@ class InsertIntoHiveTableSuite extends QueryTest with BeforeAndAfter { test("SPARK-4203:random partition directory order") 
{ sql("CREATE TABLE tmp_table (key int, value string)") - val tmpDir = Files.createTempDir() + val tmpDir = Utils.createTempDir() sql(s"CREATE TABLE table_with_partition(c1 string) PARTITIONED by (p1 string,p2 string,p3 string,p4 string,p5 string) location '${tmpDir.toURI.toString}' ") sql("INSERT OVERWRITE TABLE table_with_partition partition (p1='a',p2='b',p3='c',p4='c',p5='1') SELECT 'blarr' FROM tmp_table") sql("INSERT OVERWRITE TABLE table_with_partition partition (p1='a',p2='b',p3='c',p4='c',p5='2') SELECT 'blarr' FROM tmp_table") diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala index 5d6a6f3b64f03..ff2e6ea9ea51d 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala @@ -19,13 +19,14 @@ package org.apache.spark.sql.hive import java.io.File +import scala.collection.mutable.ArrayBuffer + import org.scalatest.BeforeAndAfterEach import org.apache.commons.io.FileUtils import org.apache.hadoop.fs.Path import org.apache.hadoop.mapred.InvalidInputException -import org.apache.spark.sql.catalyst.util import org.apache.spark.sql._ import org.apache.spark.util.Utils import org.apache.spark.sql.types._ @@ -34,8 +35,6 @@ import org.apache.spark.sql.hive.test.TestHive.implicits._ import org.apache.spark.sql.parquet.ParquetRelation2 import org.apache.spark.sql.sources.LogicalRelation -import scala.collection.mutable.ArrayBuffer - /** * Tests for persisting tables created though the data sources API into the metastore. */ @@ -43,11 +42,12 @@ class MetastoreDataSourcesSuite extends QueryTest with BeforeAndAfterEach { override def afterEach(): Unit = { reset() - if (tempPath.exists()) Utils.deleteRecursively(tempPath) + Utils.deleteRecursively(tempPath) } val filePath = Utils.getSparkClassLoader.getResource("sample.json").getFile - var tempPath: File = util.getTempFilePath("jsonCTAS").getCanonicalFile + var tempPath: File = Utils.createTempDir() + tempPath.delete() test ("persistent JSON table") { sql( @@ -154,7 +154,7 @@ class MetastoreDataSourcesSuite extends QueryTest with BeforeAndAfterEach { } test("check change without refresh") { - val tempDir = File.createTempFile("sparksql", "json") + val tempDir = File.createTempFile("sparksql", "json", Utils.createTempDir()) tempDir.delete() sparkContext.parallelize(("a", "b") :: Nil).toDF() .toJSON.saveAsTextFile(tempDir.getCanonicalPath) @@ -192,7 +192,7 @@ class MetastoreDataSourcesSuite extends QueryTest with BeforeAndAfterEach { } test("drop, change, recreate") { - val tempDir = File.createTempFile("sparksql", "json") + val tempDir = File.createTempFile("sparksql", "json", Utils.createTempDir()) tempDir.delete() sparkContext.parallelize(("a", "b") :: Nil).toDF() .toJSON.saveAsTextFile(tempDir.getCanonicalPath) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala index a90bd1e257ade..8f3285242091c 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala @@ -241,7 +241,10 @@ abstract class HiveComparisonTest // Clear old output for this testcase. 
outputDirectories.map(new File(_, testCaseName)).filter(_.exists()).foreach(_.delete()) - val allQueries = sql.split("(?<=[^\\\\]);").map(_.trim).filterNot(q => q == "").toSeq + val sqlWithoutComment = + sql.split("\n").filterNot(l => l.matches("--.*(?<=[^\\\\]);")).mkString("\n") + val allQueries = + sqlWithoutComment.split("(?<=[^\\\\]);").map(_.trim).filterNot(q => q == "").toSeq // TODO: DOCUMENT UNSUPPORTED val queryList = diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala index c0d21bc9a89da..de140fc72a2c3 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala @@ -334,6 +334,14 @@ class HiveQuerySuite extends HiveComparisonTest with BeforeAndAfter { |DROP DATABASE IF EXISTS testdb CASCADE """.stripMargin) + createQueryTest("create table as with db name within backticks", + """ + |CREATE DATABASE IF NOT EXISTS testdb; + |CREATE TABLE `testdb`.`createdtable` AS SELECT * FROM default.src; + |SELECT * FROM testdb.createdtable; + |DROP DATABASE IF EXISTS testdb CASCADE + """.stripMargin) + createQueryTest("insert table with db name", """ |CREATE DATABASE IF NOT EXISTS testdb; diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index 22ea19bd82f86..1187228f4c3db 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -397,6 +397,13 @@ class SQLQuerySuite extends QueryTest { dropTempTable("data") } + test("resolve udtf with single alias") { + val rdd = sparkContext.makeRDD((1 to 5).map(i => s"""{"a":[$i, ${i+1}]}""")) + jsonRDD(rdd).registerTempTable("data") + val df = sql("SELECT explode(a) AS val FROM data") + val col = df("val") + } + test("logical.Project should not be resolved if it contains aggregates or generators") { // This test is used to test the fix of SPARK-5875. // The original issue was that Project's resolved will be true when it contains diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala index 1904f5faef3a0..d891c4e8903d9 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala @@ -32,6 +32,7 @@ import org.apache.spark.sql.sources.{InsertIntoDataSource, LogicalRelation} import org.apache.spark.sql.parquet.{ParquetRelation2, ParquetTableScan} import org.apache.spark.sql.SaveMode import org.apache.spark.sql.types._ +import org.apache.spark.util.Utils // The data where the partitioning key exists only in the directory structure. 
case class ParquetData(intField: Int, stringField: String) @@ -579,13 +580,8 @@ abstract class ParquetPartitioningTest extends QueryTest with BeforeAndAfterAll var partitionedTableDirWithKeyAndComplexTypes: File = null override def beforeAll(): Unit = { - partitionedTableDir = File.createTempFile("parquettests", "sparksql") - partitionedTableDir.delete() - partitionedTableDir.mkdir() - - normalTableDir = File.createTempFile("parquettests", "sparksql") - normalTableDir.delete() - normalTableDir.mkdir() + partitionedTableDir = Utils.createTempDir() + normalTableDir = Utils.createTempDir() (1 to 10).foreach { p => val partDir = new File(partitionedTableDir, s"p=$p") @@ -601,9 +597,7 @@ abstract class ParquetPartitioningTest extends QueryTest with BeforeAndAfterAll .toDF() .saveAsParquetFile(new File(normalTableDir, "normal").getCanonicalPath) - partitionedTableDirWithKey = File.createTempFile("parquettests", "sparksql") - partitionedTableDirWithKey.delete() - partitionedTableDirWithKey.mkdir() + partitionedTableDirWithKey = Utils.createTempDir() (1 to 10).foreach { p => val partDir = new File(partitionedTableDirWithKey, s"p=$p") @@ -613,9 +607,7 @@ abstract class ParquetPartitioningTest extends QueryTest with BeforeAndAfterAll .saveAsParquetFile(partDir.getCanonicalPath) } - partitionedTableDirWithKeyAndComplexTypes = File.createTempFile("parquettests", "sparksql") - partitionedTableDirWithKeyAndComplexTypes.delete() - partitionedTableDirWithKeyAndComplexTypes.mkdir() + partitionedTableDirWithKeyAndComplexTypes = Utils.createTempDir() (1 to 10).foreach { p => val partDir = new File(partitionedTableDirWithKeyAndComplexTypes, s"p=$p") @@ -625,9 +617,7 @@ abstract class ParquetPartitioningTest extends QueryTest with BeforeAndAfterAll }.toDF().saveAsParquetFile(partDir.getCanonicalPath) } - partitionedTableDirWithComplexTypes = File.createTempFile("parquettests", "sparksql") - partitionedTableDirWithComplexTypes.delete() - partitionedTableDirWithComplexTypes.mkdir() + partitionedTableDirWithComplexTypes = Utils.createTempDir() (1 to 10).foreach { p => val partDir = new File(partitionedTableDirWithComplexTypes, s"p=$p") diff --git a/streaming/pom.xml b/streaming/pom.xml index 0370b0e9e1aa3..23a8358d45c2a 100644 --- a/streaming/pom.xml +++ b/streaming/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT ../pom.xml @@ -82,6 +82,11 @@ junit test + + org.seleniumhq.selenium + selenium-java + test + com.novocode junit-interface diff --git a/streaming/src/main/scala/org/apache/spark/streaming/Checkpoint.scala b/streaming/src/main/scala/org/apache/spark/streaming/Checkpoint.scala index f88a8a0151550..db64e11e16304 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/Checkpoint.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/Checkpoint.scala @@ -43,10 +43,13 @@ class Checkpoint(@transient ssc: StreamingContext, val checkpointTime: Time) val delaySeconds = MetadataCleaner.getDelaySeconds(ssc.conf) val sparkConfPairs = ssc.conf.getAll - def sparkConf = { - new SparkConf(false).setAll(sparkConfPairs) + def createSparkConf(): SparkConf = { + val newSparkConf = new SparkConf(loadDefaults = false).setAll(sparkConfPairs) .remove("spark.driver.host") .remove("spark.driver.port") + val newMasterOption = new SparkConf(loadDefaults = true).getOption("spark.master") + newMasterOption.foreach { newMaster => newSparkConf.setMaster(newMaster) } + newSparkConf } def validate() { @@ -116,7 +119,10 @@ class CheckpointWriter( private var stopped = false 
private var fs_ : FileSystem = _ - class CheckpointWriteHandler(checkpointTime: Time, bytes: Array[Byte]) extends Runnable { + class CheckpointWriteHandler( + checkpointTime: Time, + bytes: Array[Byte], + clearCheckpointDataLater: Boolean) extends Runnable { def run() { var attempts = 0 val startTime = System.currentTimeMillis() @@ -163,7 +169,7 @@ class CheckpointWriter( val finishTime = System.currentTimeMillis() logInfo("Checkpoint for time " + checkpointTime + " saved to file '" + checkpointFile + "', took " + bytes.length + " bytes and " + (finishTime - startTime) + " ms") - jobGenerator.onCheckpointCompletion(checkpointTime) + jobGenerator.onCheckpointCompletion(checkpointTime, clearCheckpointDataLater) return } catch { case ioe: IOException => @@ -177,7 +183,7 @@ class CheckpointWriter( } } - def write(checkpoint: Checkpoint) { + def write(checkpoint: Checkpoint, clearCheckpointDataLater: Boolean) { val bos = new ByteArrayOutputStream() val zos = compressionCodec.compressedOutputStream(bos) val oos = new ObjectOutputStream(zos) @@ -185,7 +191,8 @@ class CheckpointWriter( oos.close() bos.close() try { - executor.execute(new CheckpointWriteHandler(checkpoint.checkpointTime, bos.toByteArray)) + executor.execute(new CheckpointWriteHandler( + checkpoint.checkpointTime, bos.toByteArray, clearCheckpointDataLater)) logDebug("Submitted checkpoint of time " + checkpoint.checkpointTime + " writer queue") } catch { case rej: RejectedExecutionException => diff --git a/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala b/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala index ba3f23434f24c..543224d4b07bc 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala @@ -116,7 +116,7 @@ class StreamingContext private[streaming] ( private[streaming] val sc: SparkContext = { if (isCheckpointPresent) { - new SparkContext(cp_.sparkConf) + new SparkContext(cp_.createSparkConf()) } else { sc_ } @@ -578,6 +578,7 @@ class StreamingContext private[streaming] ( // Even if we have already stopped, we still need to attempt to stop the SparkContext because // a user might stop(stopSparkContext = false) and then call stop(stopSparkContext = true). if (stopSparkContext) sc.stop() + uiTab.foreach(_.detach()) // The state should always be Stopped after calling `stop()`, even if we haven't started yet: state = Stopped } diff --git a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaDStream.scala index 505e4431e4350..01cdcb0574040 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaDStream.scala @@ -36,7 +36,7 @@ import org.apache.spark.streaming.dstream.DStream * [[org.apache.spark.streaming.api.java.JavaPairDStream]]. 
*/ class JavaDStream[T](val dstream: DStream[T])(implicit val classTag: ClassTag[T]) - extends JavaDStreamLike[T, JavaDStream[T], JavaRDD[T]] { + extends AbstractJavaDStreamLike[T, JavaDStream[T], JavaRDD[T]] { override def wrapRDD(rdd: RDD[T]): JavaRDD[T] = JavaRDD.fromRDD(rdd) diff --git a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaDStreamLike.scala b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaDStreamLike.scala index c382a12f4d099..2eabdd9387913 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaDStreamLike.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaDStreamLike.scala @@ -34,6 +34,15 @@ import org.apache.spark.streaming._ import org.apache.spark.streaming.api.java.JavaDStream._ import org.apache.spark.streaming.dstream.DStream +/** + * As a workaround for https://issues.scala-lang.org/browse/SI-8905, implementations + * of JavaDStreamLike should extend this dummy abstract class instead of directly inheriting + * from the trait. See SPARK-3266 for additional details. + */ +private[streaming] +abstract class AbstractJavaDStreamLike[T, This <: JavaDStreamLike[T, This, R], + R <: JavaRDDLike[T, R]] extends JavaDStreamLike[T, This, R] + trait JavaDStreamLike[T, This <: JavaDStreamLike[T, This, R], R <: JavaRDDLike[T, R]] extends Serializable { implicit val classTag: ClassTag[T] diff --git a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaPairDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaPairDStream.scala index bd01789b611a4..f94f2d0e8bd31 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaPairDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaPairDStream.scala @@ -45,7 +45,7 @@ import org.apache.spark.streaming.dstream.DStream class JavaPairDStream[K, V](val dstream: DStream[(K, V)])( implicit val kManifest: ClassTag[K], implicit val vManifest: ClassTag[V]) - extends JavaDStreamLike[(K, V), JavaPairDStream[K, V], JavaPairRDD[K, V]] { + extends AbstractJavaDStreamLike[(K, V), JavaPairDStream[K, V], JavaPairRDD[K, V]] { override def wrapRDD(rdd: RDD[(K, V)]): JavaPairRDD[K, V] = JavaPairRDD.fromRDD(rdd) diff --git a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/JobGenerator.scala b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/JobGenerator.scala index ac92774a38273..59488dfb0f8c6 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/JobGenerator.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/JobGenerator.scala @@ -30,7 +30,8 @@ import org.apache.spark.util.{Clock, ManualClock} private[scheduler] sealed trait JobGeneratorEvent private[scheduler] case class GenerateJobs(time: Time) extends JobGeneratorEvent private[scheduler] case class ClearMetadata(time: Time) extends JobGeneratorEvent -private[scheduler] case class DoCheckpoint(time: Time) extends JobGeneratorEvent +private[scheduler] case class DoCheckpoint( + time: Time, clearCheckpointDataLater: Boolean) extends JobGeneratorEvent private[scheduler] case class ClearCheckpointData(time: Time) extends JobGeneratorEvent /** @@ -163,8 +164,10 @@ class JobGenerator(jobScheduler: JobScheduler) extends Logging { /** * Callback called when the checkpoint of a batch has been written. */ - def onCheckpointCompletion(time: Time) { - eventActor ! 
ClearCheckpointData(time) + def onCheckpointCompletion(time: Time, clearCheckpointDataLater: Boolean) { + if (clearCheckpointDataLater) { + eventActor ! ClearCheckpointData(time) + } } /** Processes all events */ @@ -173,7 +176,8 @@ class JobGenerator(jobScheduler: JobScheduler) extends Logging { event match { case GenerateJobs(time) => generateJobs(time) case ClearMetadata(time) => clearMetadata(time) - case DoCheckpoint(time) => doCheckpoint(time) + case DoCheckpoint(time, clearCheckpointDataLater) => + doCheckpoint(time, clearCheckpointDataLater) case ClearCheckpointData(time) => clearCheckpointData(time) } } @@ -245,7 +249,7 @@ class JobGenerator(jobScheduler: JobScheduler) extends Logging { case Failure(e) => jobScheduler.reportError("Error generating jobs for time " + time, e) } - eventActor ! DoCheckpoint(time) + eventActor ! DoCheckpoint(time, clearCheckpointDataLater = false) } /** Clear DStream metadata for the given `time`. */ @@ -255,7 +259,7 @@ class JobGenerator(jobScheduler: JobScheduler) extends Logging { // If checkpointing is enabled, then checkpoint, // else mark batch to be fully processed if (shouldCheckpoint) { - eventActor ! DoCheckpoint(time) + eventActor ! DoCheckpoint(time, clearCheckpointDataLater = true) } else { // If checkpointing is not enabled, then delete metadata information about // received blocks (block data not saved in any case). Otherwise, wait for @@ -278,11 +282,11 @@ class JobGenerator(jobScheduler: JobScheduler) extends Logging { } /** Perform checkpoint for the give `time`. */ - private def doCheckpoint(time: Time) { + private def doCheckpoint(time: Time, clearCheckpointDataLater: Boolean) { if (shouldCheckpoint && (time - graph.zeroTime).isMultipleOf(ssc.checkpointDuration)) { logInfo("Checkpointing graph for time " + time) ssc.graph.updateCheckpointData(time) - checkpointWriter.write(new Checkpoint(ssc, time)) + checkpointWriter.write(new Checkpoint(ssc, time), clearCheckpointDataLater) } } diff --git a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/JobScheduler.scala b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/JobScheduler.scala index b3ffc71904c76..60bc099b27a4c 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/JobScheduler.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/JobScheduler.scala @@ -61,7 +61,7 @@ class JobScheduler(val ssc: StreamingContext) extends Logging { } }), "JobScheduler") - listenerBus.start() + listenerBus.start(ssc.sparkContext) receiverTracker = new ReceiverTracker(ssc) receiverTracker.start() jobGenerator.start() diff --git a/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingPage.scala b/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingPage.scala index 98e9a2e639e25..bfe8086fcf8fe 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingPage.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingPage.scala @@ -32,7 +32,7 @@ private[ui] class StreamingPage(parent: StreamingTab) extends WebUIPage("") with Logging { private val listener = parent.listener - private val startTime = Calendar.getInstance().getTime() + private val startTime = System.currentTimeMillis() private val emptyCell = "-" /** Render the page */ @@ -47,7 +47,7 @@ private[ui] class StreamingPage(parent: StreamingTab) /** Generate basic stats of the streaming program */ private def generateBasicStats(): Seq[Node] = { - val timeSinceStart = System.currentTimeMillis() - startTime.getTime + val 
timeSinceStart = System.currentTimeMillis() - startTime
Started at: {startTime.toString} diff --git a/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingTab.scala b/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingTab.scala index d9d04cd706a04..9a860ea4a6c68 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingTab.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingTab.scala @@ -36,6 +36,10 @@ private[spark] class StreamingTab(ssc: StreamingContext) ssc.addStreamingListener(listener) attachPage(new StreamingPage(this)) parent.attachTab(this) + + def detach() { + getSparkUI(ssc).detachTab(this) + } } private object StreamingTab { diff --git a/streaming/src/test/scala/org/apache/spark/streaming/CheckpointSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/CheckpointSuite.scala index 03c448f1df5f1..91a2b2bba461d 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/CheckpointSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/CheckpointSuite.scala @@ -146,7 +146,7 @@ class CheckpointSuite extends TestSuiteBase { // This tests whether spark conf persists through checkpoints, and certain // configs gets scrubbed - test("persistence of conf through checkpoints") { + test("recovery of conf through checkpoints") { val key = "spark.mykey" val value = "myvalue" System.setProperty(key, value) @@ -154,7 +154,7 @@ class CheckpointSuite extends TestSuiteBase { val originalConf = ssc.conf val cp = new Checkpoint(ssc, Time(1000)) - val cpConf = cp.sparkConf + val cpConf = cp.createSparkConf() assert(cpConf.get("spark.master") === originalConf.get("spark.master")) assert(cpConf.get("spark.app.name") === originalConf.get("spark.app.name")) assert(cpConf.get(key) === value) @@ -163,7 +163,8 @@ class CheckpointSuite extends TestSuiteBase { // Serialize/deserialize to simulate write to storage and reading it back val newCp = Utils.deserialize[Checkpoint](Utils.serialize(cp)) - val newCpConf = newCp.sparkConf + // Verify new SparkConf has all the previous properties + val newCpConf = newCp.createSparkConf() assert(newCpConf.get("spark.master") === originalConf.get("spark.master")) assert(newCpConf.get("spark.app.name") === originalConf.get("spark.app.name")) assert(newCpConf.get(key) === value) @@ -174,6 +175,20 @@ class CheckpointSuite extends TestSuiteBase { ssc = new StreamingContext(null, newCp, null) val restoredConf = ssc.conf assert(restoredConf.get(key) === value) + ssc.stop() + + // Verify new SparkConf picks up new master url if it is set in the properties. See SPARK-6331.
+ try { + val newMaster = "local[100]" + System.setProperty("spark.master", newMaster) + val newCpConf = newCp.createSparkConf() + assert(newCpConf.get("spark.master") === newMaster) + assert(newCpConf.get("spark.app.name") === originalConf.get("spark.app.name")) + ssc = new StreamingContext(null, newCp, null) + assert(ssc.sparkContext.master === newMaster) + } finally { + System.clearProperty("spark.master") + } } @@ -207,7 +222,7 @@ class CheckpointSuite extends TestSuiteBase { } test("recovery with saveAsHadoopFiles operation") { - val tempDir = Files.createTempDir() + val tempDir = Utils.createTempDir() try { testCheckpointedOperation( Seq(Seq("a", "a", "b"), Seq("", ""), Seq(), Seq("a", "a", "b"), Seq("", ""), Seq()), @@ -230,7 +245,7 @@ class CheckpointSuite extends TestSuiteBase { } test("recovery with saveAsNewAPIHadoopFiles operation") { - val tempDir = Files.createTempDir() + val tempDir = Utils.createTempDir() try { testCheckpointedOperation( Seq(Seq("a", "a", "b"), Seq("", ""), Seq(), Seq("a", "a", "b"), Seq("", ""), Seq()), @@ -268,7 +283,7 @@ class CheckpointSuite extends TestSuiteBase { // // After SPARK-5079 is addressed, should be able to remove this test since a strengthened // version of the other saveAsHadoopFile* tests would prevent regressions for this issue. - val tempDir = Files.createTempDir() + val tempDir = Utils.createTempDir() try { testCheckpointedOperation( Seq(Seq("a", "a", "b"), Seq("", ""), Seq(), Seq("a", "a", "b"), Seq("", ""), Seq()), diff --git a/streaming/src/test/scala/org/apache/spark/streaming/FailureSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/FailureSuite.scala index 6500608bba87c..26435d8515815 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/FailureSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/FailureSuite.scala @@ -20,15 +20,13 @@ package org.apache.spark.streaming import org.apache.spark.Logging import org.apache.spark.util.Utils -import java.io.File - /** * This testsuite tests master failures at random times while the stream is running using * the real clock. 
*/ class FailureSuite extends TestSuiteBase with Logging { - val directory = Utils.createTempDir().getAbsolutePath + val directory = Utils.createTempDir() val numBatches = 30 override def batchDuration = Milliseconds(1000) @@ -36,16 +34,16 @@ class FailureSuite extends TestSuiteBase with Logging { override def useManualClock = false override def afterFunction() { - Utils.deleteRecursively(new File(directory)) + Utils.deleteRecursively(directory) super.afterFunction() } test("multiple failures with map") { - MasterFailureTest.testMap(directory, numBatches, batchDuration) + MasterFailureTest.testMap(directory.getAbsolutePath, numBatches, batchDuration) } test("multiple failures with updateStateByKey") { - MasterFailureTest.testUpdateStateByKey(directory, numBatches, batchDuration) + MasterFailureTest.testUpdateStateByKey(directory.getAbsolutePath, numBatches, batchDuration) } } diff --git a/streaming/src/test/scala/org/apache/spark/streaming/ReceivedBlockHandlerSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/ReceivedBlockHandlerSuite.scala index 818f551dbe996..18a477f92094d 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/ReceivedBlockHandlerSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/ReceivedBlockHandlerSuite.scala @@ -25,8 +25,6 @@ import scala.concurrent.duration._ import scala.language.postfixOps import akka.actor.{ActorSystem, Props} -import com.google.common.io.Files -import org.apache.commons.io.FileUtils import org.apache.hadoop.conf.Configuration import org.scalatest.{BeforeAndAfter, FunSuite, Matchers} import org.scalatest.concurrent.Eventually._ @@ -39,7 +37,7 @@ import org.apache.spark.shuffle.hash.HashShuffleManager import org.apache.spark.storage._ import org.apache.spark.streaming.receiver._ import org.apache.spark.streaming.util._ -import org.apache.spark.util.{AkkaUtils, ManualClock} +import org.apache.spark.util.{AkkaUtils, ManualClock, Utils} import WriteAheadLogBasedBlockHandler._ import WriteAheadLogSuite._ @@ -76,7 +74,7 @@ class ReceivedBlockHandlerSuite extends FunSuite with BeforeAndAfter with Matche new NioBlockTransferService(conf, securityMgr), securityMgr, 0) blockManager.initialize("app-id") - tempDirectory = Files.createTempDir() + tempDirectory = Utils.createTempDir() manualClock.setTime(0) } @@ -93,10 +91,7 @@ class ReceivedBlockHandlerSuite extends FunSuite with BeforeAndAfter with Matche actorSystem.awaitTermination() actorSystem = null - if (tempDirectory != null && tempDirectory.exists()) { - FileUtils.deleteDirectory(tempDirectory) - tempDirectory = null - } + Utils.deleteRecursively(tempDirectory) } test("BlockManagerBasedBlockHandler - store blocks") { diff --git a/streaming/src/test/scala/org/apache/spark/streaming/ReceivedBlockTrackerSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/ReceivedBlockTrackerSuite.scala index a3a0fd5187403..42fad769f0c1a 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/ReceivedBlockTrackerSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/ReceivedBlockTrackerSuite.scala @@ -24,8 +24,6 @@ import scala.concurrent.duration._ import scala.language.{implicitConversions, postfixOps} import scala.util.Random -import com.google.common.io.Files -import org.apache.commons.io.FileUtils import org.apache.hadoop.conf.Configuration import org.scalatest.{BeforeAndAfter, FunSuite, Matchers} import org.scalatest.concurrent.Eventually._ @@ -51,15 +49,12 @@ class ReceivedBlockTrackerSuite before { conf = new 
SparkConf().setMaster("local[2]").setAppName("ReceivedBlockTrackerSuite") - checkpointDirectory = Files.createTempDir() + checkpointDirectory = Utils.createTempDir() } after { allReceivedBlockTrackers.foreach { _.stop() } - if (checkpointDirectory != null && checkpointDirectory.exists()) { - FileUtils.deleteDirectory(checkpointDirectory) - checkpointDirectory = null - } + Utils.deleteRecursively(checkpointDirectory) } test("block addition, and block to batch allocation") { diff --git a/streaming/src/test/scala/org/apache/spark/streaming/ReceiverSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/ReceiverSuite.scala index e8c34a9ee40b9..aa20ad0b5374e 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/ReceiverSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/ReceiverSuite.scala @@ -24,7 +24,6 @@ import java.util.concurrent.Semaphore import scala.collection.mutable import scala.collection.mutable.ArrayBuffer -import com.google.common.io.Files import org.scalatest.concurrent.Timeouts import org.scalatest.concurrent.Eventually._ import org.scalatest.time.SpanSugar._ @@ -34,6 +33,7 @@ import org.apache.spark.storage.StorageLevel import org.apache.spark.storage.StreamBlockId import org.apache.spark.streaming.receiver._ import org.apache.spark.streaming.receiver.WriteAheadLogBasedBlockHandler._ +import org.apache.spark.util.Utils /** Testsuite for testing the network receiver behavior */ class ReceiverSuite extends TestSuiteBase with Timeouts with Serializable { @@ -222,7 +222,7 @@ class ReceiverSuite extends TestSuiteBase with Timeouts with Serializable { .set("spark.streaming.receiver.writeAheadLog.enable", "true") .set("spark.streaming.receiver.writeAheadLog.rollingInterval", "1") val batchDuration = Milliseconds(500) - val tempDirectory = Files.createTempDir() + val tempDirectory = Utils.createTempDir() val logDirectory1 = new File(checkpointDirToLogDir(tempDirectory.getAbsolutePath, 0)) val logDirectory2 = new File(checkpointDirToLogDir(tempDirectory.getAbsolutePath, 1)) val allLogFiles1 = new mutable.HashSet[String]() @@ -251,7 +251,6 @@ class ReceiverSuite extends TestSuiteBase with Timeouts with Serializable { } withStreamingContext(new StreamingContext(sparkConf, batchDuration)) { ssc => - tempDirectory.deleteOnExit() val receiver1 = ssc.sparkContext.clean(new FakeReceiver(sendData = true)) val receiver2 = ssc.sparkContext.clean(new FakeReceiver(sendData = true)) val receiverStream1 = ssc.receiverStream(receiver1) diff --git a/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala index 6a7cd97aa3222..2e5005ef6ff14 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala @@ -100,7 +100,7 @@ class StreamingContextSuite extends FunSuite with BeforeAndAfter with Timeouts w assert(cp.sparkConfPairs.toMap.getOrElse("spark.cleaner.ttl", "-1") === "10") ssc1.stop() val newCp = Utils.deserialize[Checkpoint](Utils.serialize(cp)) - assert(newCp.sparkConf.getInt("spark.cleaner.ttl", -1) === 10) + assert(newCp.createSparkConf().getInt("spark.cleaner.ttl", -1) === 10) ssc = new StreamingContext(null, newCp, null) assert(ssc.conf.getInt("spark.cleaner.ttl", -1) === 10) } diff --git a/streaming/src/test/scala/org/apache/spark/streaming/UISeleniumSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/UISeleniumSuite.scala 
new file mode 100644 index 0000000000000..87a0395efbf2a --- /dev/null +++ b/streaming/src/test/scala/org/apache/spark/streaming/UISeleniumSuite.scala @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.streaming + +import org.openqa.selenium.WebDriver +import org.openqa.selenium.htmlunit.HtmlUnitDriver +import org.scalatest._ +import org.scalatest.concurrent.Eventually._ +import org.scalatest.selenium.WebBrowser +import org.scalatest.time.SpanSugar._ + +import org.apache.spark._ + + + + +/** + * Selenium tests for the Spark Web UI. + */ +class UISeleniumSuite extends FunSuite with WebBrowser with Matchers with BeforeAndAfterAll with TestSuiteBase { + + implicit var webDriver: WebDriver = _ + + override def beforeAll(): Unit = { + webDriver = new HtmlUnitDriver + } + + override def afterAll(): Unit = { + if (webDriver != null) { + webDriver.quit() + } + } + + /** + * Create a test SparkStreamingContext with the SparkUI enabled. + */ + private def newSparkStreamingContext(): StreamingContext = { + val conf = new SparkConf() + .setMaster("local") + .setAppName("test") + .set("spark.ui.enabled", "true") + val ssc = new StreamingContext(conf, Seconds(1)) + assert(ssc.sc.ui.isDefined, "Spark UI is not started!") + ssc + } + + test("attaching and detaching a Streaming tab") { + withStreamingContext(newSparkStreamingContext()) { ssc => + val sparkUI = ssc.sparkContext.ui.get + + eventually(timeout(10 seconds), interval(50 milliseconds)) { + go to (sparkUI.appUIAddress.stripSuffix("/")) + find(cssSelector( """ul li a[href*="streaming"]""")) should not be (None) + } + + eventually(timeout(10 seconds), interval(50 milliseconds)) { + // check whether streaming page exists + go to (sparkUI.appUIAddress.stripSuffix("/") + "/streaming") + val statisticText = findAll(cssSelector("li strong")).map(_.text).toSeq + statisticText should contain("Network receivers:") + statisticText should contain("Batch interval:") + } + + ssc.stop(false) + + eventually(timeout(10 seconds), interval(50 milliseconds)) { + go to (sparkUI.appUIAddress.stripSuffix("/")) + find(cssSelector( """ul li a[href*="streaming"]""")) should be(None) + } + + eventually(timeout(10 seconds), interval(50 milliseconds)) { + go to (sparkUI.appUIAddress.stripSuffix("/") + "/streaming") + val statisticText = findAll(cssSelector("li strong")).map(_.text).toSeq + statisticText should not contain ("Network receivers:") + statisticText should not contain ("Batch interval:") + } + } + } +} + diff --git a/streaming/src/test/scala/org/apache/spark/streaming/UISuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/UISuite.scala deleted file mode 100644 index 8e30118266855..0000000000000 --- 
a/streaming/src/test/scala/org/apache/spark/streaming/UISuite.scala +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.streaming - -import scala.io.Source - -import org.scalatest.FunSuite -import org.scalatest.concurrent.Eventually._ -import org.scalatest.time.SpanSugar._ - -import org.apache.spark.SparkConf - -class UISuite extends FunSuite { - - // Ignored: See SPARK-1530 - ignore("streaming tab in spark UI") { - val conf = new SparkConf() - .setMaster("local") - .setAppName("test") - .set("spark.ui.enabled", "true") - val ssc = new StreamingContext(conf, Seconds(1)) - assert(ssc.sc.ui.isDefined, "Spark UI is not started!") - val ui = ssc.sc.ui.get - - eventually(timeout(10 seconds), interval(50 milliseconds)) { - val html = Source.fromURL(ui.appUIAddress).mkString - assert(!html.contains("random data that should not be present")) - // test if streaming tab exist - assert(html.toLowerCase.contains("streaming")) - // test if other Spark tabs still exist - assert(html.toLowerCase.contains("stages")) - } - - eventually(timeout(10 seconds), interval(50 milliseconds)) { - val html = Source.fromURL(ui.appUIAddress.stripSuffix("/") + "/streaming").mkString - assert(html.toLowerCase.contains("batch")) - assert(html.toLowerCase.contains("network")) - } - } -} diff --git a/streaming/src/test/scala/org/apache/spark/streaming/scheduler/JobGeneratorSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/scheduler/JobGeneratorSuite.scala new file mode 100644 index 0000000000000..4150b60635ed6 --- /dev/null +++ b/streaming/src/test/scala/org/apache/spark/streaming/scheduler/JobGeneratorSuite.scala @@ -0,0 +1,133 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.streaming.scheduler + +import java.util.concurrent.CountDownLatch + +import scala.concurrent.duration._ +import scala.language.postfixOps + +import org.scalatest.concurrent.Eventually._ + +import org.apache.spark.rdd.RDD +import org.apache.spark.streaming._ +import org.apache.spark.util.{ManualClock, Utils} + +class JobGeneratorSuite extends TestSuiteBase { + + // SPARK-6222 is a tricky regression bug which causes received block metadata + // to be deleted before the corresponding batch has completed. This occurs when + // the following conditions are met. + // 1. streaming checkpointing is enabled by setting streamingContext.checkpoint(dir) + // 2. input data is received through a receiver as blocks + // 3. a batch processing a set of blocks takes a long time, such that a few subsequent + // batches have been generated and submitted for processing. + // + // The JobGenerator (as of Mar 16, 2015) checkpoints twice per batch, once after generation + // of a batch, and another time after the completion of a batch. The cleanup of + // checkpoint data (including block metadata, etc.) from DStream must be done only after the + // 2nd checkpoint has completed, that is, after the batch has been completely processed. + // However, the issue is that the checkpoint data, and along with it the received block data, is + // cleaned even in the case of the 1st checkpoint, causing premature deletion of received block + // data. For example, if the 3rd batch is still being processed, the 7th batch may get generated, + // and the corresponding "1st checkpoint" will delete received block metadata of batches older + // than the 6th batch. That is, the 3rd batch's block metadata gets deleted even before the 3rd + // batch has been completely processed. + // + // This test tries to create that scenario with the following steps. + // 1. enable checkpointing + // 2. generate batches with received blocks + // 3. make the 3rd batch never complete + // 4. allow subsequent batches to be generated (to allow premature deletion of 3rd batch metadata) + // 5.
verify whether the 3rd batch's block metadata still exists + // + test("SPARK-6222: Do not clear received block data too soon") { + import JobGeneratorSuite._ + val checkpointDir = Utils.createTempDir() + val testConf = conf + testConf.set("spark.streaming.clock", "org.apache.spark.streaming.util.ManualClock") + testConf.set("spark.streaming.receiver.writeAheadLog.rollingInterval", "1") + + withStreamingContext(new StreamingContext(testConf, batchDuration)) { ssc => + val clock = ssc.scheduler.clock.asInstanceOf[ManualClock] + val numBatches = 10 + val longBatchNumber = 3 // 3rd batch will take a long time + val longBatchTime = longBatchNumber * batchDuration.milliseconds + + val testTimeout = timeout(10 seconds) + val inputStream = ssc.receiverStream(new TestReceiver) + + inputStream.foreachRDD((rdd: RDD[Int], time: Time) => { + if (time.milliseconds == longBatchTime) { + while (waitLatch.getCount() > 0) { + waitLatch.await() + println("Await over") + } + } + }) + + val batchCounter = new BatchCounter(ssc) + ssc.checkpoint(checkpointDir.getAbsolutePath) + ssc.start() + + // Make sure that only 1 batch of information is remembered + assert(inputStream.rememberDuration === batchDuration) + val receiverTracker = ssc.scheduler.receiverTracker + + // Get the blocks belonging to a batch + def getBlocksOfBatch(batchTime: Long) = { + receiverTracker.getBlocksOfBatchAndStream(Time(batchTime), inputStream.id) + } + + // Wait for new blocks to be received + def waitForNewReceivedBlocks() { + eventually(testTimeout) { + assert(receiverTracker.hasUnallocatedBlocks) + } + } + + // Wait for received blocks to be allocated to a batch + def waitForBlocksToBeAllocatedToBatch(batchTime: Long) { + eventually(testTimeout) { + assert(getBlocksOfBatch(batchTime).nonEmpty) + } + } + + // Generate a large number of batches with blocks in them + for (batchNum <- 1 to numBatches) { + waitForNewReceivedBlocks() + clock.advance(batchDuration.milliseconds) + waitForBlocksToBeAllocatedToBatch(clock.getTimeMillis()) + } + + // Wait for the 3rd batch to start + eventually(testTimeout) { + assert(ssc.scheduler.getPendingTimes().contains(Time(numBatches * batchDuration.milliseconds))) + } + + // Verify that the 3rd batch's block data is still present while the 3rd batch is incomplete + assert(getBlocksOfBatch(longBatchTime).nonEmpty, "blocks of incomplete batch already deleted") + assert(batchCounter.getNumCompletedBatches < longBatchNumber) + waitLatch.countDown() + } + } +} + +object JobGeneratorSuite { + val waitLatch = new CountDownLatch(1) +} diff --git a/tools/pom.xml b/tools/pom.xml index 181236d1bcbf6..1c6f3e83a1819 100644 --- a/tools/pom.xml +++ b/tools/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.10 - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT ../pom.xml diff --git a/yarn/pom.xml b/yarn/pom.xml index c13534f0410a1..7c8c3613e7a05 100644 --- a/yarn/pom.xml +++ b/yarn/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.10 - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT ../pom.xml diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala index e966bfba7bb7d..3d18690cd9cbf 100644 --- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala @@ -151,7 +151,7 @@ private[spark] class ApplicationMaster( logError("Uncaught exception: ", e) finish(FinalApplicationStatus.FAILED, ApplicationMaster.EXIT_UNCAUGHT_EXCEPTION, - "Uncaught exception: "
+ e.getMessage()) + "Uncaught exception: " + e) } exitCode } @@ -486,10 +486,10 @@ private[spark] class ApplicationMaster( case _: InterruptedException => // Reporter thread can interrupt to stop user class case cause: Throwable => - logError("User class threw exception: " + cause.getMessage, cause) + logError("User class threw exception: " + cause, cause) finish(FinalApplicationStatus.FAILED, ApplicationMaster.EXIT_EXCEPTION_USER_CLASS, - "User class threw exception: " + cause.getMessage) + "User class threw exception: " + cause) } } } @@ -534,7 +534,6 @@ private[spark] class ApplicationMaster( driver ! x case RequestExecutors(requestedTotal) => - logInfo(s"Driver requested a total number of $requestedTotal executor(s).") Option(allocator) match { case Some(a) => a.requestTotalExecutors(requestedTotal) case None => logWarning("Container allocator is not ready to request executors yet.") diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala index 55bfbcd9cb84b..c98763e15b58f 100644 --- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala @@ -86,7 +86,8 @@ private[yarn] class YarnAllocator( @volatile private var targetNumExecutors = args.numExecutors // Keep track of which container is running which executor to remove the executors later - private val executorIdToContainer = new HashMap[String, Container] + // Visible for testing. + private[yarn] val executorIdToContainer = new HashMap[String, Container] // Executor memory in MB. protected val executorMemory = args.executorMemory @@ -137,7 +138,10 @@ private[yarn] class YarnAllocator( * be killed. */ def requestTotalExecutors(requestedTotal: Int): Unit = synchronized { - targetNumExecutors = requestedTotal + if (requestedTotal != targetNumExecutors) { + logInfo(s"Driver requested a total number of $requestedTotal executor(s).") + targetNumExecutors = requestedTotal + } } /** @@ -148,8 +152,6 @@ private[yarn] class YarnAllocator( val container = executorIdToContainer.remove(executorId).get internalReleaseContainer(container) numExecutorsRunning -= 1 - targetNumExecutors -= 1 - assert(targetNumExecutors >= 0, "Allocator killed more executors than are allocated!") } else { logWarning(s"Attempted to kill unknown executor $executorId!") } @@ -351,7 +353,8 @@ private[yarn] class YarnAllocator( } } - private def processCompletedContainers(completedContainers: Seq[ContainerStatus]): Unit = { + // Visible for testing. 
+ private[yarn] def processCompletedContainers(completedContainers: Seq[ContainerStatus]): Unit = { for (completedContainer <- completedContainers) { val containerId = completedContainer.getContainerId diff --git a/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnAllocatorSuite.scala b/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnAllocatorSuite.scala index 3c224f148802e..c09b01bafce37 100644 --- a/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnAllocatorSuite.scala +++ b/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnAllocatorSuite.scala @@ -206,6 +206,28 @@ class YarnAllocatorSuite extends FunSuite with Matchers with BeforeAndAfterEach handler.getNumExecutorsRunning should be (2) } + test("kill executors") { + val handler = createAllocator(4) + handler.updateResourceRequests() + handler.getNumExecutorsRunning should be (0) + handler.getNumPendingAllocate should be (4) + + val container1 = createContainer("host1") + val container2 = createContainer("host2") + handler.handleAllocatedContainers(Array(container1, container2)) + + handler.requestTotalExecutors(1) + handler.executorIdToContainer.keys.foreach { id => handler.killExecutor(id) } + + val statuses = Seq(container1, container2).map { c => + ContainerStatus.newInstance(c.getId(), ContainerState.COMPLETE, "Finished", 0) + } + handler.updateResourceRequests() + handler.processCompletedContainers(statuses.toSeq) + handler.getNumExecutorsRunning should be (0) + handler.getNumPendingAllocate should be (1) + } + test("memory exceeded diagnostic regexes") { val diagnostics = "Container [pid=12465,containerID=container_1412887393566_0003_01_000002] is running " + diff --git a/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtilSuite.scala b/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtilSuite.scala index b5a2db8f6225c..4194f36499e66 100644 --- a/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtilSuite.scala +++ b/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtilSuite.scala @@ -50,7 +50,7 @@ class YarnSparkHadoopUtilSuite extends FunSuite with Matchers with Logging { if (hasBash) test(name)(fn) else ignore(name)(fn) bashTest("shell script escaping") { - val scriptFile = File.createTempFile("script.", ".sh") + val scriptFile = File.createTempFile("script.", ".sh", Utils.createTempDir()) + val args = Array("arg1", "${arg.2}", "\"arg3\"", "'arg4'", "$arg5", "\\arg6") try { val argLine = args.map(a => YarnSparkHadoopUtil.escapeForShell(a)).mkString(" ")