
[SPARK-1103] [WIP] Automatic garbage collection of RDD, shuffle and broadcast data #126

Closed
51 commits
1e752f1
Added unpersist method to Broadcast.
Feb 5, 2014
80dd977
Fix for Broadcast unpersist patch.
Feb 6, 2014
e427a9e
Added ContextCleaner to automatically clean RDDs and shuffles when th…
tdas Feb 14, 2014
8512612
Changed TimeStampedHashMap to use WrappedJavaHashMap.
tdas Feb 14, 2014
a24fefc
Merge remote-tracking branch 'apache/master' into state-cleanup
tdas Mar 11, 2014
cb0a5a6
Fixed docs and styles.
tdas Mar 11, 2014
ae9da88
Removed unncessary TimeStampedHashMap from DAGScheduler, added try-ca…
tdas Mar 12, 2014
e61daa0
Modifications based on the comments on PR 126.
tdas Mar 13, 2014
a7260d3
Added try-catch in context cleaner and null value cleaning in TimeSta…
tdas Mar 17, 2014
892b952
Removed use of BoundedHashMap, and made BlockManagerSlaveActor cleanu…
tdas Mar 18, 2014
e1fba5f
Style fix
tdas Mar 19, 2014
f2881fd
Changed ContextCleaner to use ReferenceQueue instead of finalizer
tdas Mar 25, 2014
620eca3
Changes based on PR comments.
tdas Mar 25, 2014
a007307
Merge remote-tracking branch 'apache/master' into state-cleanup
tdas Mar 25, 2014
d2f8b97
Removed duplicate unpersistRDD.
tdas Mar 25, 2014
6c9dcf6
Added missing Apache license
tdas Mar 25, 2014
c7ccef1
Merge branch 'bc-unpersist-merge' of github.com:ignatich/incubator-sp…
andrewor14 Mar 26, 2014
ba52e00
Refactor broadcast classes
andrewor14 Mar 26, 2014
d0edef3
Add framework for broadcast cleanup
andrewor14 Mar 26, 2014
544ac86
Clean up broadcast blocks through BlockManager*
andrewor14 Mar 26, 2014
e95479c
Add tests for unpersisting broadcast
andrewor14 Mar 27, 2014
f201a8d
Test broadcast cleanup in ContextCleanerSuite + remove BoundedHashMap
andrewor14 Mar 27, 2014
c92e4d9
Merge github.com:apache/spark into cleanup
andrewor14 Mar 27, 2014
0d17060
Import, comments, and style fixes (minor)
andrewor14 Mar 28, 2014
34f436f
Generalize BroadcastBlockId to remove BroadcastHelperBlockId
andrewor14 Mar 28, 2014
fbfeec8
Add functionality to query executors for their local BlockStatuses
andrewor14 Mar 29, 2014
88904a3
Make TimeStampedWeakValueHashMap a wrapper of TimeStampedHashMap
andrewor14 Mar 29, 2014
e442246
Merge github.com:apache/spark into cleanup
andrewor14 Mar 29, 2014
8557c12
Merge github.com:apache/spark into cleanup
andrewor14 Mar 30, 2014
7edbc98
Merge remote-tracking branch 'apache-github/master' into state-cleanup
tdas Mar 31, 2014
634a097
Merge branch 'state-cleanup' of github.com:tdas/spark into cleanup
andrewor14 Mar 31, 2014
7ed72fb
Fix style test fail + remove verbose test message regarding broadcast
andrewor14 Mar 31, 2014
5016375
Address TD's comments
andrewor14 Apr 1, 2014
f0aabb1
Correct semantics for TimeStampedWeakValueHashMap + add tests
andrewor14 Apr 2, 2014
762a4d8
Merge pull request #1 from andrewor14/cleanup
tdas Apr 2, 2014
a6460d4
Merge github.com:apache/spark into cleanup
andrewor14 Apr 4, 2014
c5b1d98
Address Patrick's comments
andrewor14 Apr 4, 2014
a2cc8bc
Merge remote-tracking branch 'apache/master' into state-cleanup
tdas Apr 4, 2014
ada45f0
Merge branch 'state-cleanup' of github.com:tdas/spark into cleanup
andrewor14 Apr 4, 2014
cd72d19
Make automatic cleanup configurable (not documented)
andrewor14 Apr 4, 2014
b27f8e8
Merge pull request #3 from andrewor14/cleanup
tdas Apr 4, 2014
a430f06
Fixed compilation errors.
tdas Apr 4, 2014
104a89a
Fixed failing BroadcastSuite unit tests by introducing blocking for r…
tdas Apr 4, 2014
6222697
Fixed bug and adding unit test for removeBroadcast in BlockManagerSuite.
tdas Apr 4, 2014
41c9ece
Added more unit tests for BlockManager, DiskBlockManager, and Context…
tdas Apr 7, 2014
2b95b5e
Added more documentation on Broadcast implementations, specially whic…
tdas Apr 7, 2014
4d05314
Scala style fix.
tdas Apr 7, 2014
cff023c
Fixed issues based on Andrew's comments.
tdas Apr 7, 2014
d25a86e
Fixed stupid typo.
tdas Apr 7, 2014
f489fdc
Merge remote-tracking branch 'apache/master' into state-cleanup
tdas Apr 8, 2014
61b8d6e
Fixed issue with Tachyon + new BlockManager methods.
tdas Apr 8, 2014
126 changes: 126 additions & 0 deletions core/src/main/scala/org/apache/spark/ContextCleaner.scala
@@ -0,0 +1,126 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark

import scala.collection.mutable.{ArrayBuffer, SynchronizedBuffer}

import java.util.concurrent.{LinkedBlockingQueue, TimeUnit}

import org.apache.spark.rdd.RDD

/** Listener class used for testing when any item has been cleaned by the Cleaner class */
private[spark] trait CleanerListener {
def rddCleaned(rddId: Int)
def shuffleCleaned(shuffleId: Int)
Contributor:

Can we also add cleared broadcast variables in here?

Contributor (author):

Yes, I plan to do that. I am going to pull in this PR

}

/**
* Cleans RDDs and shuffle data.
*/
private[spark] class ContextCleaner(env: SparkEnv) extends Logging {

/** Classes to represent cleaning tasks */
private sealed trait CleaningTask
private case class CleanRDD(sc: SparkContext, id: Int) extends CleaningTask
private case class CleanShuffle(id: Int) extends CleaningTask
// TODO: add CleanBroadcast

private val queue = new LinkedBlockingQueue[CleaningTask]
Contributor:

Should this be synchronized?

Contributor (author):

BlockingQueue implementations are thread-safe. :)

Contributor:

What's the rationale to do this as an explicit BlockingQueue and polling Thread instead of as an actor?

Contributor (author):

The rationale behind this was to have tight control over what code gets invoked from a finalize function. Using an actor would also work, but given the higher-risk code path of being called from the finalize function (RDD.finalize --> RDD.cleanup --> ContextCleaner.cleanRDD --> enqueue RDD ID in the queue), I think it's better to use more easily understandable components.
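For context, the pattern being defended in this thread is an ordinary producer/consumer loop. A minimal, self-contained sketch of it in Scala (illustrative names only, not the actual Spark code):

import java.util.concurrent.{LinkedBlockingQueue, TimeUnit}

object CleanerSketch {
  sealed trait CleaningTask
  case class CleanRDD(id: Int) extends CleaningTask

  private val queue = new LinkedBlockingQueue[CleaningTask]
  @volatile private var stopped = false

  // Daemon thread, so it never keeps the JVM alive on shutdown.
  private val cleaningThread = new Thread("context-cleaner-sketch") {
    override def run() {
      while (!stopped) {
        // Poll with a timeout so the loop can re-check `stopped` and exit.
        val task = queue.poll(100, TimeUnit.MILLISECONDS)
        if (task != null) {
          task match {
            case CleanRDD(id) => println("cleaning RDD " + id)
          }
        }
      }
    }
  }

  def start() { cleaningThread.setDaemon(true); cleaningThread.start() }
  def stop() { stopped = true }
  def enqueue(task: CleaningTask) { queue.put(task) }
}

Everything that runs on the cleaning thread is explicit here, which is the "tight control" argument: nothing is routed through a third-party mailbox, and the call made from a finalizer does nothing but queue.put.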


protected val listeners = new ArrayBuffer[CleanerListener]
with SynchronizedBuffer[CleanerListener]

private val cleaningThread = new Thread() { override def run() { keepCleaning() }}

private var stopped = false
Contributor:

Any particular reason not to use volatile instead of explicit synchronization here? It's not a big deal either way, the performance difference should be negligible here...

Contributor (author):

No particular reason really. Maybe I am wrong, but I don't think volatile is used much in the non-test code base, so I didn't use it. I will change it to volatile; probably slightly cleaner code.
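For reference, the two variants being weighed look like this side by side (a sketch; the object names are illustrative):

object WithLock {
  private var stopped = false
  def isStopped: Boolean = synchronized { stopped }
  def stop() { synchronized { stopped = true } }
}

object WithVolatile {
  // Same visibility guarantee for a single boolean, without taking a lock.
  @volatile private var stopped = false
  def isStopped: Boolean = stopped
  def stop() { stopped = true }
}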


/** Start the cleaner */
def start() {
cleaningThread.setDaemon(true)
Contributor:

would be good to set the name of the thread, so that stack dumps are easier to understand.

Contributor (author):

Yes, I am working on an updated patch based on all the feedback and I have already put that in.

cleaningThread.start()
}

/** Stop the cleaner */
def stop() {
synchronized { stopped = true }
cleaningThread.interrupt()
Contributor:

Isn't there a risk that calling interrupt will leave various data structures in a corrupt/inconsistent state here? From what I can see, cleaner.stop() is called before a bunch of other things stop. What if, e.g., the cleaning thread is in the middle of some code that mutates a data structure in the DAG scheduler or some other data structure that is later stopped... does that ever happen?

Contributor:

Ya it looks like a lot of the clean-up implementations interact with shared state like the SparkContext and so on.

Contributor:

Are you suggesting that we just let the thread terminate automatically with the JVM?

Contributor:

No I'm suggesting that you rely on the existing stopped variable for this and let it cleanly exit the while loop.

}
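A sketch of what that suggestion amounts to, written against the patch's own names (the join() is an assumption about waiting for an in-flight task; it is not in the patch):

def stop() {
  stopped = true        // the loop observes this within one poll timeout
  cleaningThread.join() // optional: wait for any in-flight task to finish
}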

/** Clean (unpersist) RDD data. */
def cleanRDD(rdd: RDD[_]) {
Contributor:

This may be paranoia speaking, but it would seem cleaner to me to pass in only the fields of RDD that you need here, because calling cleanRDD(this) in a finalizer seems sketchy. It's clearly not a problem right now, but just for future modifiers...

Contributor (author):

I did this because I need both the RDD ID and the associated SparkContext for cleanup, and it seems too specific to pass them separately. What if in the future we need some other field of the RDD?
What I can do is note in the function docs that no time- or resource-consuming work should be done in it.

Contributor:

I'm only concerned with someone accidentally persisting a reference to the RDD somewhere, which would either halt finalization or destroy the universe.

enqueue(CleanRDD(rdd.sparkContext, rdd.id))
logDebug("Enqueued RDD " + rdd + " for cleaning up")
}

/** Clean shuffle data. */
def cleanShuffle(shuffleId: Int) {
enqueue(CleanShuffle(shuffleId))
logDebug("Enqueued shuffle " + shuffleId + " for cleaning up")
}

def attachListener(listener: CleanerListener) {
listeners += listener
}
/** Enqueue a cleaning task */
Contributor:

nit: newline before this line

private def enqueue(task: CleaningTask) {
queue.put(task)
}

/** Keep cleaning RDDs and shuffle data */
private def keepCleaning() {
try {
while (!isStopped) {
val taskOpt = Option(queue.poll(100, TimeUnit.MILLISECONDS))
Contributor:

We should make this configurable

Contributor:

I don't think this necessarily needs to be configurable, simply because the "when" of a finalizer is always unclear, so changing the period of checking won't really give you any new guarantees.

Contributor (author):

Yeah, @aarondav answered it for me.

Contributor:

By configurable I don't mean exposing it to the user. It just seems to be that 100ms is completely arbitrary and it would be good to at least have a mechanism to change it. Though either way this is not a big deal.

if (taskOpt.isDefined) {
Contributor:

What do you think about this?

for (task <- taskOpt) {
   blah blah blah
}

could also include a few monads and higher order functions while we're at it, though

Contributor:

Or Option(...).foreach { ... }, which I prefer since it doesn't use any monads
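The three spellings under discussion behave identically on an Option; a small standalone illustration:

val taskOpt: Option[String] = Some("some task")

// Original style: explicit test plus .get
if (taskOpt.isDefined) { println(taskOpt.get) }

// For-comprehension: the body runs only if the Option is non-empty
for (task <- taskOpt) { println(task) }

// foreach: same behavior, no .get needed
taskOpt.foreach { task => println(task) }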

logDebug("Got cleaning task " + taskOpt.get)
taskOpt.get match {
case CleanRDD(sc, rddId) => doCleanRDD(sc, rddId)
case CleanShuffle(shuffleId) => doCleanShuffle(shuffleId)
}
}
}
} catch {
case ie: java.lang.InterruptedException =>
Contributor:

Why the fully qualified path? It doesn't even need an import!

if (!isStopped) logWarning("Cleaning thread interrupted")
Contributor:

wrap the body in curly braces

Contributor (author):

Which body? The whole "if (!stopped) ... " or the part after the if clause?

Contributor:

The latter.

}
}

/** Perform RDD cleaning */
private def doCleanRDD(sc: SparkContext, rddId: Int) {
Contributor:

Would it make more sense to just store the SparkContext passed to the constructor of ContextCleaner? You could just add a val:

private[spark] class ContextCleaner(val sc: SparkContext) extends Logging {

Otherwise it seems a little odd here to pass a SparkContext again.

Contributor (author):

Already fixed that. Leftover code. Don't need val sc for that.

logDebug("Cleaning rdd " + rddId)
Contributor:

Why doesn't this just call rdd.unpersist()? It seems we have duplicated code paths here.

Contributor (author):

Because the RDD object isn't available here (already garbage collected). Yes, I am aware that this is a duplicate code path. Can you think of a better place (that is, not in the RDD class) where we can move this functionality?

Contributor:

What if the RDD unpersist function called sc.cleaner.doCleanRDD(id)?

Contributor (author):

That can be done. The only thing is that cleaning up an RDD may in the future involve more than just unpersisting.

sc.env.blockManager.master.removeRdd(rddId, false)
sc.persistentRdds.remove(rddId)
Contributor:

It seems that if removeRdd throws an exception, then that can crash your cleaning thread, since keepCleaning only catches InterruptedException. Either add a try catch here and in doCleanShuffle, or within the while loop of keepCleaning.

Contributor:

Minor: Also, you already defined a shorthand to the block manager below, so you might as well use it.

listeners.foreach(_.rddCleaned(rddId))
logInfo("Cleaned rdd " + rddId)
}
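One way the try-catch suggestion above could be applied is to guard each task inside the loop, so a failure in one cleanup cannot kill the cleaning thread. A sketch against the patch's names (not the merged code):

private def keepCleaning() {
  while (!isStopped) {
    try {
      Option(queue.poll(100, TimeUnit.MILLISECONDS)).foreach {
        case CleanRDD(sc, rddId) => doCleanRDD(sc, rddId)
        case CleanShuffle(shuffleId) => doCleanShuffle(shuffleId)
      }
    } catch {
      case ie: InterruptedException =>
        if (!isStopped) logWarning("Cleaning thread interrupted")
      case e: Exception =>
        logError("Error in cleaning thread", e)
    }
  }
}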

/** Perform shuffle cleaning */
private def doCleanShuffle(shuffleId: Int) {
logDebug("Cleaning shuffle " + shuffleId)
mapOutputTrackerMaster.unregisterShuffle(shuffleId)
blockManager.master.removeShuffle(shuffleId)
listeners.foreach(_.shuffleCleaned(shuffleId))
logInfo("Cleaned shuffle " + shuffleId)
}

private def mapOutputTrackerMaster = env.mapOutputTracker.asInstanceOf[MapOutputTrackerMaster]

private def blockManager = env.blockManager
Contributor:

Minor: looks like you only ever use this with blockManager.master, so you might as well define this as env.blockManager.master


private def isStopped = synchronized { stopped }
}
21 changes: 20 additions & 1 deletion core/src/main/scala/org/apache/spark/Dependency.scala
@@ -49,9 +49,28 @@ class ShuffleDependency[K, V](
@transient rdd: RDD[_ <: Product2[K, V]],
val partitioner: Partitioner,
val serializerClass: String = null)
extends Dependency(rdd.asInstanceOf[RDD[Product2[K, V]]]) {
extends Dependency(rdd.asInstanceOf[RDD[Product2[K, V]]]) with Logging {

val shuffleId: Int = rdd.context.newShuffleId()

override def finalize() {
try {
if (rdd != null) {
rdd.sparkContext.cleaner.cleanShuffle(shuffleId)
}
} catch {
case t: Throwable =>
// Paranoia - If logError throws error as well, report to stderr.
try {
logError("Error in finalize", t)
Member:

@tdas Hey TD, a try-catch on logging?

Contributor (author):

Maybe it is overkill. I am just trying to make sure that no exception gets propagated through the finalizing thread (i.e., the Java system thread that calls finalize on objects), and it's probably best not to let exceptions be thrown on that thread.

Contributor:

catching Exception would be sufficient even for paranoia :-)

} catch {
case _ =>
System.err.println("Error in finalize (and could not write to logError): " + t)
}
} finally {
super.finalize()
}
}
}
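If the narrower catch suggested above were adopted, the finalizer might read as follows (a sketch, not the merged code; catching Exception rather than Throwable lets Errors such as OutOfMemoryError propagate):

override def finalize() {
  try {
    if (rdd != null) {
      rdd.sparkContext.cleaner.cleanShuffle(shuffleId)
    }
  } catch {
    case e: Exception =>
      // Paranoia: keep exceptions off the JVM's finalizer thread.
      try {
        logError("Error in finalize", e)
      } catch {
        case _: Exception =>
          System.err.println("Error in finalize (and could not log it): " + e)
      }
  } finally {
    super.finalize()
  }
}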


72 changes: 57 additions & 15 deletions core/src/main/scala/org/apache/spark/MapOutputTracker.scala
@@ -17,6 +17,10 @@

package org.apache.spark

import scala.Some
import scala.collection.mutable.{HashSet, Map}
import scala.concurrent.Await

import java.io._
import java.util.zip.{GZIPInputStream, GZIPOutputStream}

@@ -28,7 +32,7 @@ import akka.pattern.ask

import org.apache.spark.scheduler.MapStatus
import org.apache.spark.storage.BlockManagerId
import org.apache.spark.util.{AkkaUtils, MetadataCleaner, MetadataCleanerType, TimeStampedHashMap}
import org.apache.spark.util.{AkkaUtils, TimeStampedHashMap, BoundedHashMap}

private[spark] sealed trait MapOutputTrackerMessage
private[spark] case class GetMapOutputStatuses(shuffleId: Int)
@@ -50,23 +54,26 @@ private[spark] class MapOutputTrackerMasterActor(tracker: MapOutputTrackerMaste
}
}

private[spark] class MapOutputTracker(conf: SparkConf) extends Logging {
/**
* Class that keeps track of the location of the location of the mapt output of
* a stage. This is abstract because different versions of MapOutputTracker
* (driver and worker) use different HashMaps to store the metadata.
Contributor:

"... keeps track of the location of the location of the mapt output..."

Contributor (author):

Fixed.

*/
private[spark] abstract class MapOutputTracker(conf: SparkConf) extends Logging {

private val timeout = AkkaUtils.askTimeout(conf)

// Set to the MapOutputTrackerActor living on the driver
var trackerActor: ActorRef = _

protected val mapStatuses = new TimeStampedHashMap[Int, Array[MapStatus]]
/** This HashMap needs to have different storage behavior for driver and worker */
protected val mapStatuses: Map[Int, Array[MapStatus]]
Contributor:

This comment is a bit vague - could you elaborate on what is different?


// Incremented every time a fetch fails so that client nodes know to clear
// their cache of map output locations if this happens.
protected var epoch: Long = 0
protected val epochLock = new java.lang.Object
Contributor:

nit: not part of your patch, but can this just be AnyRef?

Contributor (author):

Yeah, does the exact same thing. Changed.


private val metadataCleaner =
new MetadataCleaner(MetadataCleanerType.MAP_OUTPUT_TRACKER, this.cleanup, conf)

// Send a message to the trackerActor and get its result within a default timeout, or
// throw a SparkException if this fails.
private def askTracker(message: Any): Any = {
@@ -137,8 +144,7 @@ private[spark] class MapOutputTracker(conf: SparkConf) extends Logging {
fetchedStatuses.synchronized {
return MapOutputTracker.convertMapStatuses(shuffleId, reduceId, fetchedStatuses)
}
}
else {
} else {
throw new FetchFailedException(null, shuffleId, -1, reduceId,
new Exception("Missing all output locations for shuffle " + shuffleId))
}
@@ -150,13 +156,12 @@
}
Contributor:

Not exactly related to your patch, but does the MOTMaster ever call getServerStatus? It seems unnecessary for the master since it already has all the data. Now that we also have a MOTWorker, we should put this and other methods/fields in there (askTracker, communicate, val fetching, getServerStatus and updateEpoch). Right now it's a little confusing for MOTMaster to be able to ask itself for status.

Contributor (author):

Makes sense. Will do.

Contributor (author):

I played around with this and actually realized that these are used in local mode. So they kind of have to be in MapOutputTracker, as both MapOutputTrackerWorker and MapOutputTrackerMaster (in local mode) need them.


protected def cleanup(cleanupTime: Long) {
mapStatuses.clearOldValues(cleanupTime)
mapStatuses.asInstanceOf[TimeStampedHashMap[_, _]].clearOldValues(cleanupTime)
}

def stop() {
communicate(StopMapOutputTracker)
mapStatuses.clear()
metadataCleaner.cancel()
trackerActor = null
}

@@ -181,15 +186,49 @@
}
}

/**
* MapOutputTracker for the workers. This uses a BoundedHashMap to keep track of
* a limited number of the most recently used map output statuses.
*/
private[spark] class MapOutputTrackerWorker(conf: SparkConf) extends MapOutputTracker(conf) {

/**
* Bounded HashMap for storing map output statuses in the worker. This allows
* the HashMap to stay bounded in memory usage. Things dropped from this HashMap will be
* automatically repopulated by fetching them again from the driver.
*/
protected val MAX_MAP_STATUSES = 100
Contributor:

Is this arbitrary? These bounds appear in other places too. Maybe we should make them configurable too.

protected val mapStatuses = new BoundedHashMap[Int, Array[MapStatus]](MAX_MAP_STATUSES, true)
}
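BoundedHashMap itself is not shown in this diff. On the JVM, a bounded map with most-recently-used retention is commonly built on java.util.LinkedHashMap's access order; a hypothetical sketch of that idea (the patch's actual BoundedHashMap may differ):

import java.util.{LinkedHashMap => JLinkedHashMap, Map => JMap}

// Hypothetical LRU-bounded map, for illustration only.
class BoundedLruMap[K, V](maxEntries: Int)
  extends JLinkedHashMap[K, V](16, 0.75f, /* accessOrder = */ true) {

  // Called by LinkedHashMap after each insertion; returning true evicts
  // the least-recently-accessed entry once the bound is exceeded.
  override def removeEldestEntry(eldest: JMap.Entry[K, V]): Boolean =
    size() > maxEntries
}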

/**
* MapOutputTracker for the driver. This uses TimeStampedHashMap to keep track of map
* output information, which allows old output information to be dropped based on a TTL.
*/
private[spark] class MapOutputTrackerMaster(conf: SparkConf)
extends MapOutputTracker(conf) {

// Cache a serialized version of the output statuses for each shuffle to send them out faster
private var cacheEpoch = epoch
private val cachedSerializedStatuses = new TimeStampedHashMap[Int, Array[Byte]]

/**
* Timestamp-based HashMap for storing mapStatuses in the master, so that statuses are dropped
* only by explicit de-registering or by TTL-based cleaning (if set). Other than these two
* scenarios, nothing should be dropped from this HashMap.
*/
protected val mapStatuses = new TimeStampedHashMap[Int, Array[MapStatus]]()

/**
* Bounded HashMap for storing serialized statuses in the master. This allows
* the HashMap to stay bounded in memory usage. Things dropped from this HashMap will be
* automatically repopulated by serializing the lost statuses again.
*/
protected val MAX_SERIALIZED_STATUSES = 100
private val cachedSerializedStatuses =
new BoundedHashMap[Int, Array[Byte]](MAX_SERIALIZED_STATUSES, true)
Contributor:

I'd prefer it if this just used a normal TimeStampedHashMap, or even just a normal HashMap, and then we added an explicit message to the MapOutputTracker to clean up the statuses when a given shuffle dependency goes out of scope. I think it's fine to relay this through the block manager for now, since the driver doesn't have a direct path to the MapOutputTracker.

The main motivation is to simplify this patch and avoid introducing another mechanism for deciding when to keep/evict things (this bounded-size map). We can just have the invariant that if a shuffle is in scope then its statuses are still present everywhere, and if it goes out of scope its statuses go away. If people want behavior other than this, they can use the timestamped map.

Contributor:

Also I spoke with @mateiz and he was open to moving to a model where we avoid sending the entire set of statuses around:
https://spark-project.atlassian.net/browse/SPARK-1239

So we shouldn't over optimize for the current implementation.


def registerShuffle(shuffleId: Int, numMaps: Int) {
if (mapStatuses.putIfAbsent(shuffleId, new Array[MapStatus](numMaps)).isDefined) {
if (mapStatuses.put(shuffleId, new Array[MapStatus](numMaps)).isDefined) {
Contributor:

Any reason for this change in behaviour?

Contributor (author):

I changed it to make sure that we are using map operations that are exposed by a Scala map, so that it is easier to replace TimeStampedHashMap with a Scala map later (when we have a better sense of how to clean up). In this case, since we are throwing an error if a previous value exists, the behavior of put and putIfAbsent is similar. Though I realize now that putIfAbsent is atomic, whereas with Scala's Map.put it is not obvious whether it is atomic.

Contributor (author):

In this case, that is not a concern, because only one thread (the DAGScheduler thread) seems to be accessing this.

throw new IllegalArgumentException("Shuffle ID " + shuffleId + " registered twice")
}
}
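On the atomicity point: with a concurrent map, the check-then-act spelling can interleave between threads, while putIfAbsent cannot. A standalone illustration using java.util.concurrent.ConcurrentHashMap (not the patch's TimeStampedHashMap):

import java.util.concurrent.ConcurrentHashMap

val statuses = new ConcurrentHashMap[Int, String]()

// Not atomic: another thread can insert between containsKey() and put().
if (!statuses.containsKey(1)) statuses.put(1, "status")

// Atomic: exactly one thread sees null; every other caller gets the old value.
val previous = statuses.putIfAbsent(1, "status")
if (previous != null) {
  throw new IllegalArgumentException("Shuffle ID 1 registered twice")
}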
@@ -223,6 +262,10 @@
}
}

def unregisterShuffle(shuffleId: Int) {
mapStatuses.remove(shuffleId)
}

def incrementEpoch() {
epochLock.synchronized {
epoch += 1
@@ -259,9 +302,8 @@
bytes
}

protected override def cleanup(cleanupTime: Long) {
super.cleanup(cleanupTime)
cachedSerializedStatuses.clearOldValues(cleanupTime)
def contains(shuffleId: Int): Boolean = {
mapStatuses.contains(shuffleId)
}

override def stop() {
8 changes: 6 additions & 2 deletions core/src/main/scala/org/apache/spark/SparkContext.scala
@@ -44,7 +44,7 @@ import org.apache.spark.scheduler.cluster.mesos.{CoarseMesosSchedulerBackend, Me
import org.apache.spark.scheduler.local.LocalBackend
import org.apache.spark.storage.{BlockManagerSource, RDDInfo, StorageStatus, StorageUtils}
import org.apache.spark.ui.SparkUI
import org.apache.spark.util.{ClosureCleaner, MetadataCleaner, MetadataCleanerType, TimeStampedHashMap, Utils}
import org.apache.spark.util.{ClosureCleaner, MetadataCleaner, MetadataCleanerType, TimeStampedWeakValueHashMap, Utils}

/**
* Main entry point for Spark functionality. A SparkContext represents the connection to a Spark
@@ -147,7 +147,7 @@ class SparkContext(
private[spark] val addedJars = HashMap[String, Long]()

// Keeps track of all persisted RDDs
private[spark] val persistentRdds = new TimeStampedHashMap[Int, RDD[_]]
private[spark] val persistentRdds = new TimeStampedWeakValueHashMap[Int, RDD[_]]
private[spark] val metadataCleaner =
new MetadataCleaner(MetadataCleanerType.SPARK_CONTEXT, this.cleanup, conf)

@@ -206,6 +206,9 @@
@volatile private[spark] var dagScheduler = new DAGScheduler(taskScheduler)
dagScheduler.start()

private[spark] val cleaner = new ContextCleaner(env)
cleaner.start()

ui.start()

/** A default Hadoop Configuration for the Hadoop code (e.g. file systems) that we reuse. */
@@ -792,6 +795,7 @@
dagScheduler = null
if (dagSchedulerCopy != null) {
metadataCleaner.cancel()
cleaner.stop()
dagSchedulerCopy.stop()
taskScheduler = null
// TODO: Cache.stop()?
2 changes: 1 addition & 1 deletion core/src/main/scala/org/apache/spark/SparkEnv.scala
@@ -182,7 +182,7 @@ object SparkEnv extends Logging {
val mapOutputTracker = if (isDriver) {
new MapOutputTrackerMaster(conf)
} else {
new MapOutputTracker(conf)
new MapOutputTrackerWorker(conf)
}
mapOutputTracker.trackerActor = registerOrLookup(
"MapOutputTracker",
24 changes: 24 additions & 0 deletions core/src/main/scala/org/apache/spark/rdd/RDD.scala
@@ -1025,6 +1025,14 @@ abstract class RDD[T: ClassTag](
checkpointData.flatMap(_.getCheckpointFile)
}

def cleanup() {
Contributor:

This does not actually clean up the RDD, but rather just queues it for cleanup. At the very least it needs documentation, but from a user's standpoint, if they call cleanup(), they may well want it to actually block on the cleanup being completed. I can't see a particular case in which correctness relies on timely cleanup except for perf testing, though. I also don't see a particularly good reason to enqueue the cleanup here except to share the code path with the finalizer.

Contributor (author):

Good point. Actually, I am wondering whether we should even expose this in the public interface, given that RDD.unpersist() exists (which has a field for deciding whether to block or not). However, the difference between RDD.unpersist() and this RDD.cleanup() is that the former will only unpersist the RDD, while the latter will unpersist it and also clean up shuffle data if this RDD had shuffle dependencies.

Contributor:

I think it makes sense to keep cleanup() as a user-visible function rather than forcing them to rely on finalizers. In general, I suppose cleaning up an RDD means it is no longer valid to use, while unpersisting just means it's no longer taking up space in the BlockManager. I can see use cases for either.

Contributor:

Why not just make it private[spark] for now? Otherwise we'll have to preserve the semantics and visibility of this function for a long time, even if we change around the way we do garbage collection. But if we make it private now, we can wait for feedback from people who want to use this (I'm still not sure exactly what case that would be) in deciding how and what to expose.

Contributor (author):

I think I agree with @pwendell. It's not clear what semantics we are going to guarantee.

Contributor:

Wait, why is the shuffle dependency to rdd2 not shared with rddB?

In a simpler and even more extreme case, won't this result in problems for rdd2?

var rdd2 = rdd1
...
rdd1.cleanup()

Contributor (author):

If I understand the code in CoGroupedRDD correctly, every time a CoGrouped RDD is created (join uses cogroup underneath), a new dependency object is created. So even though rddA and rddB depend on the same rdd1, they should not be sharing the shuffle dependency.

Regarding the new code snippet, yes, that would result in problems for rdd2. But that can also be done currently with rdd1.unpersist().

Contributor:

Ah, right, I'm following you on the original code snippet now.

Yes, cleanup() isn't really worse than unpersist() wrt the second snippet. That tends to argue more for the removal of unpersist from the API than the inclusion of cleanup (and, yes, I realize that you are not arguing for the inclusion of cleanup in the public API.) Ideally, the automatic garbage collection introduced with this PR is sufficient to handle all of the cleanup of RDD data and metadata, and thereafter a public unpersist() or cleanup() is no more needed than is an explicit means to destroy and garbage collect Java objects. If we do want to maintain an explicit unpersist/cleanup mechanism, I think it should be behind a system administration kind of interface and protected by lots of "You're going to shoot your eye out!" warnings.

Contributor:

Hope I am missing something here ...

After invoking cleanup, use of rddA would result in errors; coupled with lazy execution, users can actually end up cleaning up RDDs which have not yet been 'used'.
We should defer exposing this API until we have more clarity on this.

In particular, cleanup should ensure that all pending jobs which require the RDD have finished.

Contrived example:

rdd3 = rdd1.join(rdd2)
rdd1.count()
rdd1.cleanup()
...

rdd3.count()

would cause issues for rdd3. This is, arguably, bad code from a Spark dev point of view, but unlike with unpersist, where we can recover with a performance penalty, with cleanup we will fail. From a (naive?) user's point of view, rdd1 has already been "used" by the time it is getting cleaned up.
The best option would be to throw an exception when cleanup is invoked in cases like this, and the same exception if the RDD is subsequently used (for any op) after a cleanup; this latter might be a more involved change though.

Contributor (author):

I have not exactly tested this, but I don't think it will fail. A cleaned-up RDD should, for all purposes, be the same as an RDD that has never been submitted as part of a job, so never computed. So it will just be recomputed from scratch.

Nonetheless, I agree that cleanup should be exposed only with the proper semantics and safeguards (throw an exception if reused after explicit cleanup). So for now we will not expose it. I have removed it and will update the PR soon.

logInfo("Cleanup called on RDD " + id)
sc.cleaner.cleanRDD(this)
dependencies.filter(_.isInstanceOf[ShuffleDependency[_, _]])
Contributor:

This is probably not important, but a while loop would be much faster here. This would likely only be a problem if the dependency list was very long or many RDDs were being cleaned up simultaneously (an ML or streaming program, perhaps). Feel free to not change this if you feel that the code change is not worth the performance benefit here.

.map(_.asInstanceOf[ShuffleDependency[_, _]].shuffleId)
.foreach(sc.cleaner.cleanShuffle)
}
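For the record, the while-loop variant the reviewer has in mind would look something like this (a sketch against the patch's names; it avoids the intermediate collections that filter and map allocate):

val deps = dependencies
var i = 0
while (i < deps.length) {
  deps(i) match {
    case dep: ShuffleDependency[_, _] => sc.cleaner.cleanShuffle(dep.shuffleId)
    case _ => // not a shuffle dependency; nothing to clean
  }
  i += 1
}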

// =======================================================================
// Other internal methods and fields
// =======================================================================
@@ -1104,4 +1112,20 @@
new JavaRDD(this)(elementClassTag)
}

override def finalize() {
try {
cleanup()
} catch {
case t: Throwable =>
// Paranoia - If logError throws error as well, report to stderr.
try {
logError("Error in finalize", t)
} catch {
case _ =>
System.err.println("Error in finalize (and could not write to logError): " + t)
}
} finally {
super.finalize()
}
}
}
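Worth noting: per commit f2881fd in the history above, this finalize-based approach was later replaced by weak references drained from a ReferenceQueue, which keeps user-visible cleanup logic off the JVM's finalizer thread. A minimal sketch of that pattern (illustrative names, not the final Spark code):

import java.lang.ref.{ReferenceQueue, WeakReference}

// Pair an object with the id we will still need after the object is gone.
class CleanupRef(val id: Int, obj: AnyRef, q: ReferenceQueue[AnyRef])
  extends WeakReference[AnyRef](obj, q)

object RefQueueSketch {
  private val refQueue = new ReferenceQueue[AnyRef]
  // Hold the CleanupRefs strongly, or they would be collected themselves.
  private var refs = Set.empty[CleanupRef]

  def register(obj: AnyRef, id: Int) {
    synchronized { refs += new CleanupRef(id, obj, refQueue) }
  }

  def keepCleaning() {
    while (true) {
      // Blocks for up to 100 ms; returns null on timeout.
      val ref = refQueue.remove(100).asInstanceOf[CleanupRef]
      if (ref != null) {
        synchronized { refs -= ref }
        println("Object " + ref.id + " was collected; cleaning up by id")
      }
    }
  }
}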