vanzin · vanzin · Jan 31, 2017
diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala
@@ -455,7 +455,7 @@ class SparkContext(config: SparkConf) extends Logging {
 
     _ui =
       if (conf.getBoolean("spark.ui.enabled", true)) {
-        Some(SparkUI.create(Some(this), _stateStore, _conf, listenerBus, _env.securityManager,
+        Some(SparkUI.create(Some(this), _stateStore, _conf, _env.securityManager,
           appName, "", startTime))
       } else {
         // For tests, do not enable the UI

diff --git a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
@@ -326,8 +326,7 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
     }
 
     val loadedUI = {
-      val ui = SparkUI.create(None, new AppStateStore(kvstore), conf, replayBus, secManager,
-        app.info.name,
+      val ui = SparkUI.create(None, new AppStateStore(kvstore), conf, secManager, app.info.name,
         HistoryServer.getAttemptURI(appId, attempt.info.attemptId),
         attempt.info.startTime.getTime())
       LoadedAppUI(ui)

diff --git a/core/src/main/scala/org/apache/spark/status/AppStateListener.scala b/core/src/main/scala/org/apache/spark/status/AppStateListener.scala
@@ -61,6 +61,7 @@ private[spark] class AppStateListener(
   private val liveExecutors = new HashMap[String, LiveExecutor]()
   private val liveTasks = new HashMap[Long, LiveTask]()
   private val liveRDDs = new HashMap[Int, LiveRDD]()
+  private val pools = new HashMap[String, SchedulerPool]()
 
   override def onApplicationStart(event: SparkListenerApplicationStart): Unit = {
     assert(event.appId.isDefined, "Application without IDs are not supported.")
@@ -200,9 +201,8 @@ private[spark] class AppStateListener(
       missingStages.map(_.numTasks).sum
     }
 
-    val lastStageInfo = event.stageInfos.lastOption
+    val lastStageInfo = event.stageInfos.sortBy(_.stageId).lastOption
     val lastStageName = lastStageInfo.map(_.name).getOrElse("(Unknown Stage Name)")
-
     val jobGroup = Option(event.properties)
       .flatMap { p => Option(p.getProperty(SparkContext.SPARK_JOB_GROUP_ID)) }
 
@@ -225,10 +225,41 @@ private[spark] class AppStateListener(
       liveUpdate(stage)
     }
 
+    // Create the graph data for all the job's stages.
+    event.stageInfos.foreach { stage =>
+      val graph = RDDOperationGraph.makeOperationGraph(stage, Int.MaxValue)
+      val uigraph = new RDDOperationGraphWrapper(
+        stage.stageId,
+        graph.edges,
+        graph.outgoingEdges,
+        graph.incomingEdges,
+        newRDDOperationCluster(graph.rootCluster))
+      kvstore.write(uigraph)
+    }
+  }
+
+  /**
+   * Recursively converts an operation cluster and all of its child clusters to wrappers.
+   */
+  private def newRDDOperationCluster(cluster: RDDOperationCluster): RDDOperationClusterWrapper = {
+    val children = cluster.childClusters.map(newRDDOperationCluster)
+    new RDDOperationClusterWrapper(cluster.id, cluster.name, cluster.childNodes, children)
   }
 
   override def onJobEnd(event: SparkListenerJobEnd): Unit = {
     liveJobs.remove(event.jobId).foreach { job =>
+      // Check if there are any pending stages that match this job; mark those as skipped.
+      job.stageIds.foreach { sid =>
+        val pending = liveStages.filter { case ((id, _), _) => id == sid }
+        pending.foreach { case (key, stage) =>
+          stage.status = v1.StageStatus.SKIPPED
+          job.skippedStages += stage.info.stageId
+          job.skippedTasks += stage.info.numTasks
+          liveStages.remove(key)
+          update(stage)
+        }
+      }
+
       job.status = event.jobResult match {
         case JobSucceeded => JobExecutionStatus.SUCCEEDED
         case JobFailed(_) => JobExecutionStatus.FAILED
@@ -253,12 +284,20 @@ private[spark] class AppStateListener(
       Option(p.getProperty("spark.scheduler.pool"))
     }.getOrElse(SparkUI.DEFAULT_POOL_NAME)
 
+    stage.description = Option(event.properties).flatMap { p =>
+      Option(p.getProperty(SparkContext.SPARK_JOB_DESCRIPTION))
+    }
+
     stage.jobs.foreach { job =>
       job.completedStages = job.completedStages - event.stageInfo.stageId
       job.activeStages += 1
       liveUpdate(job)
     }
 
+    val pool = pools.getOrElseUpdate(stage.schedulingPool, new SchedulerPool(stage.schedulingPool))
+    pool.stageIds = pool.stageIds + event.stageInfo.stageId
+    update(pool)
+
     event.stageInfo.rddInfos.foreach { info =>
       if (info.storageLevel.isValid) {
         liveUpdate(liveRDDs.getOrElseUpdate(info.id, new LiveRDD(info)))
@@ -306,6 +345,8 @@ private[spark] class AppStateListener(
     }
 
     val metricsDelta = liveTasks.remove(event.taskInfo.taskId).map { task =>
+      task.info = event.taskInfo
+
       val errorMessage = event.reason match {
         case Success =>
           None
@@ -322,11 +363,13 @@ private[spark] class AppStateListener(
       delta
     }.orNull
 
-    val (completedDelta, failedDelta) = event.reason match {
+    val (completedDelta, failedDelta, killedDelta) = event.reason match {
       case Success =>
-        (1, 0)
+        (1, 0, 0)
+      case _: TaskKilled =>
+        (0, 0, 1)
       case _ =>
-        (0, 1)
+        (0, 1, 0)
     }
 
     liveStages.get((event.stageId, event.stageAttemptId)).foreach { stage =>
@@ -336,19 +379,28 @@ private[spark] class AppStateListener(
       stage.activeTasks -= 1
       stage.completedTasks += completedDelta
       stage.failedTasks += failedDelta
+      stage.killedTasks += killedDelta
+      if (killedDelta > 0) {
+        stage.killedSummary = killedTasksSummary(event.reason, stage.killedSummary)
+      }
       liveUpdate(stage)
 
       stage.jobs.foreach { job =>
         job.activeTasks -= 1
         job.completedTasks += completedDelta
         job.failedTasks += failedDelta
+        job.killedTasks += killedDelta
+        if (killedDelta > 0) {
+          job.killedSummary = killedTasksSummary(event.reason, job.killedSummary)
+        }
         liveUpdate(job)
       }
 
       val esummary = stage.executorSummary(event.taskInfo.executorId)
       esummary.taskTime += event.taskInfo.duration
       esummary.succeededTasks += completedDelta
       esummary.failedTasks += failedDelta
+      esummary.killedTasks += killedDelta
       if (metricsDelta != null) {
         esummary.metrics.update(metricsDelta)
       }
@@ -399,6 +451,11 @@ private[spark] class AppStateListener(
         liveUpdate(job)
       }
 
+      pools.get(stage.schedulingPool).foreach { pool =>
+        pool.stageIds = pool.stageIds - event.stageInfo.stageId
+        update(pool)
+      }
+
       stage.executorSummaries.values.foreach(update)
       update(stage)
     }
@@ -456,11 +513,15 @@ private[spark] class AppStateListener(
 
   /** Flush all live entities' data to the underlying store. */
   def flush(): Unit = {
-    liveStages.values.foreach(update)
+    liveStages.values.foreach { stage =>
+      update(stage)
+      stage.executorSummaries.values.foreach(update)  // per-stage executor summaries too
+    }
     liveJobs.values.foreach(update)
     liveExecutors.values.foreach(update)
     liveTasks.values.foreach(update)
     liveRDDs.values.foreach(update)
+    pools.values.foreach(update)
   }
 
   private def updateRDDBlock(event: SparkListenerBlockUpdated, block: RDDBlockId): Unit = {
@@ -595,6 +656,17 @@ private[spark] class AppStateListener(
     stage
   }
 
+  /** Returns `oldSummary` with the kill reason's count raised by one; unchanged if not a kill. */
+  private def killedTasksSummary(
+      reason: TaskEndReason,
+      oldSummary: Map[String, Int]): Map[String, Int] = reason match {
+    case killed: TaskKilled =>
+      val seen = oldSummary.getOrElse(killed.reason, 0)
+      oldSummary.updated(killed.reason, seen + 1)
+    case _ =>
+      oldSummary
+  }
+
   private def update(entity: LiveEntity): Unit = {
     entity.write(kvstore)
   }

diff --git a/core/src/main/scala/org/apache/spark/status/AppStateStore.scala b/core/src/main/scala/org/apache/spark/status/AppStateStore.scala
@@ -26,21 +26,32 @@ import org.apache.spark.{JobExecutionStatus, SparkConf}
 import org.apache.spark.kvstore.{InMemoryStore, KVStore}
 import org.apache.spark.scheduler.{SparkListenerBus, SparkListenerEvent}
 import org.apache.spark.status.api.v1
+import org.apache.spark.ui.scope._
 import org.apache.spark.util.{Distribution, Utils}
 
 /**
  * A wrapper around a KVStore that provides methods for accessing the API data stored within.
  */
 private[spark] class AppStateStore(store: KVStore) {
 
+  /** Returns the first application info found in the store. */
+  def appInfo(): v1.ApplicationInfo = {
+    // There should be a single application info per AppStateStore instance, so the first
+    // entry is taken without any checks.
+    val infos = store.view(classOf[ApplicationInfoWrapper]).closeableIterator()
+    try {
+      infos.next().info
+    } finally infos.close()
+  }
+
   def environmentInfo(): v1.ApplicationEnvironmentInfo = {
     val klass = classOf[ApplicationEnvironmentInfoWrapper]
     store.read(klass, klass.getName()).info
   }
 
   def jobsList(statuses: JList[JobExecutionStatus]): Seq[v1.JobData] = {
-    val it = store.view(classOf[JobDataWrapper]).sorted().asScala.map(_.info)
-    if (!statuses.isEmpty()) {
+    val it = store.view(classOf[JobDataWrapper]).sorted().reverse().asScala.map(_.info)
+    if (statuses != null && !statuses.isEmpty()) {
       it.filter { job => statuses.contains(job.status) }.toSeq
     } else {
       it.toSeq
@@ -66,21 +77,35 @@ private[spark] class AppStateStore(store: KVStore) {
   }
 
   def stageList(statuses: JList[v1.StageStatus]): Seq[v1.StageData] = {
-    val it = store.view(classOf[StageDataWrapper]).sorted().asScala.map(_.info)
-    if (!statuses.isEmpty) {
+    val it = store.view(classOf[StageDataWrapper]).sorted().reverse().asScala.map(_.info)
+    if (statuses != null && !statuses.isEmpty()) {  // null or empty: no status filtering
       it.filter { s => statuses.contains(s.status) }.toSeq
     } else {
       it.toSeq
     }
   }
 
-  def stageData(stageId: Int): Seq[v1.StageData] = {
+  def stageData(stageId: Int, details: Boolean = false): Seq[v1.StageData] = {
     store.view(classOf[StageDataWrapper]).index("stageId").first(stageId).last(stageId)
-      .asScala.map(_.info).toSeq
+      .asScala.map { s =>
+        if (details) stageWithDetails(s.info) else s.info  // adds tasks and executor summaries
+      }.toSeq
   }
 
-  def stageAttempt(stageId: Int, stageAttemptId: Int): v1.StageData = {
-    store.read(classOf[StageDataWrapper], Array(stageId, stageAttemptId)).info
+  /** Returns the last stored attempt of the stage; throws NoSuchElementException if none. */
+  def lastStageAttempt(stageId: Int): v1.StageData = {
+    // Bound the reverse scan at both ends so it cannot run on into entries of lower stage ids.
+    val it = store.view(classOf[StageDataWrapper]).index("stageId").reverse().first(stageId)
+      .last(stageId).closeableIterator()
+    try {
+      if (it.hasNext()) it.next().info else throw new NoSuchElementException(s"No stage $stageId")
+    } finally it.close()
+  }
+
+  /** Reads one stage attempt, optionally filled in with its task and executor details. */
+  def stageAttempt(stageId: Int, stageAttemptId: Int, details: Boolean = false): v1.StageData = {
+    val attempt = store.read(classOf[StageDataWrapper], Array(stageId, stageAttemptId)).info
+    if (details) stageWithDetails(attempt) else attempt
   }
 
   def taskSummary(
@@ -179,6 +204,12 @@ private[spark] class AppStateStore(store: KVStore) {
     )
   }
 
+  def taskList(stageId: Int, stageAttemptId: Int, maxTasks: Int): Seq[v1.TaskData] = {
+    val stageKey = Array(stageId, stageAttemptId)  // bounds the "stage" index to this attempt
+    store.view(classOf[TaskDataWrapper]).index("stage").first(stageKey).last(stageKey).reverse()
+      .max(maxTasks).asScala.map(_.info).toSeq.reverse  // last maxTasks, back in forward order
+  }
+
   def taskList(
       stageId: Int,
       stageAttemptId: Int,
@@ -199,6 +230,55 @@ private[spark] class AppStateStore(store: KVStore) {
     indexed.skip(offset).max(length).asScala.map(_.info).toSeq
   }
 
+  /** Returns a copy of `stage` with its task map and per-executor summaries filled in. */
+  private def stageWithDetails(stage: v1.StageData): v1.StageData = {
+    // TODO: limit tasks returned.
+    val tasks = taskList(stage.stageId, stage.attemptId, Int.MaxValue)
+      .map { t => (t.taskId, t) }.toMap
+
+    val stageKey = Array(stage.stageId, stage.attemptId)
+    val execIt = store.view(classOf[ExecutorStageSummaryWrapper]).index("stage").first(stageKey)
+      .last(stageKey).closeableIterator()
+    // Copy the summaries out eagerly, then release the store iterator even if the copy fails.
+    val execs =
+      try execIt.asScala.map { e => (e.executorId -> e.info) }.toMap finally execIt.close()
+
+    new v1.StageData(
+      stage.status,
+      stage.stageId,
+      stage.attemptId,
+      stage.numTasks,
+      stage.numActiveTasks,
+      stage.numCompleteTasks,
+      stage.numFailedTasks,
+      stage.numKilledTasks,
+      stage.executorRunTime,
+      stage.executorCpuTime,
+      stage.submissionTime,
+      stage.firstTaskLaunchedTime,
+      stage.completionTime,
+      stage.failureReason,
+      stage.inputBytes,
+      stage.inputRecords,
+      stage.outputBytes,
+      stage.outputRecords,
+      stage.shuffleReadBytes,
+      stage.shuffleReadRecords,
+      stage.shuffleWriteBytes,
+      stage.shuffleWriteRecords,
+      stage.memoryBytesSpilled,
+      stage.diskBytesSpilled,
+      stage.name,
+      stage.description,
+      stage.details,
+      stage.schedulingPool,
+      stage.rddIds,
+      stage.accumulatorUpdates,
+      Some(tasks),
+      Some(execs),
+      stage.killedTasksSummary)
+  }
+
   def rddList(cachedOnly: Boolean = true): Seq[v1.RDDStorageInfo] = {
     store.view(classOf[RDDStorageInfoWrapper]).sorted().asScala.map(_.info).filter { rdd =>
       !cachedOnly || rdd.numCachedPartitions > 0
@@ -217,6 +297,27 @@ private[spark] class AppStateStore(store: KVStore) {
     store.view(classOf[StreamBlockData]).asScala.toSeq
   }
 
+  /** Reads the stored operation graph of the given stage. */
+  def operationGraphForStage(stageId: Int): RDDOperationGraph =
+    store.read(classOf[RDDOperationGraphWrapper], stageId).toRDDOperationGraph()
+
+  /** Reads the graphs of all stages of a job, tagging the root name of skipped stages. */
+  def operationGraphForJob(jobId: Int): Seq[RDDOperationGraph] = {
+    val job = store.read(classOf[JobDataWrapper], jobId)
+    job.info.stageIds.map { stageId =>
+      val graph = store.read(classOf[RDDOperationGraphWrapper], stageId).toRDDOperationGraph()
+      val root = graph.rootCluster
+      if (job.skippedStages.contains(stageId) && !root.name.contains("skipped")) {
+        root.setName(root.name + " (skipped)")
+      }
+      graph
+    }
+  }
+
+  /** Reads the stored data of the scheduler pool with the given name. */
+  def pool(name: String): PoolData =
+    store.read(classOf[PoolData], name)
+
   def close(): Unit = {
     store.close()
   }