Skip to content

Commit

Permalink
[SPARK-2086] Improve output of toDebugString to make shuffle boundari…
Browse files Browse the repository at this point in the history
…es more clear

Changes RDD.toDebugString() to show hierarchy and shuffle transformations more clearly

New output:

```
(3) FlatMappedValuesRDD[325] at apply at Transformer.scala:22
 |  MappedValuesRDD[324] at apply at Transformer.scala:22
 |  CoGroupedRDD[323] at apply at Transformer.scala:22
 +-(5) MappedRDD[320] at apply at Transformer.scala:22
 |  |  MappedRDD[319] at apply at Transformer.scala:22
 |  |  MappedValuesRDD[318] at apply at Transformer.scala:22
 |  |  MapPartitionsRDD[317] at apply at Transformer.scala:22
 |  |  ShuffledRDD[316] at apply at Transformer.scala:22
 |  +-(10) MappedRDD[315] at apply at Transformer.scala:22
 |     |   ParallelCollectionRDD[314] at apply at Transformer.scala:22
 +-(100) MappedRDD[322] at apply at Transformer.scala:22
     |   ParallelCollectionRDD[321] at apply at Transformer.scala:22
```

Author: Gregory Owen <[email protected]>

Closes apache#1364 from GregOwen/to-debug-string and squashes the following commits:

08f5c78 [Gregory Owen] toDebugString: prettier debug printing to show shuffles and joins more clearly
1603f7b [Gregory Owen] toDebugString: prettier debug printing to show shuffles and joins more clearly
  • Loading branch information
GregOwen authored and pwendell committed Jul 22, 2014
1 parent 511a731 commit c3462c6
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 4 deletions.
52 changes: 48 additions & 4 deletions core/src/main/scala/org/apache/spark/rdd/RDD.scala
Original file line number Diff line number Diff line change
Expand Up @@ -1269,11 +1269,55 @@ abstract class RDD[T: ClassTag](

/** A description of this RDD and its recursive dependencies for debugging. */
def toDebugString: String = {
def debugString(rdd: RDD[_], prefix: String = ""): Seq[String] = {
Seq(prefix + rdd + " (" + rdd.partitions.size + " partitions)") ++
rdd.dependencies.flatMap(d => debugString(d.rdd, prefix + " "))
// Apply a different rule to the last child
def debugChildren(rdd: RDD[_], prefix: String): Seq[String] = {
val len = rdd.dependencies.length
len match {
case 0 => Seq.empty
case 1 =>
val d = rdd.dependencies.head
debugString(d.rdd, prefix, d.isInstanceOf[ShuffleDependency[_,_,_]], true)
case _ =>
val frontDeps = rdd.dependencies.take(len - 1)
val frontDepStrings = frontDeps.flatMap(
d => debugString(d.rdd, prefix, d.isInstanceOf[ShuffleDependency[_,_,_]]))

val lastDep = rdd.dependencies.last
val lastDepStrings =
debugString(lastDep.rdd, prefix, lastDep.isInstanceOf[ShuffleDependency[_,_,_]], true)

(frontDepStrings ++ lastDepStrings)
}
}
// The first RDD in the dependency stack has no parents, so no need for a +-
def firstDebugString(rdd: RDD[_]): Seq[String] = {
val partitionStr = "(" + rdd.partitions.size + ")"
val leftOffset = (partitionStr.length - 1) / 2
val nextPrefix = (" " * leftOffset) + "|" + (" " * (partitionStr.length - leftOffset))
Seq(partitionStr + " " + rdd) ++ debugChildren(rdd, nextPrefix)
}
def shuffleDebugString(rdd: RDD[_], prefix: String = "", isLastChild: Boolean): Seq[String] = {
val partitionStr = "(" + rdd.partitions.size + ")"
val leftOffset = (partitionStr.length - 1) / 2
val thisPrefix = prefix.replaceAll("\\|\\s+$", "")
val nextPrefix = (
thisPrefix
+ (if (isLastChild) " " else "| ")
+ (" " * leftOffset) + "|" + (" " * (partitionStr.length - leftOffset)))
Seq(thisPrefix + "+-" + partitionStr + " " + rdd) ++ debugChildren(rdd, nextPrefix)
}
def debugString(rdd: RDD[_],
prefix: String = "",
isShuffle: Boolean = true,
isLastChild: Boolean = false): Seq[String] = {
if (isShuffle) {
shuffleDebugString(rdd, prefix, isLastChild)
}
else {
Seq(prefix + rdd) ++ debugChildren(rdd, prefix)
}
}
debugString(this).mkString("\n")
firstDebugString(this).mkString("\n")
}

override def toString: String = "%s%s[%d] at %s".format(
Expand Down
8 changes: 8 additions & 0 deletions project/MimaExcludes.scala
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,14 @@ object MimaExcludes {
"org.apache.spark.api.java.JavaDoubleRDD.countApproxDistinct$default$1"),
ProblemFilters.exclude[MissingMethodProblem](
"org.apache.spark.storage.MemoryStore.Entry"),
ProblemFilters.exclude[MissingMethodProblem](
"org.apache.spark.rdd.RDD.org$apache$spark$rdd$RDD$$debugChildren$1"),
ProblemFilters.exclude[MissingMethodProblem](
"org.apache.spark.rdd.RDD.org$apache$spark$rdd$RDD$$firstDebugString$1"),
ProblemFilters.exclude[MissingMethodProblem](
"org.apache.spark.rdd.RDD.org$apache$spark$rdd$RDD$$shuffleDebugString$1"),
ProblemFilters.exclude[MissingMethodProblem](
"org.apache.spark.rdd.RDD.org$apache$spark$rdd$RDD$$debugString$1"),
ProblemFilters.exclude[MissingMethodProblem](
"org.apache.spark.rdd.PairRDDFunctions.org$apache$spark$rdd$PairRDDFunctions$$"
+ "createZero$1")
Expand Down

0 comments on commit c3462c6

Please sign in to comment.