[SPARK-12517] add default RDD name for one created via sc.textFile
The feature was first added in commit 7b877b2 but was later removed (probably by mistake) in commit fc8b581.
This change restores the behavior by setting the default name of RDDs created via sc.textFile(...) to the path argument.

Here is the symptom:

* Using spark-1.5.2-bin-hadoop2.6:

scala> sc.textFile("/home/root/.bashrc").name
res5: String = null

scala> sc.binaryFiles("/home/root/.bashrc").name
res6: String = /home/root/.bashrc

* While using Spark 1.3.1:

scala> sc.textFile("/home/root/.bashrc").name
res0: String = /home/root/.bashrc

scala> sc.binaryFiles("/home/root/.bashrc").name
res1: String = /home/root/.bashrc
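
* Expected behavior once this change is applied (illustrative, consistent with the new test; not a captured session):

scala> sc.textFile("/home/root/.bashrc").name
res0: String = /home/root/.bashrc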

Author: Yaron Weinsberg <[email protected]>
Author: yaron <[email protected]>

Closes #10456 from wyaron/master.
wyaron authored and sarutak committed Dec 28, 2015
1 parent fd50df4 · commit 73b70f0
Showing 2 changed files with 27 additions and 2 deletions.
core/src/main/scala/org/apache/spark/SparkContext.scala (4 changes: 2 additions & 2 deletions)
@@ -836,7 +836,7 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationClient
       minPartitions: Int = defaultMinPartitions): RDD[String] = withScope {
     assertNotStopped()
     hadoopFile(path, classOf[TextInputFormat], classOf[LongWritable], classOf[Text],
-      minPartitions).map(pair => pair._2.toString)
+      minPartitions).map(pair => pair._2.toString).setName(path)
   }

   /**
@@ -885,7 +885,7 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationClient
       classOf[Text],
       classOf[Text],
       updateConf,
-      minPartitions).setName(path).map(record => (record._1.toString, record._2.toString))
+      minPartitions).map(record => (record._1.toString, record._2.toString)).setName(path)
   }

   /**
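
Why the ordering matters: setName names only the RDD it is called on, and names do not propagate through transformations, so the old wholeTextFiles code (setName before map) named an intermediate RDD that the caller never saw. A minimal sketch of this behavior, as a hypothetical standalone program (not part of this commit; object and value names are illustrative):

import org.apache.spark.{SparkConf, SparkContext}

object NamePropagationSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("sketch").setMaster("local"))
    try {
      // setName applies to the receiver only; map returns a new RDD
      // whose name starts out as null.
      val parent = sc.parallelize(Seq(1, 2, 3)).setName("parent")
      val child = parent.map(_ + 1)
      assert(parent.name == "parent")
      assert(child.name == null) // the name did not carry through map
    } finally {
      sc.stop()
    }
  }
}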
core/src/test/scala/org/apache/spark/SparkContextSuite.scala (25 changes: 25 additions & 0 deletions)
@@ -274,6 +274,31 @@ class SparkContextSuite extends SparkFunSuite with LocalSparkContext {
     }
   }
 
+  test("Default path for file based RDDs is properly set (SPARK-12517)") {
+    sc = new SparkContext(new SparkConf().setAppName("test").setMaster("local"))
+
+    // Test textFile, wholeTextFiles, binaryFiles, hadoopFile and
+    // newAPIHadoopFile for setting the default path as the RDD name
+    val mockPath = "default/path/for/"
+
+    var targetPath = mockPath + "textFile"
+    assert(sc.textFile(targetPath).name === targetPath)
+
+    targetPath = mockPath + "wholeTextFiles"
+    assert(sc.wholeTextFiles(targetPath).name === targetPath)
+
+    targetPath = mockPath + "binaryFiles"
+    assert(sc.binaryFiles(targetPath).name === targetPath)
+
+    targetPath = mockPath + "hadoopFile"
+    assert(sc.hadoopFile(targetPath).name === targetPath)
+
+    targetPath = mockPath + "newAPIHadoopFile"
+    assert(sc.newAPIHadoopFile(targetPath).name === targetPath)
+
+    sc.stop()
+  }
+
   test("calling multiple sc.stop() must not throw any exception") {
     noException should be thrownBy {
       sc = new SparkContext(new SparkConf().setAppName("test").setMaster("local"))
