From 6591aa4e586763cd39a4ccf67a09e4ce1aeedf6a Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Thu, 8 Dec 2016 12:52:05 -0800 Subject: [PATCH] [SPARK-18760][SQL] Consistent format specification for FileFormats ## What changes were proposed in this pull request? This patch fixes the format specification in explain for file sources (Parquet and Text formats are the only two that are different from the rest): Before: ``` scala> spark.read.text("test.text").explain() == Physical Plan == *FileScan text [value#15] Batched: false, Format: org.apache.spark.sql.execution.datasources.text.TextFileFormat@xyz, Location: InMemoryFileIndex[file:/scratch/rxin/spark/test.text], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<value:string> ``` After: ``` scala> spark.read.text("test.text").explain() == Physical Plan == *FileScan text [value#15] Batched: false, Format: Text, Location: InMemoryFileIndex[file:/scratch/rxin/spark/test.text], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<value:string> ``` Also closes #14680. ## How was this patch tested? Verified in spark-shell. Author: Reynold Xin Closes #16187 from rxin/SPARK-18760. 
--- .../execution/datasources/parquet/ParquetFileFormat.scala | 2 +- .../sql/execution/datasources/text/TextFileFormat.scala | 2 ++ .../apache/spark/sql/streaming/FileStreamSourceSuite.scala | 7 ++++--- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala index 031a0fe57893f..0965ffebea962 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala @@ -61,7 +61,7 @@ class ParquetFileFormat override def shortName(): String = "parquet" - override def toString: String = "ParquetFormat" + override def toString: String = "Parquet" override def hashCode(): Int = getClass.hashCode() diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/text/TextFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/text/TextFileFormat.scala index 178160cd71b05..897e535953331 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/text/TextFileFormat.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/text/TextFileFormat.scala @@ -39,6 +39,8 @@ class TextFileFormat extends TextBasedFileFormat with DataSourceRegister { override def shortName(): String = "text" + override def toString: String = "Text" + private def verifySchema(schema: StructType): Unit = { if (schema.size != 1) { throw new AnalysisException( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala index 7b6fe83b9a597..267c462484a32 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala +++ 
b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala @@ -31,7 +31,8 @@ import org.apache.spark.sql.test.SharedSQLContext import org.apache.spark.sql.types._ import org.apache.spark.util.Utils -class FileStreamSourceTest extends StreamTest with SharedSQLContext with PrivateMethodTester { +abstract class FileStreamSourceTest + extends StreamTest with SharedSQLContext with PrivateMethodTester { import testImplicits._ @@ -848,13 +849,13 @@ class FileStreamSourceSuite extends FileStreamSourceTest { val explainWithoutExtended = q.explainInternal(false) // `extended = false` only displays the physical plan. assert("Relation.*text".r.findAllMatchIn(explainWithoutExtended).size === 0) - assert("TextFileFormat".r.findAllMatchIn(explainWithoutExtended).size === 1) + assert(": Text".r.findAllMatchIn(explainWithoutExtended).size === 1) val explainWithExtended = q.explainInternal(true) // `extended = true` displays 3 logical plans (Parsed/Optimized/Optimized) and 1 physical // plan. assert("Relation.*text".r.findAllMatchIn(explainWithExtended).size === 3) - assert("TextFileFormat".r.findAllMatchIn(explainWithExtended).size === 1) + assert(": Text".r.findAllMatchIn(explainWithExtended).size === 1) } finally { q.stop() }