Skip to content

Commit

Permalink
Partitioning columns can be resolved.
Browse files Browse the repository at this point in the history
  • Loading branch information
yhuai committed Aug 7, 2014
1 parent 1161338 commit a0baec7
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,8 @@ private[hive] trait HiveStrategies {
}).reduceOption(And).getOrElse(Literal(true))

val unresolvedProjection = projectList.map(_ transform {
case a: AttributeReference => UnresolvedAttribute(a.name)
// Handle non-partitioning columns
case a: AttributeReference if !partitionKeyIds.contains(a.exprId) => UnresolvedAttribute(a.name)
})

if (relation.hiveQlTable.isPartitioned) {
Expand Down Expand Up @@ -109,16 +110,27 @@ private[hive] trait HiveStrategies {
}

org.apache.spark.sql.execution.Union(
partitions.par.map(p =>
partitions.par.map { p =>
val partValues = p.getValues()
val internalProjection = unresolvedProjection.map(_ transform {
// Handle partitioning columns
case a: AttributeReference if partitionKeyIds.contains(a.exprId) => {
val idx = relation.partitionKeys.indexWhere(a.exprId == _.exprId)
val key = relation.partitionKeys(idx)

Alias(Cast(Literal(partValues.get(idx), StringType), key.dataType), a.name)()
}
})

hiveContext
.parquetFile(p.getLocation)
.lowerCase
.where(unresolvedOtherPredicates)
.select(unresolvedProjection:_*)
.select(internalProjection:_*)
.queryExecution
.executedPlan
.fakeOutput(projectList.map(_.toAttribute))).seq) :: Nil

.fakeOutput(projectList.map(_.toAttribute))
}.seq) :: Nil
} else {
hiveContext
.parquetFile(relation.hiveQlTable.getDataLocation.getPath)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,22 @@ class ParquetMetastoreSuite extends QueryTest with BeforeAndAfterAll {
sql(s"ALTER TABLE partitioned_parquet ADD PARTITION (p=$p)")
}

test("project the partitioning column") {
  // Each of the ten partitions (p = 1..10) holds ten rows, so grouping by
  // the partitioning column must yield one (p, 10) row per partition.
  val expectedCounts = (1 to 10).map(p => (p, 10)).toList
  checkAnswer(
    sql("SELECT p, count(*) FROM partitioned_parquet group by p"),
    expectedCounts
  )
}

test("simple count") {
checkAnswer(
sql("SELECT COUNT(*) FROM partitioned_parquet"),
Expand Down

0 comments on commit a0baec7

Please sign in to comment.