From c1b6fe479649c482947dfce6b6db67b159bd78a3 Mon Sep 17 00:00:00 2001 From: CodeGod <> Date: Sat, 9 Mar 2019 21:28:10 +0800 Subject: [PATCH] [SPARK-27080][SQL] bug fix: mergeWithMetastoreSchema with uniform lower case comparison When reading parquet file with merging metastore schema and file schema, we should compare field names using uniform case. In current implementation, lowercase is used but one omission. And this patch fix it. Unit test Closes #24001 from codeborui/mergeSchemaBugFix. Authored-by: CodeGod <> Signed-off-by: Wenchen Fan (cherry picked from commit a29df5fa02111f57965be2ab5e208f5c815265fe) Signed-off-by: Wenchen Fan --- .../spark/sql/hive/HiveMetastoreCatalog.scala | 2 +- .../sql/hive/HiveSchemaInferenceSuite.scala | 26 +++++++++++++++++++ 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala index 8adfda07d29d5..0138de660e743 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala @@ -282,7 +282,7 @@ private[hive] object HiveMetastoreCatalog { // Merge missing nullable fields to inferred schema and build a case-insensitive field map. val inferredFields = StructType(inferredSchema ++ missingNullables) .map(f => f.name.toLowerCase -> f).toMap - StructType(metastoreSchema.map(f => f.copy(name = inferredFields(f.name).name))) + StructType(metastoreSchema.map(f => f.copy(name = inferredFields(f.name.toLowerCase).name))) } catch { case NonFatal(_) => val msg = s"""Detected conflicting schemas when merging the schema obtained from the Hive diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSchemaInferenceSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSchemaInferenceSuite.scala index 51a48a20daaa2..b62c3c7c10099 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSchemaInferenceSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSchemaInferenceSuite.scala @@ -263,6 +263,32 @@ class HiveSchemaInferenceSuite StructType(Seq(StructField("lowerCase", BinaryType)))) } + // Parquet schema is subset of metaStore schema and has uppercase field name + assertResult( + StructType(Seq( + StructField("UPPERCase", DoubleType, nullable = true), + StructField("lowerCase", BinaryType, nullable = true)))) { + + HiveMetastoreCatalog.mergeWithMetastoreSchema( + StructType(Seq( + StructField("UPPERCase", DoubleType, nullable = true), + StructField("lowerCase", BinaryType, nullable = true))), + + StructType(Seq( + StructField("lowerCase", BinaryType, nullable = true)))) + } + + // Metastore schema contains additional nullable fields. + assert(intercept[Throwable] { + HiveMetastoreCatalog.mergeWithMetastoreSchema( + StructType(Seq( + StructField("UPPERCase", DoubleType, nullable = false), + StructField("lowerCase", BinaryType, nullable = true))), + + StructType(Seq( + StructField("lowerCase", BinaryType, nullable = true)))) + }.getMessage.contains("Detected conflicting schemas")) + // Check that merging missing nullable fields works as expected. assertResult( StructType(Seq(