From dec978d5931ba6e6645bb1eb585195c6c49b842a Mon Sep 17 00:00:00 2001 From: CodeGod <> Date: Thu, 7 Mar 2019 13:22:44 +0800 Subject: [PATCH] bug fix: mergeWithMetastoreSchema with uniform lower-case comparison (cherry picked from commit f47a765) --- .../spark/sql/hive/HiveMetastoreCatalog.scala | 2 +- .../sql/hive/HiveSchemaInferenceSuite.scala | 26 +++++++++++++++++++ 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala index 03f4b8d83e353..d6b2945b2ea7a 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala @@ -326,8 +326,8 @@ private[hive] object HiveMetastoreCatalog { // Merge missing nullable fields to inferred schema and build a case-insensitive field map. val inferredFields = StructType(inferredSchema ++ missingNullables) .map(f => f.name.toLowerCase -> f).toMap + StructType(metastoreSchema.map(f => f.copy(name = inferredFields(f.name.toLowerCase).name))) // scalastyle:on caselocale - StructType(metastoreSchema.map(f => f.copy(name = inferredFields(f.name).name))) } catch { case NonFatal(_) => val msg = s"""Detected conflicting schemas when merging the schema obtained from the Hive diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSchemaInferenceSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSchemaInferenceSuite.scala index aa4fc13333c48..590ef949ffbd7 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSchemaInferenceSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSchemaInferenceSuite.scala @@ -264,6 +264,32 @@ class HiveSchemaInferenceSuite StructType(Seq(StructField("lowerCase", BinaryType)))) } + // Parquet schema is a subset of the metastore schema, which has an uppercase field name + assertResult( + 
StructType(Seq( + StructField("UPPERCase", DoubleType, nullable = true), + StructField("lowerCase", BinaryType, nullable = true)))) { + + HiveMetastoreCatalog.mergeWithMetastoreSchema( + StructType(Seq( + StructField("UPPERCase", DoubleType, nullable = true), + StructField("lowerCase", BinaryType, nullable = true))), + + StructType(Seq( + StructField("lowerCase", BinaryType, nullable = true)))) + } + + // Metastore schema contains an additional non-nullable field. + assert(intercept[Throwable] { + HiveMetastoreCatalog.mergeWithMetastoreSchema( + StructType(Seq( + StructField("UPPERCase", DoubleType, nullable = false), + StructField("lowerCase", BinaryType, nullable = true))), + + StructType(Seq( + StructField("lowerCase", BinaryType, nullable = true)))) + }.getMessage.contains("Detected conflicting schemas")) + // Check that merging missing nullable fields works as expected. assertResult( StructType(Seq(