From 5e1648074ffb96f1b2104dc5ea3d78d25e505181 Mon Sep 17 00:00:00 2001
From: Andrew Or
Date: Fri, 18 Mar 2016 15:52:00 -0700
Subject: [PATCH] Fix tests round 2

Two kinds of test failures remained: case sensitivity analysis was
still controlled by hardcoded booleans instead of the session's
CatalystConf, and several tests asserted the exact text of error
messages. SessionCatalog and its subclasses now take a CatalystConf,
and the error message assertions are relaxed where possible.
---
 .../sql/catalyst/analysis/Analyzer.scala      | 12 +++++-----
 .../catalyst/catalog/InMemoryCatalog.scala    |  8 ++++---
 .../sql/catalyst/catalog/SessionCatalog.scala |  9 ++++++--
 .../sql/catalyst/analysis/AnalysisTest.scala  |  2 +-
 .../analysis/DecimalPrecisionSuite.scala      |  2 +-
 .../catalog/SessionCatalogSuite.scala         | 10 ++++-----
 .../spark/sql/execution/datasources/ddl.scala | 10 +++++++++
 .../spark/sql/internal/SessionState.scala     |  2 +-
 .../org/apache/spark/sql/SQLQuerySuite.scala  | 22 +++++++++++--------
 .../spark/sql/hive/HiveSessionCatalog.scala   |  5 +++--
 .../spark/sql/hive/HiveSessionState.scala     |  3 +--
 .../sql/hive/execution/SQLQuerySuite.scala    |  4 ++--
 12 files changed, 53 insertions(+), 36 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index 7b186d1cdd198..c6d6975900ad6 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -40,16 +40,14 @@ import org.apache.spark.sql.types._
  * Used for testing when all relations are already filled in and the analyzer needs only
  * to resolve attribute references.
  */
-object SimpleAnalyzer
-  extends Analyzer(
-    new SessionCatalog(new InMemoryCatalog),
-    EmptyFunctionRegistry,
-    new SimpleCatalystConf(true))
+object SimpleAnalyzer extends SimpleAnalyzer(new SimpleCatalystConf(true))
+class SimpleAnalyzer(conf: CatalystConf)
+  extends Analyzer(new SessionCatalog(new InMemoryCatalog, conf), EmptyFunctionRegistry, conf)
 
 /**
  * Provides a logical query plan analyzer, which translates [[UnresolvedAttribute]]s and
- * [[UnresolvedRelation]]s into fully typed objects using information in a schema [[Catalog]] and
- * a [[FunctionRegistry]].
+ * [[UnresolvedRelation]]s into fully typed objects using information in a
+ * [[SessionCatalog]] and a [[FunctionRegistry]].
  */
 class Analyzer(
     catalog: SessionCatalog,
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala
index 83be9451184e3..888b92922e038 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala
@@ -64,20 +64,22 @@ class InMemoryCatalog extends ExternalCatalog {
 
   private def requireFunctionExists(db: String, funcName: String): Unit = {
     if (!functionExists(db, funcName)) {
-      throw new AnalysisException(s"Function '$funcName' does not exist in database '$db'")
+      throw new AnalysisException(
+        s"Function not found: '$funcName' does not exist in database '$db'")
     }
   }
 
   private def requireTableExists(db: String, table: String): Unit = {
     if (!tableExists(db, table)) {
-      throw new AnalysisException(s"Table '$table' does not exist in database '$db'")
+      throw new AnalysisException(
+        s"Table not found: '$table' does not exist in database '$db'")
     }
   }
 
   private def requirePartitionExists(db: String, table: String, spec: TablePartitionSpec): Unit = {
     if (!partitionExists(db, table, spec)) {
       throw new AnalysisException(
-        s"Partition does not exist in database '$db' table '$table': '$spec'")
+        s"Partition not found: database '$db' table '$table' does not contain: '$spec'")
     }
   }
 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
index 34cf55a3fe553..91b40df3a3735 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
@@ -22,6 +22,7 @@ import java.util.concurrent.ConcurrentHashMap
 import scala.collection.JavaConverters._
 
 import org.apache.spark.sql.AnalysisException
+import org.apache.spark.sql.catalyst.{CatalystConf, SimpleCatalystConf}
 import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier}
 import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SubqueryAlias}
 
@@ -31,9 +32,13 @@ import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SubqueryAlias}
  * proxy to the underlying metastore (e.g. Hive Metastore) and it also manages temporary
  * tables and functions of the Spark Session that it belongs to.
  */
-class SessionCatalog(externalCatalog: ExternalCatalog, caseSensitiveAnalysis: Boolean = true) {
+class SessionCatalog(externalCatalog: ExternalCatalog, conf: CatalystConf) {
   import ExternalCatalog._
 
+  def this(externalCatalog: ExternalCatalog) {
+    this(externalCatalog, new SimpleCatalystConf(true))
+  }
+
   protected[this] val tempTables = new ConcurrentHashMap[String, LogicalPlan]
   protected[this] val tempFunctions = new ConcurrentHashMap[String, CatalogFunction]
 
@@ -53,7 +58,7 @@ class SessionCatalog(externalCatalog: ExternalCatalog, caseSensitiveAnalysis: Bo
    * Format table name, taking into account case sensitivity.
   */
  protected[this] def formatTableName(name: String): String = {
-    if (caseSensitiveAnalysis) name else name.toLowerCase
+    if (conf.caseSensitiveAnalysis) name else name.toLowerCase
  }
 
  // ----------------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisTest.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisTest.scala
index e5f071787f429..6fa4beed99267 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisTest.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisTest.scala
@@ -30,7 +30,7 @@ trait AnalysisTest extends PlanTest {
 
   private def makeAnalyzer(caseSensitive: Boolean): Analyzer = {
     val conf = new SimpleCatalystConf(caseSensitive)
-    val catalog = new SessionCatalog(new InMemoryCatalog, caseSensitive)
+    val catalog = new SessionCatalog(new InMemoryCatalog, conf)
     catalog.createTempTable("TaBlE", TestRelations.testRelation, ignoreIfExists = true)
     new Analyzer(catalog, EmptyFunctionRegistry, conf) {
       override val extendedResolutionRules = EliminateSubqueryAliases :: Nil
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecisionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecisionSuite.scala
index 2779d439da847..144e77471c705 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecisionSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecisionSuite.scala
@@ -32,7 +32,7 @@ import org.apache.spark.sql.types._
 
 class DecimalPrecisionSuite extends PlanTest with BeforeAndAfter {
   private val conf = new SimpleCatalystConf(true)
-  private val catalog = new SessionCatalog(new InMemoryCatalog, caseSensitiveAnalysis = true)
+  private val catalog = new SessionCatalog(new InMemoryCatalog, conf)
   private val analyzer = new Analyzer(catalog, EmptyFunctionRegistry, conf)
 
   private val relation = LocalRelation(
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala
index 0351614b3599e..f0bc629197f65 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala
@@ -427,9 +427,8 @@ class SessionCatalogSuite extends SparkFunSuite {
       TableIdentifier("tbl4"),
       TableIdentifier("tbl1", Some("db2")),
       TableIdentifier("tbl2", Some("db2"))))
-    intercept[AnalysisException] {
-      catalog.listTables("unknown_db")
-    }
+    assert(catalog.listTables("unknown_db").toSet ==
+      Set(TableIdentifier("tbl1"), TableIdentifier("tbl4")))
   }
 
   test("list tables with pattern") {
@@ -446,9 +445,8 @@ class SessionCatalogSuite extends SparkFunSuite {
       TableIdentifier("tbl2", Some("db2"))))
     assert(catalog.listTables("db2", "*1").toSet ==
       Set(TableIdentifier("tbl1"), TableIdentifier("tbl1", Some("db2"))))
-    intercept[AnalysisException] {
-      catalog.listTables("unknown_db")
-    }
+    assert(catalog.listTables("unknown_db", "*").toSet ==
+      Set(TableIdentifier("tbl1"), TableIdentifier("tbl4")))
   }
 
   // --------------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/ddl.scala
index a1ae050d6c49f..80d1ecaef173f 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/ddl.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/ddl.scala
@@ -93,6 +93,11 @@ case class CreateTempTableUsing(
     provider: String,
     options: Map[String, String]) extends RunnableCommand {
 
+  if (tableIdent.database.isDefined) {
+    throw new AnalysisException(
+      s"Temporary table '$tableIdent' should not have specified a database")
+  }
+
   def run(sqlContext: SQLContext): Seq[Row] = {
     val dataSource = DataSource(
       sqlContext,
@@ -116,6 +121,11 @@ case class CreateTempTableUsingAsSelect(
     options: Map[String, String],
     query: LogicalPlan) extends RunnableCommand {
 
+  if (tableIdent.database.isDefined) {
+    throw new AnalysisException(
+      s"Temporary table '$tableIdent' should not have specified a database")
+  }
+
   override def run(sqlContext: SQLContext): Seq[Row] = {
     val df = Dataset.newDataFrame(sqlContext, query)
     val dataSource = DataSource(
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
index a5d6f39fc626e..bdd2d49a22b8b 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
@@ -46,7 +46,7 @@ private[sql] class SessionState(ctx: SQLContext) {
   /**
    * Internal catalog for managing table and database states.
    */
-  lazy val sessionCatalog = new SessionCatalog(ctx.externalCatalog)
+  lazy val sessionCatalog = new SessionCatalog(ctx.externalCatalog, conf)
 
   /**
    * Internal catalog for managing functions registered by the user.
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index 6716982118fed..512d327fd0a0e 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -1395,12 +1395,16 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
   }
 
   test("SPARK-4699 case sensitivity SQL query") {
-    sqlContext.setConf(SQLConf.CASE_SENSITIVE, false)
-    val data = TestData(1, "val_1") :: TestData(2, "val_2") :: Nil
-    val rdd = sparkContext.parallelize((0 to 1).map(i => data(i)))
-    rdd.toDF().registerTempTable("testTable1")
-    checkAnswer(sql("SELECT VALUE FROM TESTTABLE1 where KEY = 1"), Row("val_1"))
-    sqlContext.setConf(SQLConf.CASE_SENSITIVE, true)
+    val orig = sqlContext.getConf(SQLConf.CASE_SENSITIVE)
+    try {
+      sqlContext.setConf(SQLConf.CASE_SENSITIVE, false)
+      val data = TestData(1, "val_1") :: TestData(2, "val_2") :: Nil
+      val rdd = sparkContext.parallelize((0 to 1).map(i => data(i)))
+      rdd.toDF().registerTempTable("testTable1")
+      checkAnswer(sql("SELECT VALUE FROM TESTTABLE1 where KEY = 1"), Row("val_1"))
+    } finally {
+      sqlContext.setConf(SQLConf.CASE_SENSITIVE, orig)
+    }
   }
 
   test("SPARK-6145: ORDER BY test for nested fields") {
@@ -1674,7 +1678,8 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
       .format("parquet")
       .save(path)
 
-    val message = intercept[AnalysisException] {
+    // We don't support creating a temporary table while specifying a database
+    intercept[AnalysisException] {
       sqlContext.sql(
         s"""
           |CREATE TEMPORARY TABLE db.t
          |USING parquet
          |OPTIONS (
          |  path '$path'
          |)
        """.stripMargin)
     }.getMessage
-    assert(message.contains("Specifying database name or other qualifiers are not allowed"))
 
-    // If you use backticks to quote the name of a temporary table having dot in it.
+    // If you use backticks to quote the name then it's OK.
     sqlContext.sql(
       s"""
         |CREATE TEMPORARY TABLE `db.t`
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala
index 6a75a4cf1fdba..aa44cba4b5641 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala
@@ -23,6 +23,7 @@ import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SubqueryAlias}
 import org.apache.spark.sql.catalyst.rules.Rule
 import org.apache.spark.sql.execution.datasources.BucketSpec
 import org.apache.spark.sql.hive.client.HiveClient
+import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types.StructType
 
 
@@ -30,8 +31,8 @@ class HiveSessionCatalog(
     externalCatalog: HiveCatalog,
     client: HiveClient,
     context: HiveContext,
-    caseSensitiveAnalysis: Boolean)
-  extends SessionCatalog(externalCatalog, caseSensitiveAnalysis) {
+    conf: SQLConf)
+  extends SessionCatalog(externalCatalog, conf) {
 
   override def setCurrentDatabase(db: String): Unit = {
     super.setCurrentDatabase(db)
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala
index 399410d889180..12f7444619a0d 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala
@@ -38,8 +38,7 @@ private[hive] class HiveSessionState(ctx: HiveContext) extends SessionState(ctx)
    * Internal catalog for managing table and database states.
    */
   override lazy val sessionCatalog = {
-    new HiveSessionCatalog(
-      ctx.hiveCatalog, ctx.metadataHive, ctx, caseSensitiveAnalysis = false)
+    new HiveSessionCatalog(ctx.hiveCatalog, ctx.metadataHive, ctx, conf)
   }
 
   /**
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
index ca6199f3b7f76..5d63aa0854285 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
@@ -1321,6 +1321,7 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
       .format("parquet")
       .save(path)
 
+    // We don't support creating a temporary table while specifying a database
     val message = intercept[AnalysisException] {
       sqlContext.sql(
         s"""
          |CREATE TEMPORARY TABLE db.t
          |USING parquet
          |OPTIONS (
          |  path '$path'
          |)
        """.stripMargin)
     }.getMessage
-    assert(message.contains("Specifying database name or other qualifiers are not allowed"))
 
-    // If you use backticks to quote the name of a temporary table having dot in it.
+    // If you use backticks to quote the name then it's OK.
     sqlContext.sql(
       s"""
         |CREATE TEMPORARY TABLE `db.t`
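
Reviewer note (not part of the patch): a minimal sketch of the new
constructor wiring, using only APIs that appear in the diff above
(SessionCatalog, InMemoryCatalog, SimpleCatalystConf, createTempTable).
The table name is illustrative, and the claim that createTempTable
normalizes names through formatTableName is my assumption, not
something the diff shows.

    import org.apache.spark.sql.catalyst.SimpleCatalystConf
    import org.apache.spark.sql.catalyst.catalog.{InMemoryCatalog, SessionCatalog}
    import org.apache.spark.sql.catalyst.plans.logical.LocalRelation

    // Case sensitivity is now read from the conf instead of a hardcoded boolean.
    val conf = new SimpleCatalystConf(caseSensitiveAnalysis = false)
    val catalog = new SessionCatalog(new InMemoryCatalog, conf)

    // Assuming temp table names go through formatTableName, this registers
    // the table under "table" because the conf is case-insensitive.
    catalog.createTempTable("TaBlE", LocalRelation(), ignoreIfExists = true)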
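
The relaxed listTables behavior encoded by the SessionCatalogSuite
changes can be seen on the same catalog: listing tables in a database
that does not exist no longer throws an AnalysisException; the
session's temporary tables are still returned. A sketch under the same
assumptions:

    import org.apache.spark.sql.catalyst.TableIdentifier

    // "unknown_db" does not exist, but temporary tables are session-scoped,
    // so they are listed anyway instead of an exception being thrown.
    catalog.createTempTable("tbl1", LocalRelation(), ignoreIfExists = true)
    val listed = catalog.listTables("unknown_db")
    assert(listed.contains(TableIdentifier("tbl1")))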
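
The constructor-time guards added in ddl.scala are what the two
SQLQuerySuite changes exercise at the SQL level. Condensed, with a
placeholder path (each statement run independently):

    // Throws AnalysisException: a temporary table must not be qualified
    // with a database ("should not have specified a database").
    sqlContext.sql("CREATE TEMPORARY TABLE db.t USING parquet OPTIONS (path '/tmp/t')")

    // OK: backticks quote the dotted name into a single-part identifier.
    sqlContext.sql("CREATE TEMPORARY TABLE `db.t` USING parquet OPTIONS (path '/tmp/t')")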