From df3d4fd9ca77b5f4416275ccc6d6f4f8f107f8b0 Mon Sep 17 00:00:00 2001 From: panbingkun Date: Wed, 28 Aug 2024 10:57:39 +0200 Subject: [PATCH] [SPARK-49438][SQL] Fix the pretty name of the `FromAvro` & `ToAvro` expression ### What changes were proposed in this pull request? In the PR, I propose to override the `prettyName` method of the `FromAvro` & `ToAvro` expressions and set it to `from_avro` & `to_avro` by default as in `FunctionRegistry`: https://github.com/apache/spark/blob/6d8235f3b2bbaa88b10c35d6eecddffa4d1b04a4/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala#L873-L874 ### Why are the changes needed? To avoid confusing users with a non-existent function name, and to print the correct name in errors. ### Does this PR introduce _any_ user-facing change? Yes. ### How was this patch tested? Updated the existing UTs. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #47900 from panbingkun/SPARK-49438. Authored-by: panbingkun Signed-off-by: Max Gekk --- .../org/apache/spark/sql/avro/AvroFunctionsSuite.scala | 6 +++--- .../sql/catalyst/expressions/toFromAvroSqlFunctions.scala | 7 +++++++ .../sql-tests/analyzer-results/to_from_avro.sql.out | 6 +++--- .../test/resources/sql-tests/results/to_from_avro.sql.out | 6 +++--- 4 files changed, 16 insertions(+), 9 deletions(-) diff --git a/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroFunctionsSuite.scala b/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroFunctionsSuite.scala index 42ddfd48892c8..7001fa96deb80 100644 --- a/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroFunctionsSuite.scala +++ b/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroFunctionsSuite.scala @@ -328,7 +328,7 @@ class AvroFunctionsSuite extends QueryTest with SharedSparkSession { |select to_avro(s, 42) as result from t |""".stripMargin)), errorClass = "DATATYPE_MISMATCH.TYPE_CHECK_FAILURE_WITH_HINT", - parameters = Map("sqlExpr" -> "\"toavro(s, 42)\"", + 
parameters = Map("sqlExpr" -> "\"to_avro(s, 42)\"", "msg" -> ("The second argument of the TO_AVRO SQL function must be a constant string " + "containing the JSON representation of the schema to use for converting the value to " + "AVRO format"), @@ -343,7 +343,7 @@ class AvroFunctionsSuite extends QueryTest with SharedSparkSession { |select from_avro(s, 42, '') as result from t |""".stripMargin)), errorClass = "DATATYPE_MISMATCH.TYPE_CHECK_FAILURE_WITH_HINT", - parameters = Map("sqlExpr" -> "\"fromavro(s, 42, )\"", + parameters = Map("sqlExpr" -> "\"from_avro(s, 42, )\"", "msg" -> ("The second argument of the FROM_AVRO SQL function must be a constant string " + "containing the JSON representation of the schema to use for converting the value " + "from AVRO format"), @@ -360,7 +360,7 @@ class AvroFunctionsSuite extends QueryTest with SharedSparkSession { errorClass = "DATATYPE_MISMATCH.TYPE_CHECK_FAILURE_WITH_HINT", parameters = Map( "sqlExpr" -> - s"\"fromavro(s, $jsonFormatSchema, 42)\"".stripMargin, + s"\"from_avro(s, $jsonFormatSchema, 42)\"".stripMargin, "msg" -> ("The third argument of the FROM_AVRO SQL function must be a constant map of " + "strings to strings containing the options to use for converting the value " + "from AVRO format"), diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/toFromAvroSqlFunctions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/toFromAvroSqlFunctions.scala index 46f80cacc4fed..58bddafac0882 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/toFromAvroSqlFunctions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/toFromAvroSqlFunctions.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql.catalyst.expressions +import org.apache.spark.sql.catalyst.analysis.FunctionRegistry import org.apache.spark.sql.catalyst.analysis.TypeCheckResult import org.apache.spark.sql.catalyst.util.ArrayBasedMapData import 
org.apache.spark.sql.errors.QueryCompilationErrors @@ -117,6 +118,9 @@ case class FromAvro(child: Expression, jsonFormatSchema: Expression, options: Ex val expr = constructor.newInstance(child, schemaValue, optionsValue) expr.asInstanceOf[Expression] } + + override def prettyName: String = + getTagValue(FunctionRegistry.FUNC_ALIAS).getOrElse("from_avro") } /** @@ -189,4 +193,7 @@ case class ToAvro(child: Expression, jsonFormatSchema: Expression) val expr = constructor.newInstance(child, schemaValue) expr.asInstanceOf[Expression] } + + override def prettyName: String = + getTagValue(FunctionRegistry.FUNC_ALIAS).getOrElse("to_avro") } diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/to_from_avro.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/to_from_avro.sql.out index 951a4025d5fb2..8275e4f1c0ff0 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/to_from_avro.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/to_from_avro.sql.out @@ -36,7 +36,7 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException "messageParameters" : { "hint" : "", "msg" : "The second argument of the FROM_AVRO SQL function must be a constant string containing the JSON representation of the schema to use for converting the value from AVRO format", - "sqlExpr" : "\"fromavro(s, 42, map())\"" + "sqlExpr" : "\"from_avro(s, 42, map())\"" }, "queryContext" : [ { "objectType" : "", @@ -58,7 +58,7 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException "messageParameters" : { "hint" : "", "msg" : "The third argument of the FROM_AVRO SQL function must be a constant map of strings to strings containing the options to use for converting the value from AVRO format", - "sqlExpr" : "\"fromavro(s, variablereference(system.session.avro_schema='{ \"type\": \"record\", \"name\": \"struct\", \"fields\": [{ \"name\": \"u\", \"type\": [\"int\",\"string\"] }] }'), 42)\"" + "sqlExpr" : "\"from_avro(s, 
variablereference(system.session.avro_schema='{ \"type\": \"record\", \"name\": \"struct\", \"fields\": [{ \"name\": \"u\", \"type\": [\"int\",\"string\"] }] }'), 42)\"" }, "queryContext" : [ { "objectType" : "", @@ -80,7 +80,7 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException "messageParameters" : { "hint" : "", "msg" : "The second argument of the TO_AVRO SQL function must be a constant string containing the JSON representation of the schema to use for converting the value to AVRO format", - "sqlExpr" : "\"toavro(s, 42)\"" + "sqlExpr" : "\"to_avro(s, 42)\"" }, "queryContext" : [ { "objectType" : "", diff --git a/sql/core/src/test/resources/sql-tests/results/to_from_avro.sql.out b/sql/core/src/test/resources/sql-tests/results/to_from_avro.sql.out index f9f491bd70fd1..a94175b1df39a 100644 --- a/sql/core/src/test/resources/sql-tests/results/to_from_avro.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/to_from_avro.sql.out @@ -38,7 +38,7 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException "messageParameters" : { "hint" : "", "msg" : "The second argument of the FROM_AVRO SQL function must be a constant string containing the JSON representation of the schema to use for converting the value from AVRO format", - "sqlExpr" : "\"fromavro(s, 42, map())\"" + "sqlExpr" : "\"from_avro(s, 42, map())\"" }, "queryContext" : [ { "objectType" : "", @@ -62,7 +62,7 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException "messageParameters" : { "hint" : "", "msg" : "The third argument of the FROM_AVRO SQL function must be a constant map of strings to strings containing the options to use for converting the value from AVRO format", - "sqlExpr" : "\"fromavro(s, variablereference(system.session.avro_schema='{ \"type\": \"record\", \"name\": \"struct\", \"fields\": [{ \"name\": \"u\", \"type\": [\"int\",\"string\"] }] }'), 42)\"" + "sqlExpr" : "\"from_avro(s, variablereference(system.session.avro_schema='{ \"type\": \"record\", \"name\": \"struct\", 
\"fields\": [{ \"name\": \"u\", \"type\": [\"int\",\"string\"] }] }'), 42)\"" }, "queryContext" : [ { "objectType" : "", @@ -86,7 +86,7 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException "messageParameters" : { "hint" : "", "msg" : "The second argument of the TO_AVRO SQL function must be a constant string containing the JSON representation of the schema to use for converting the value to AVRO format", - "sqlExpr" : "\"toavro(s, 42)\"" + "sqlExpr" : "\"to_avro(s, 42)\"" }, "queryContext" : [ { "objectType" : "",