From 52538f0d9bd1258dc2a0a2ab5bdb953f85d85da9 Mon Sep 17 00:00:00 2001 From: panbingkun Date: Wed, 9 Oct 2024 10:08:06 +0200 Subject: [PATCH] [SPARK-49909][SQL] Fix the pretty name of some expressions ### What changes were proposed in this pull request? This PR fixes the `pretty name` of some `expressions`, including: `random`, `to_varchar`, `current_database`, `curdate`, `dateadd` and `array_agg`. ### Why are the changes needed? The actual function name used does not match the displayed name, as shown below: - Before: image - After: image ### Does this PR introduce _any_ user-facing change? Yes. It makes the column header that end users see in `Spark SQL` output consistent with the `actual function name` used. ### How was this patch tested? - Pass GA. - Update existing UTs. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #48385 from panbingkun/SPARK-49909. Authored-by: panbingkun Signed-off-by: Max Gekk --- python/pyspark/sql/functions/builtin.py | 80 +++++++++---------- .../expressions/aggregate/collect.scala | 5 +- .../expressions/datetimeExpressions.scala | 5 +- .../spark/sql/catalyst/expressions/misc.scala | 3 +- .../expressions/numberFormatExpressions.scala | 7 +- .../expressions/randomExpressions.scala | 8 +- .../function_array_agg.explain | 2 +- .../explain-results/function_curdate.explain | 2 +- .../function_current_database.explain | 2 +- .../explain-results/function_dateadd.explain | 2 +- .../function_random_with_seed.explain | 2 +- .../function_to_varchar.explain | 2 +- .../sql-functions/sql-expression-schema.md | 12 +-- .../analyzer-results/charvarchar.sql.out | 6 +- .../current_database_catalog.sql.out | 2 +- .../analyzer-results/group-by.sql.out | 4 +- .../sql-session-variables.sql.out | 2 +- .../sql-tests/results/charvarchar.sql.out | 6 +- .../results/current_database_catalog.sql.out | 2 +- .../sql-tests/results/group-by.sql.out | 4 +- .../results/subexp-elimination.sql.out | 6 +- 21 files changed, 87 
insertions(+), 77 deletions(-) diff --git a/python/pyspark/sql/functions/builtin.py b/python/pyspark/sql/functions/builtin.py index beed832e36067..b75d1b2f59faf 100644 --- a/python/pyspark/sql/functions/builtin.py +++ b/python/pyspark/sql/functions/builtin.py @@ -4921,44 +4921,44 @@ def array_agg(col: "ColumnOrName") -> Column: >>> from pyspark.sql import functions as sf >>> df = spark.createDataFrame([[1],[1],[2]], ["c"]) >>> df.agg(sf.sort_array(sf.array_agg('c'))).show() - +---------------------------------+ - |sort_array(collect_list(c), true)| - +---------------------------------+ - | [1, 1, 2]| - +---------------------------------+ + +------------------------------+ + |sort_array(array_agg(c), true)| + +------------------------------+ + | [1, 1, 2]| + +------------------------------+ Example 2: Using array_agg function on a string column >>> from pyspark.sql import functions as sf >>> df = spark.createDataFrame([["apple"],["apple"],["banana"]], ["c"]) >>> df.agg(sf.sort_array(sf.array_agg('c'))).show(truncate=False) - +---------------------------------+ - |sort_array(collect_list(c), true)| - +---------------------------------+ - |[apple, apple, banana] | - +---------------------------------+ + +------------------------------+ + |sort_array(array_agg(c), true)| + +------------------------------+ + |[apple, apple, banana] | + +------------------------------+ Example 3: Using array_agg function on a column with null values >>> from pyspark.sql import functions as sf >>> df = spark.createDataFrame([[1],[None],[2]], ["c"]) >>> df.agg(sf.sort_array(sf.array_agg('c'))).show() - +---------------------------------+ - |sort_array(collect_list(c), true)| - +---------------------------------+ - | [1, 2]| - +---------------------------------+ + +------------------------------+ + |sort_array(array_agg(c), true)| + +------------------------------+ + | [1, 2]| + +------------------------------+ Example 4: Using array_agg function on a column with different data types >>> 
from pyspark.sql import functions as sf >>> df = spark.createDataFrame([[1],["apple"],[2]], ["c"]) >>> df.agg(sf.sort_array(sf.array_agg('c'))).show() - +---------------------------------+ - |sort_array(collect_list(c), true)| - +---------------------------------+ - | [1, 2, apple]| - +---------------------------------+ + +------------------------------+ + |sort_array(array_agg(c), true)| + +------------------------------+ + | [1, 2, apple]| + +------------------------------+ """ return _invoke_function_over_columns("array_agg", col) @@ -8712,31 +8712,31 @@ def dateadd(start: "ColumnOrName", days: Union["ColumnOrName", int]) -> Column: >>> spark.createDataFrame( ... [('2015-04-08', 2,)], ['dt', 'add'] ... ).select(sf.dateadd("dt", 1)).show() - +---------------+ - |date_add(dt, 1)| - +---------------+ - | 2015-04-09| - +---------------+ + +--------------+ + |dateadd(dt, 1)| + +--------------+ + | 2015-04-09| + +--------------+ >>> import pyspark.sql.functions as sf >>> spark.createDataFrame( ... [('2015-04-08', 2,)], ['dt', 'add'] ... ).select(sf.dateadd("dt", sf.lit(2))).show() - +---------------+ - |date_add(dt, 2)| - +---------------+ - | 2015-04-10| - +---------------+ + +--------------+ + |dateadd(dt, 2)| + +--------------+ + | 2015-04-10| + +--------------+ >>> import pyspark.sql.functions as sf >>> spark.createDataFrame( ... [('2015-04-08', 2,)], ['dt', 'add'] ... 
).select(sf.dateadd("dt", -1)).show() - +----------------+ - |date_add(dt, -1)| - +----------------+ - | 2015-04-07| - +----------------+ + +---------------+ + |dateadd(dt, -1)| + +---------------+ + | 2015-04-07| + +---------------+ """ days = _enum_to_value(days) days = lit(days) if isinstance(days, int) else days @@ -10343,11 +10343,11 @@ def current_database() -> Column: Examples -------- >>> spark.range(1).select(current_database()).show() - +----------------+ - |current_schema()| - +----------------+ - | default| - +----------------+ + +------------------+ + |current_database()| + +------------------+ + | default| + +------------------+ """ return _invoke_function("current_database") diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/collect.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/collect.scala index c593c8bfb8341..0a4882bfada17 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/collect.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/collect.scala @@ -21,7 +21,7 @@ import scala.collection.mutable import scala.collection.mutable.Growable import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.analysis.TypeCheckResult +import org.apache.spark.sql.catalyst.analysis.{FunctionRegistry, TypeCheckResult} import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.DataTypeMismatch import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.trees.UnaryLike @@ -118,7 +118,8 @@ case class CollectList( override def createAggregationBuffer(): mutable.ArrayBuffer[Any] = mutable.ArrayBuffer.empty - override def prettyName: String = "collect_list" + override def prettyName: String = + getTagValue(FunctionRegistry.FUNC_ALIAS).getOrElse("collect_list") override def eval(buffer: mutable.ArrayBuffer[Any]): Any = { new 
GenericArrayData(buffer.toArray) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index b166d235557fc..764637b97a100 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -150,7 +150,8 @@ case class CurrentDate(timeZoneId: Option[String] = None) override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression = copy(timeZoneId = Option(timeZoneId)) - override def prettyName: String = "current_date" + override def prettyName: String = + getTagValue(FunctionRegistry.FUNC_ALIAS).getOrElse("current_date") } // scalastyle:off line.size.limit @@ -329,7 +330,7 @@ case class DateAdd(startDate: Expression, days: Expression) }) } - override def prettyName: String = "date_add" + override def prettyName: String = getTagValue(FunctionRegistry.FUNC_ALIAS).getOrElse("date_add") override protected def withNewChildrenInternal( newLeft: Expression, newRight: Expression): DateAdd = copy(startDate = newLeft, days = newRight) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala index cb846f606632b..0315c12b9bb8c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala @@ -202,7 +202,8 @@ object AssertTrue { case class CurrentDatabase() extends LeafExpression with Unevaluable { override def dataType: DataType = SQLConf.get.defaultStringType override def nullable: Boolean = false - override def prettyName: String = "current_schema" + override def prettyName: String = + 
getTagValue(FunctionRegistry.FUNC_ALIAS).getOrElse("current_database") final override val nodePatterns: Seq[TreePattern] = Seq(CURRENT_LIKE) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/numberFormatExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/numberFormatExpressions.scala index 5bd2ab6035e10..eefd21b236b7f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/numberFormatExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/numberFormatExpressions.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.expressions import java.util.Locale -import org.apache.spark.sql.catalyst.analysis.{ExpressionBuilder, TypeCheckResult} +import org.apache.spark.sql.catalyst.analysis.{ExpressionBuilder, FunctionRegistry, TypeCheckResult} import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.DataTypeMismatch import org.apache.spark.sql.catalyst.expressions.Cast._ import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, CodeGenerator, ExprCode} @@ -307,7 +307,10 @@ case class ToCharacter(left: Expression, right: Expression) inputTypeCheck } } - override def prettyName: String = "to_char" + + override def prettyName: String = + getTagValue(FunctionRegistry.FUNC_ALIAS).getOrElse("to_char") + override def nullSafeEval(decimal: Any, format: Any): Any = { val input = decimal.asInstanceOf[Decimal] numberFormatter.format(input) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/randomExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/randomExpressions.scala index ada0a73a67958..3cec83facd01d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/randomExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/randomExpressions.scala @@ -19,7 +19,7 @@ package 
org.apache.spark.sql.catalyst.expressions import org.apache.spark.SparkException import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.analysis.{TypeCheckResult, UnresolvedSeed} +import org.apache.spark.sql.catalyst.analysis.{FunctionRegistry, TypeCheckResult, UnresolvedSeed} import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.DataTypeMismatch import org.apache.spark.sql.catalyst.expressions.ExpectsInputTypes.{ordinalNumber, toSQLExpr, toSQLType} import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, CodeGenerator, ExprCode, FalseLiteral} @@ -128,8 +128,12 @@ case class Rand(child: Expression, hideSeed: Boolean = false) extends Nondetermi } override def flatArguments: Iterator[Any] = Iterator(child) + + override def prettyName: String = + getTagValue(FunctionRegistry.FUNC_ALIAS).getOrElse("rand") + override def sql: String = { - s"rand(${if (hideSeed) "" else child.sql})" + s"$prettyName(${if (hideSeed) "" else child.sql})" } override protected def withNewChildInternal(newChild: Expression): Rand = copy(child = newChild) diff --git a/sql/connect/common/src/test/resources/query-tests/explain-results/function_array_agg.explain b/sql/connect/common/src/test/resources/query-tests/explain-results/function_array_agg.explain index 102f736c62ef6..6668692f6cf1d 100644 --- a/sql/connect/common/src/test/resources/query-tests/explain-results/function_array_agg.explain +++ b/sql/connect/common/src/test/resources/query-tests/explain-results/function_array_agg.explain @@ -1,2 +1,2 @@ -Aggregate [collect_list(a#0, 0, 0) AS collect_list(a)#0] +Aggregate [array_agg(a#0, 0, 0) AS array_agg(a)#0] +- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/sql/connect/common/src/test/resources/query-tests/explain-results/function_curdate.explain b/sql/connect/common/src/test/resources/query-tests/explain-results/function_curdate.explain index 5305b346c4f2d..be039d62a5494 100644 --- 
a/sql/connect/common/src/test/resources/query-tests/explain-results/function_curdate.explain +++ b/sql/connect/common/src/test/resources/query-tests/explain-results/function_curdate.explain @@ -1,2 +1,2 @@ -Project [current_date(Some(America/Los_Angeles)) AS current_date()#0] +Project [curdate(Some(America/Los_Angeles)) AS curdate()#0] +- LocalRelation , [d#0, t#0, s#0, x#0L, wt#0] diff --git a/sql/connect/common/src/test/resources/query-tests/explain-results/function_current_database.explain b/sql/connect/common/src/test/resources/query-tests/explain-results/function_current_database.explain index 481c0a478c8df..93dfac524d9a1 100644 --- a/sql/connect/common/src/test/resources/query-tests/explain-results/function_current_database.explain +++ b/sql/connect/common/src/test/resources/query-tests/explain-results/function_current_database.explain @@ -1,2 +1,2 @@ -Project [current_schema() AS current_schema()#0] +Project [current_database() AS current_database()#0] +- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/sql/connect/common/src/test/resources/query-tests/explain-results/function_dateadd.explain b/sql/connect/common/src/test/resources/query-tests/explain-results/function_dateadd.explain index 66325085b9c14..319428541760d 100644 --- a/sql/connect/common/src/test/resources/query-tests/explain-results/function_dateadd.explain +++ b/sql/connect/common/src/test/resources/query-tests/explain-results/function_dateadd.explain @@ -1,2 +1,2 @@ -Project [date_add(d#0, 2) AS date_add(d, 2)#0] +Project [dateadd(d#0, 2) AS dateadd(d, 2)#0] +- LocalRelation , [d#0, t#0, s#0, x#0L, wt#0] diff --git a/sql/connect/common/src/test/resources/query-tests/explain-results/function_random_with_seed.explain b/sql/connect/common/src/test/resources/query-tests/explain-results/function_random_with_seed.explain index 81c81e95c2bdd..5854d2c7fa6be 100644 --- a/sql/connect/common/src/test/resources/query-tests/explain-results/function_random_with_seed.explain +++ 
b/sql/connect/common/src/test/resources/query-tests/explain-results/function_random_with_seed.explain @@ -1,2 +1,2 @@ -Project [random(1) AS rand(1)#0] +Project [random(1) AS random(1)#0] +- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/sql/connect/common/src/test/resources/query-tests/explain-results/function_to_varchar.explain b/sql/connect/common/src/test/resources/query-tests/explain-results/function_to_varchar.explain index f0d9cacc61ac5..cc5149bfed863 100644 --- a/sql/connect/common/src/test/resources/query-tests/explain-results/function_to_varchar.explain +++ b/sql/connect/common/src/test/resources/query-tests/explain-results/function_to_varchar.explain @@ -1,2 +1,2 @@ -Project [to_char(cast(b#0 as decimal(30,15)), $99.99) AS to_char(b, $99.99)#0] +Project [to_varchar(cast(b#0 as decimal(30,15)), $99.99) AS to_varchar(b, $99.99)#0] +- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md index 5ad1380e1fb82..79fd25aa3eb14 100644 --- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md +++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md @@ -99,9 +99,9 @@ | org.apache.spark.sql.catalyst.expressions.Csc | csc | SELECT csc(1) | struct | | org.apache.spark.sql.catalyst.expressions.CsvToStructs | from_csv | SELECT from_csv('1, 0.8', 'a INT, b DOUBLE') | struct> | | org.apache.spark.sql.catalyst.expressions.CumeDist | cume_dist | SELECT a, b, cume_dist() OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b) | struct | -| org.apache.spark.sql.catalyst.expressions.CurDateExpressionBuilder | curdate | SELECT curdate() | struct | +| org.apache.spark.sql.catalyst.expressions.CurDateExpressionBuilder | curdate | SELECT curdate() | struct | | org.apache.spark.sql.catalyst.expressions.CurrentCatalog | current_catalog | SELECT 
current_catalog() | struct | -| org.apache.spark.sql.catalyst.expressions.CurrentDatabase | current_database | SELECT current_database() | struct | +| org.apache.spark.sql.catalyst.expressions.CurrentDatabase | current_database | SELECT current_database() | struct | | org.apache.spark.sql.catalyst.expressions.CurrentDatabase | current_schema | SELECT current_schema() | struct | | org.apache.spark.sql.catalyst.expressions.CurrentDate | current_date | SELECT current_date() | struct | | org.apache.spark.sql.catalyst.expressions.CurrentTimeZone | current_timezone | SELECT current_timezone() | struct | @@ -110,7 +110,7 @@ | org.apache.spark.sql.catalyst.expressions.CurrentUser | session_user | SELECT session_user() | struct | | org.apache.spark.sql.catalyst.expressions.CurrentUser | user | SELECT user() | struct | | org.apache.spark.sql.catalyst.expressions.DateAdd | date_add | SELECT date_add('2016-07-30', 1) | struct | -| org.apache.spark.sql.catalyst.expressions.DateAdd | dateadd | SELECT dateadd('2016-07-30', 1) | struct | +| org.apache.spark.sql.catalyst.expressions.DateAdd | dateadd | SELECT dateadd('2016-07-30', 1) | struct | | org.apache.spark.sql.catalyst.expressions.DateDiff | date_diff | SELECT date_diff('2009-07-31', '2009-07-30') | struct | | org.apache.spark.sql.catalyst.expressions.DateDiff | datediff | SELECT datediff('2009-07-31', '2009-07-30') | struct | | org.apache.spark.sql.catalyst.expressions.DateFormatClass | date_format | SELECT date_format('2016-04-08', 'y') | struct | @@ -264,7 +264,7 @@ | org.apache.spark.sql.catalyst.expressions.RPadExpressionBuilder | rpad | SELECT rpad('hi', 5, '??') | struct | | org.apache.spark.sql.catalyst.expressions.RaiseErrorExpressionBuilder | raise_error | SELECT raise_error('custom error message') | struct | | org.apache.spark.sql.catalyst.expressions.Rand | rand | SELECT rand() | struct | -| org.apache.spark.sql.catalyst.expressions.Rand | random | SELECT random() | struct | +| 
org.apache.spark.sql.catalyst.expressions.Rand | random | SELECT random() | struct | | org.apache.spark.sql.catalyst.expressions.RandStr | randstr | SELECT randstr(3, 0) AS result | struct | | org.apache.spark.sql.catalyst.expressions.Randn | randn | SELECT randn() | struct | | org.apache.spark.sql.catalyst.expressions.Rank | rank | SELECT a, b, rank(b) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b) | struct | @@ -340,7 +340,7 @@ | org.apache.spark.sql.catalyst.expressions.TimeWindow | window | SELECT a, window.start, window.end, count(*) as cnt FROM VALUES ('A1', '2021-01-01 00:00:00'), ('A1', '2021-01-01 00:04:30'), ('A1', '2021-01-01 00:06:00'), ('A2', '2021-01-01 00:01:00') AS tab(a, b) GROUP by a, window(b, '5 minutes') ORDER BY a, start | struct | | org.apache.spark.sql.catalyst.expressions.ToBinary | to_binary | SELECT to_binary('abc', 'utf-8') | struct | | org.apache.spark.sql.catalyst.expressions.ToCharacterBuilder | to_char | SELECT to_char(454, '999') | struct | -| org.apache.spark.sql.catalyst.expressions.ToCharacterBuilder | to_varchar | SELECT to_varchar(454, '999') | struct | +| org.apache.spark.sql.catalyst.expressions.ToCharacterBuilder | to_varchar | SELECT to_varchar(454, '999') | struct | | org.apache.spark.sql.catalyst.expressions.ToDegrees | degrees | SELECT degrees(3.141592653589793) | struct | | org.apache.spark.sql.catalyst.expressions.ToNumber | to_number | SELECT to_number('454', '999') | struct | | org.apache.spark.sql.catalyst.expressions.ToRadians | radians | SELECT radians(180) | struct | @@ -402,7 +402,7 @@ | org.apache.spark.sql.catalyst.expressions.aggregate.BoolOr | any | SELECT any(col) FROM VALUES (true), (false), (false) AS tab(col) | struct | | org.apache.spark.sql.catalyst.expressions.aggregate.BoolOr | bool_or | SELECT bool_or(col) FROM VALUES (true), (false), (false) AS tab(col) | struct | | org.apache.spark.sql.catalyst.expressions.aggregate.BoolOr | some | SELECT 
some(col) FROM VALUES (true), (false), (false) AS tab(col) | struct | -| org.apache.spark.sql.catalyst.expressions.aggregate.CollectList | array_agg | SELECT array_agg(col) FROM VALUES (1), (2), (1) AS tab(col) | struct> | +| org.apache.spark.sql.catalyst.expressions.aggregate.CollectList | array_agg | SELECT array_agg(col) FROM VALUES (1), (2), (1) AS tab(col) | struct> | | org.apache.spark.sql.catalyst.expressions.aggregate.CollectList | collect_list | SELECT collect_list(col) FROM VALUES (1), (2), (1) AS tab(col) | struct> | | org.apache.spark.sql.catalyst.expressions.aggregate.CollectSet | collect_set | SELECT collect_set(col) FROM VALUES (1), (2), (1) AS tab(col) | struct> | | org.apache.spark.sql.catalyst.expressions.aggregate.Corr | corr | SELECT corr(c1, c2) FROM VALUES (3, 2), (3, 3), (6, 4) as tab(c1, c2) | struct | diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/charvarchar.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/charvarchar.sql.out index 524797015a2f6..d4bcb8f2ed042 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/charvarchar.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/charvarchar.sql.out @@ -722,19 +722,19 @@ Project [chr(cast(167 as bigint)) AS chr(167)#x, chr(cast(247 as bigint)) AS chr -- !query SELECT to_varchar(78.12, '$99.99') -- !query analysis -Project [to_char(78.12, $99.99) AS to_char(78.12, $99.99)#x] +Project [to_varchar(78.12, $99.99) AS to_varchar(78.12, $99.99)#x] +- OneRowRelation -- !query SELECT to_varchar(111.11, '99.9') -- !query analysis -Project [to_char(111.11, 99.9) AS to_char(111.11, 99.9)#x] +Project [to_varchar(111.11, 99.9) AS to_varchar(111.11, 99.9)#x] +- OneRowRelation -- !query SELECT to_varchar(12454.8, '99,999.9S') -- !query analysis -Project [to_char(12454.8, 99,999.9S) AS to_char(12454.8, 99,999.9S)#x] +Project [to_varchar(12454.8, 99,999.9S) AS to_varchar(12454.8, 99,999.9S)#x] +- OneRowRelation diff --git 
a/sql/core/src/test/resources/sql-tests/analyzer-results/current_database_catalog.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/current_database_catalog.sql.out index 1a71594f84932..2759f5e67507b 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/current_database_catalog.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/current_database_catalog.sql.out @@ -2,5 +2,5 @@ -- !query select current_database(), current_schema(), current_catalog() -- !query analysis -Project [current_schema() AS current_schema()#x, current_schema() AS current_schema()#x, current_catalog() AS current_catalog()#x] +Project [current_database() AS current_database()#x, current_schema() AS current_schema()#x, current_catalog() AS current_catalog()#x] +- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/group-by.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/group-by.sql.out index 8849aa4452252..6996eb913a21e 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/group-by.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/group-by.sql.out @@ -1133,7 +1133,7 @@ SELECT FROM VALUES (1), (2), (1) AS tab(col) -- !query analysis -Aggregate [collect_list(col#x, 0, 0) AS collect_list(col)#x, collect_list(col#x, 0, 0) AS collect_list(col)#x] +Aggregate [collect_list(col#x, 0, 0) AS collect_list(col)#x, array_agg(col#x, 0, 0) AS array_agg(col)#x] +- SubqueryAlias tab +- LocalRelation [col#x] @@ -1147,7 +1147,7 @@ FROM VALUES (1,4),(2,3),(1,4),(2,4) AS v(a,b) GROUP BY a -- !query analysis -Aggregate [a#x], [a#x, collect_list(b#x, 0, 0) AS collect_list(b)#x, collect_list(b#x, 0, 0) AS collect_list(b)#x] +Aggregate [a#x], [a#x, collect_list(b#x, 0, 0) AS collect_list(b)#x, array_agg(b#x, 0, 0) AS array_agg(b)#x] +- SubqueryAlias v +- LocalRelation [a#x, b#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/sql-session-variables.sql.out 
b/sql/core/src/test/resources/sql-tests/analyzer-results/sql-session-variables.sql.out index 02e7c39ae83fd..8c10d78405751 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/sql-session-variables.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/sql-session-variables.sql.out @@ -776,7 +776,7 @@ Project [NULL AS Expected#x, variablereference(system.session.var1=CAST(NULL AS -- !query DECLARE OR REPLACE VARIABLE var1 STRING DEFAULT CURRENT_DATABASE() -- !query analysis -CreateVariable defaultvalueexpression(cast(current_schema() as string), CURRENT_DATABASE()), true +CreateVariable defaultvalueexpression(cast(current_database() as string), CURRENT_DATABASE()), true +- ResolvedIdentifier org.apache.spark.sql.catalyst.analysis.FakeSystemCatalog$@xxxxxxxx, session.var1 diff --git a/sql/core/src/test/resources/sql-tests/results/charvarchar.sql.out b/sql/core/src/test/resources/sql-tests/results/charvarchar.sql.out index 8aafa25c5caaf..2960c4ca4f4d4 100644 --- a/sql/core/src/test/resources/sql-tests/results/charvarchar.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/charvarchar.sql.out @@ -1235,7 +1235,7 @@ struct -- !query SELECT to_varchar(78.12, '$99.99') -- !query schema -struct +struct -- !query output $78.12 @@ -1243,7 +1243,7 @@ $78.12 -- !query SELECT to_varchar(111.11, '99.9') -- !query schema -struct +struct -- !query output ##.# @@ -1251,6 +1251,6 @@ struct -- !query SELECT to_varchar(12454.8, '99,999.9S') -- !query schema -struct +struct -- !query output 12,454.8+ diff --git a/sql/core/src/test/resources/sql-tests/results/current_database_catalog.sql.out b/sql/core/src/test/resources/sql-tests/results/current_database_catalog.sql.out index 67db0adee7f07..7fbe2dfff4db1 100644 --- a/sql/core/src/test/resources/sql-tests/results/current_database_catalog.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/current_database_catalog.sql.out @@ -2,6 +2,6 @@ -- !query select current_database(), current_schema(), 
current_catalog() -- !query schema -struct +struct -- !query output default default spark_catalog diff --git a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out index d8a9f4c2e11f5..5d220fc12b78e 100644 --- a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out @@ -1066,7 +1066,7 @@ SELECT FROM VALUES (1), (2), (1) AS tab(col) -- !query schema -struct,collect_list(col):array> +struct,array_agg(col):array> -- !query output [1,2,1] [1,2,1] @@ -1080,7 +1080,7 @@ FROM VALUES (1,4),(2,3),(1,4),(2,4) AS v(a,b) GROUP BY a -- !query schema -struct,collect_list(b):array> +struct,array_agg(b):array> -- !query output 1 [4,4] [4,4] 2 [3,4] [3,4] diff --git a/sql/core/src/test/resources/sql-tests/results/subexp-elimination.sql.out b/sql/core/src/test/resources/sql-tests/results/subexp-elimination.sql.out index 0f7ff3f107567..28457c0579e95 100644 --- a/sql/core/src/test/resources/sql-tests/results/subexp-elimination.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/subexp-elimination.sql.out @@ -72,7 +72,7 @@ NULL -- !query SELECT from_json(a, 'struct').a + random() > 2, from_json(a, 'struct').b, from_json(b, 'array>')[0].a, from_json(b, 'array>')[0].b + + random() > 2 FROM testData -- !query schema -struct<((from_json(a).a + rand()) > 2):boolean,from_json(a).b:string,from_json(b)[0].a:int,((from_json(b)[0].b + (+ rand())) > 2):boolean> +struct<((from_json(a).a + random()) > 2):boolean,from_json(a).b:string,from_json(b)[0].a:int,((from_json(b)[0].b + (+ random())) > 2):boolean> -- !query output NULL NULL 1 true false 2 1 true @@ -84,7 +84,7 @@ true 6 6 true -- !query SELECT if(from_json(a, 'struct').a + random() > 5, from_json(b, 'array>')[0].a, from_json(b, 'array>')[0].a + 1) FROM testData -- !query schema -struct<(IF(((from_json(a).a + rand()) > 5), from_json(b)[0].a, (from_json(b)[0].a + 1))):int> 
+struct<(IF(((from_json(a).a + random()) > 5), from_json(b)[0].a, (from_json(b)[0].a + 1))):int> -- !query output 2 2 @@ -96,7 +96,7 @@ NULL -- !query SELECT case when from_json(a, 'struct').a > 5 then from_json(a, 'struct').b + random() > 5 when from_json(a, 'struct').a > 4 then from_json(a, 'struct').b + 1 + random() > 2 else from_json(a, 'struct').b + 2 + random() > 5 end FROM testData -- !query schema -struct 5) THEN ((from_json(a).b + rand()) > 5) WHEN (from_json(a).a > 4) THEN (((from_json(a).b + 1) + rand()) > 2) ELSE (((from_json(a).b + 2) + rand()) > 5) END:boolean> +struct 5) THEN ((from_json(a).b + random()) > 5) WHEN (from_json(a).a > 4) THEN (((from_json(a).b + 1) + random()) > 2) ELSE (((from_json(a).b + 2) + random()) > 5) END:boolean> -- !query output NULL false