From f18045a154d5fe58ef6e5d51c4abf3c53c204989 Mon Sep 17 00:00:00 2001 From: Angerszhuuuu Date: Fri, 16 Apr 2021 13:23:18 +0800 Subject: [PATCH] [SPARK-35026][SQL] Support use CUBE/ROLLUP/GROUPING SETS in GROUPING SETS --- .../spark/sql/catalyst/parser/SqlBase.g4 | 12 +++- .../sql/catalyst/parser/AstBuilder.scala | 53 +++++++++++------ .../sql-tests/inputs/group-analytics.sql | 5 ++ .../results/ansi/group-analytics.sql.out | 59 ++++++++++++++++++- .../sql-tests/results/group-analytics.sql.out | 59 ++++++++++++++++++- 5 files changed, 166 insertions(+), 22 deletions(-) diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index c958f9c387767..7243bc5966e2e 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -601,14 +601,24 @@ aggregationClause ; groupByClause - : groupingAnalytics + : nestedGroupingSets + | groupingAnalytics | expression ; +nestedGroupingSets + : GROUPING SETS '(' nestedGroupingSet (',' nestedGroupingSet)* ')' + ; + groupingAnalytics : (ROLLUP | CUBE | GROUPING SETS) '(' groupingSet (',' groupingSet)* ')' ; +nestedGroupingSet + : groupingAnalytics + | '(' (expression (',' expression)*)? ')' + ; + groupingSet : '(' (expression (',' expression)*)? ')' | expression diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index f9317e865f275..e2f89c07077fa 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -993,27 +993,19 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg ctx.groupingExpressionsWithGroupingAnalytics.asScala .map(groupByExpr => { val groupingAnalytics = groupByExpr.groupingAnalytics + val nestedGroupingSets = groupByExpr.nestedGroupingSets() if (groupingAnalytics != null) { - val groupingSets = groupingAnalytics.groupingSet.asScala - .map(_.expression.asScala.map(e => expression(e)).toSeq) - if (groupingAnalytics.CUBE != null) { - // CUBE(A, B, (A, B), ()) is not supported. - if (groupingSets.exists(_.isEmpty)) { - throw new ParseException("Empty set in CUBE grouping sets is not supported.", - groupingAnalytics) + resolveGroupingAnalytics(groupingAnalytics) + } else if (nestedGroupingSets != null) { + val groupingSets = nestedGroupingSets.nestedGroupingSet.asScala.map { expr => + val groupingAnalytics = expr.groupingAnalytics() + if (groupingAnalytics != null) { + resolveGroupingAnalytics(groupingAnalytics).selectedGroupByExprs + } else { + Seq(expr.expression().asScala.map(e => expression(e))) } - Cube(groupingSets.toSeq) - } else if (groupingAnalytics.ROLLUP != null) { - // ROLLUP(A, B, (A, B), ()) is not supported. - if (groupingSets.exists(_.isEmpty)) { - throw new ParseException("Empty set in ROLLUP grouping sets is not supported.", - groupingAnalytics) - } - Rollup(groupingSets.toSeq) - } else { - assert(groupingAnalytics.GROUPING != null && groupingAnalytics.SETS != null) - GroupingSets(groupingSets.toSeq) - } + }.flatten.toSeq + GroupingSets(groupingSets) } else { expression(groupByExpr.expression) } @@ -1022,6 +1014,29 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg } } + def resolveGroupingAnalytics(groupingAnalytics: GroupingAnalyticsContext): BaseGroupingSets = { + val groupingSets = groupingAnalytics.groupingSet.asScala + .map(_.expression.asScala.map(e => expression(e)).toSeq) + if (groupingAnalytics.CUBE != null) { + // CUBE(A, B, (A, B), ()) is not supported. + if (groupingSets.exists(_.isEmpty)) { + throw new ParseException("Empty set in CUBE grouping sets is not supported.", + groupingAnalytics) + } + Cube(groupingSets.toSeq) + } else if (groupingAnalytics.ROLLUP != null) { + // ROLLUP(A, B, (A, B), ()) is not supported. + if (groupingSets.exists(_.isEmpty)) { + throw new ParseException("Empty set in ROLLUP grouping sets is not supported.", + groupingAnalytics) + } + Rollup(groupingSets.toSeq) + } else { + assert(groupingAnalytics.GROUPING != null && groupingAnalytics.SETS != null) + GroupingSets(groupingSets.toSeq) + } + } + /** * Add [[UnresolvedHint]]s to a logical plan. */ diff --git a/sql/core/src/test/resources/sql-tests/inputs/group-analytics.sql b/sql/core/src/test/resources/sql-tests/inputs/group-analytics.sql index 6dfe31e2706e4..88a35d02a34c0 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/group-analytics.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/group-analytics.sql @@ -80,3 +80,8 @@ SELECT a, b, count(1) FROM testData GROUP BY a, GROUPING SETS((a, b), (a), ()); SELECT a, b, count(1) FROM testData GROUP BY a, CUBE(a, b), GROUPING SETS((a, b), (a), ()); SELECT a, b, count(1) FROM testData GROUP BY a, CUBE(a, b), ROLLUP(a, b), GROUPING SETS((a, b), (a), ()); +-- Support use CUBE/ROLLUP/GROUPING SETS in GROUPING SETS +SELECT a, b, count(1) FROM testData GROUP BY a, GROUPING SETS(ROLLUP(a, b)); +SELECT a, b, count(1) FROM testData GROUP BY a, GROUPING SETS((a, b), (a), ()); +SELECT a, b, count(1) FROM testData GROUP BY a, GROUPING SETS(GROUPING SETS((a, b), (a), ())); + diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/group-analytics.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/group-analytics.sql.out index 1db8febb81f9e..47d5bcea45054 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/group-analytics.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/group-analytics.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 44 +-- Number of queries: 47 -- !query @@ -1067,3 +1067,60 @@ struct 3 NULL 2 3 NULL 2 3 NULL 2 + + +-- !query +SELECT a, b, count(1) FROM testData GROUP BY a, GROUPING SETS(ROLLUP(a, b)) +-- !query schema +struct +-- !query output +1 1 1 +1 2 1 +1 NULL 2 +1 NULL 2 +2 1 1 +2 2 1 +2 NULL 2 +2 NULL 2 +3 1 1 +3 2 1 +3 NULL 2 +3 NULL 2 + + +-- !query +SELECT a, b, count(1) FROM testData GROUP BY a, GROUPING SETS((a, b), (a), ()) +-- !query schema +struct +-- !query output +1 1 1 +1 2 1 +1 NULL 2 +1 NULL 2 +2 1 1 +2 2 1 +2 NULL 2 +2 NULL 2 +3 1 1 +3 2 1 +3 NULL 2 +3 NULL 2 + + +-- !query +SELECT a, b, count(1) FROM testData GROUP BY a, GROUPING SETS(GROUPING SETS((a, b), (a), ())) +-- !query schema +struct +-- !query output +1 1 1 +1 2 1 +1 NULL 2 +1 NULL 2 +2 1 1 +2 2 1 +2 NULL 2 +2 NULL 2 +3 1 1 +3 2 1 +3 NULL 2 +3 NULL 2 diff --git a/sql/core/src/test/resources/sql-tests/results/group-analytics.sql.out b/sql/core/src/test/resources/sql-tests/results/group-analytics.sql.out index 6dc02ead9daab..b44fd092040bb 100644 --- a/sql/core/src/test/resources/sql-tests/results/group-analytics.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/group-analytics.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 44 +-- Number of queries: 47 -- !query @@ -1087,3 +1087,60 @@ struct 3 NULL 2 3 NULL 2 3 NULL 2 + + +-- !query +SELECT a, b, count(1) FROM testData GROUP BY a, GROUPING SETS(ROLLUP(a, b)) +-- !query schema +struct +-- !query output +1 1 1 +1 2 1 +1 NULL 2 +1 NULL 2 +2 1 1 +2 2 1 +2 NULL 2 +2 NULL 2 +3 1 1 +3 2 1 +3 NULL 2 +3 NULL 2 + + +-- !query +SELECT a, b, count(1) FROM testData GROUP BY a, GROUPING SETS((a, b), (a), ()) +-- !query schema +struct +-- !query output +1 1 1 +1 2 1 +1 NULL 2 +1 NULL 2 +2 1 1 +2 2 1 +2 NULL 2 +2 NULL 2 +3 1 1 +3 2 1 +3 NULL 2 +3 NULL 2 + + +-- !query +SELECT a, b, count(1) FROM testData GROUP BY a, GROUPING SETS(GROUPING SETS((a, b), (a), ())) +-- !query schema +struct +-- !query output +1 1 1 +1 2 1 +1 NULL 2 +1 NULL 2 +2 1 1 +2 2 1 +2 NULL 2 +2 NULL 2 +3 1 1 +3 2 1 +3 NULL 2 +3 NULL 2