From 81b92b163e6b3aa5794b0a8d82169c8b1295ce95 Mon Sep 17 00:00:00 2001 From: George Sittas Date: Sun, 14 May 2023 13:01:25 +0300 Subject: [PATCH 01/12] Feat(optimizer): optimize pivots --- sqlglot/dialects/dialect.py | 2 +- sqlglot/optimizer/eliminate_subqueries.py | 4 ++ sqlglot/optimizer/merge_subqueries.py | 1 + sqlglot/optimizer/optimizer.py | 2 + sqlglot/optimizer/pushdown_projections.py | 5 +- sqlglot/optimizer/qualify_columns.py | 44 ++++++++++++++++- sqlglot/optimizer/qualify_tables.py | 13 ++++++ sqlglot/optimizer/scope.py | 12 +++++ tests/fixtures/optimizer/optimizer.sql | 52 +++++++++++++++++++++ tests/fixtures/optimizer/qualify_tables.sql | 7 ++- tests/test_optimizer.py | 1 + 11 files changed, 136 insertions(+), 7 deletions(-) diff --git a/sqlglot/dialects/dialect.py b/sqlglot/dialects/dialect.py index 71269f256b..20dc358cfa 100644 --- a/sqlglot/dialects/dialect.py +++ b/sqlglot/dialects/dialect.py @@ -275,7 +275,7 @@ def no_tablesample_sql(self: Generator, expression: exp.TableSample) -> str: def no_pivot_sql(self: Generator, expression: exp.Pivot) -> str: self.unsupported("PIVOT unsupported") - return self.sql(expression) + return "" def no_trycast_sql(self: Generator, expression: exp.TryCast) -> str: diff --git a/sqlglot/optimizer/eliminate_subqueries.py b/sqlglot/optimizer/eliminate_subqueries.py index 294214bf92..1a4bec59f2 100644 --- a/sqlglot/optimizer/eliminate_subqueries.py +++ b/sqlglot/optimizer/eliminate_subqueries.py @@ -133,6 +133,10 @@ def _eliminate_union(scope, existing_ctes, taken): def _eliminate_derived_table(scope, existing_ctes, taken): + # This ensures we don't drop the "pivot" arg from a pivoted subquery + if scope.parent.pivots: + return None + parent = scope.expression.parent name, cte = _new_cte(scope, existing_ctes, taken) diff --git a/sqlglot/optimizer/merge_subqueries.py b/sqlglot/optimizer/merge_subqueries.py index 9597b61715..f9c96643d9 100644 --- a/sqlglot/optimizer/merge_subqueries.py +++ 
b/sqlglot/optimizer/merge_subqueries.py @@ -167,6 +167,7 @@ def _outer_select_joins_on_inner_select_join(): and isinstance(inner_select, exp.Select) and not any(inner_select.args.get(arg) for arg in UNMERGABLE_ARGS) and inner_select.args.get("from") + and not outer_scope.pivots and not any(e.find(exp.AggFunc, exp.Select) for e in inner_select.expressions) and not (leave_tables_isolated and len(outer_scope.selected_sources) > 1) and not ( diff --git a/sqlglot/optimizer/optimizer.py b/sqlglot/optimizer/optimizer.py index 85ebb4ad8f..e2cd0c4c63 100644 --- a/sqlglot/optimizer/optimizer.py +++ b/sqlglot/optimizer/optimizer.py @@ -79,6 +79,7 @@ def optimize( schema = ensure_schema(schema or sqlglot.schema, dialect=dialect) possible_kwargs = {"db": db, "catalog": catalog, "schema": schema, **kwargs} expression = exp.maybe_parse(expression, dialect=dialect, copy=True) + for rule in rules: # Find any additional rule parameters, beyond `expression` rule_params = rule.__code__.co_varnames @@ -86,4 +87,5 @@ def optimize( param: possible_kwargs[param] for param in rule_params if param in possible_kwargs } expression = rule(expression, **rule_kwargs) + return expression diff --git a/sqlglot/optimizer/pushdown_projections.py b/sqlglot/optimizer/pushdown_projections.py index fde228316b..be3ddb25a8 100644 --- a/sqlglot/optimizer/pushdown_projections.py +++ b/sqlglot/optimizer/pushdown_projections.py @@ -39,8 +39,9 @@ def pushdown_projections(expression, schema=None, remove_unused_selections=True) for scope in reversed(traverse_scope(expression)): parent_selections = referenced_columns.get(scope, {SELECT_ALL}) - if scope.expression.args.get("distinct"): - # We can't remove columns SELECT DISTINCT nor UNION DISTINCT + if scope.expression.args.get("distinct") or scope.parent and scope.parent.pivots: + # We can't remove columns SELECT DISTINCT nor UNION DISTINCT. The same holds if + # we select from a pivoted source in the parent scope. 
parent_selections = {SELECT_ALL} if isinstance(scope.expression, exp.Union): diff --git a/sqlglot/optimizer/qualify_columns.py b/sqlglot/optimizer/qualify_columns.py index 23bc05fd6d..c0a96a5bf8 100644 --- a/sqlglot/optimizer/qualify_columns.py +++ b/sqlglot/optimizer/qualify_columns.py @@ -5,6 +5,8 @@ from sqlglot import alias, exp from sqlglot.errors import OptimizeError +from sqlglot.helper import seq_get +from sqlglot.optimizer.expand_laterals import expand_laterals as _expand_laterals from sqlglot.optimizer.scope import Scope, traverse_scope, walk_in_scope from sqlglot.schema import Schema, ensure_schema @@ -65,7 +67,7 @@ def validate_qualify_columns(expression): for scope in traverse_scope(expression): if isinstance(scope.expression, exp.Select): unqualified_columns.extend(scope.unqualified_columns) - if scope.external_columns and not scope.is_correlated_subquery: + if scope.external_columns and not scope.is_correlated_subquery and not scope.pivots: column = scope.external_columns[0] raise OptimizeError( f"""Column '{column}' could not be resolved{f" for table: '{column.table}'" if column.table else ''}""" @@ -249,6 +251,12 @@ def _qualify_columns(scope, resolver): raise OptimizeError(f"Unknown column: {column_name}") if not column_table: + if scope.pivots and not column.find_ancestor(exp.Pivot): + # If the column is under the Pivot expression, we need to qualify it + # using the name of the pivoted source instead of the pivot's alias + column.set("table", scope.pivots[0].args["alias"]) + continue + column_table = resolver.get_table(column_name) # column_table can be a '' because bigquery unnest has no table alias @@ -272,6 +280,13 @@ def _qualify_columns(scope, resolver): if column_table: column.replace(exp.Dot.build([exp.column(root, table=column_table), *parts])) + for pivot in scope.pivots: + for column in pivot.find_all(exp.Column): + if not column.table and column.name in resolver.all_columns: + column_table = resolver.get_table(column.name) + if 
column_table: + column.set("table", column_table) + def _expand_stars(scope, resolver, using_column_tables): """Expand stars to lists of column selections""" @@ -281,6 +296,9 @@ def _expand_stars(scope, resolver, using_column_tables): replace_columns = {} coalesced_columns = set() + # TODO: handle optimization of multiple PIVOTs (and possibly UNPIVOTs) in the future + pivot = seq_get(scope.pivots, 0) + for expression in scope.selects: if isinstance(expression, exp.Star): tables = list(scope.selected_sources) @@ -297,9 +315,14 @@ def _expand_stars(scope, resolver, using_column_tables): for table in tables: if table not in scope.sources: raise OptimizeError(f"Unknown table: {table}") + columns = resolver.get_source_columns(table, only_visible=True) if columns and "*" not in columns: + if pivot and not pivot.args.get("unpivot"): + _add_pivot_columns(pivot, columns, new_selections) + continue + table_id = id(table) for name in columns: if name in using_column_tables and table in using_column_tables[name]: @@ -319,12 +342,13 @@ def _expand_stars(scope, resolver, using_column_tables): ) elif name not in except_columns.get(table_id, set()): alias_ = replace_columns.get(table_id, {}).get(name, name) - column = exp.column(name, table) + column = exp.column(name, table=table) new_selections.append( alias(column, alias_, copy=False) if alias_ != name else column ) else: return + scope.expression.set("expressions", new_selections) @@ -352,6 +376,22 @@ def _add_replace_columns(expression, tables, replace_columns): replace_columns[id(table)] = columns +def _add_pivot_columns(pivot, source_columns, columns): + pivot_output_columns = [col.output_name for col in pivot.args.get("columns", [])] + if not pivot_output_columns: + pivot_output_columns = [col.alias_or_name for col in pivot.expressions] + + pivot_columns = set(column.output_name for column in pivot.find_all(exp.Column)) + implicit_columns = list(set(source_columns) - pivot_columns) + + columns.extend( + [ + 
exp.alias_(exp.column(name, table=pivot.alias), name) + for name in implicit_columns + pivot_output_columns + ] + ) + + def _qualify_outputs(scope): """Ensure all output columns are aliased""" new_selections = [] diff --git a/sqlglot/optimizer/qualify_tables.py b/sqlglot/optimizer/qualify_tables.py index 80a65a8e68..3bfc583c21 100644 --- a/sqlglot/optimizer/qualify_tables.py +++ b/sqlglot/optimizer/qualify_tables.py @@ -48,6 +48,15 @@ def qualify_tables(expression, db=None, catalog=None, schema=None): derived_table.set("alias", exp.TableAlias(this=exp.to_identifier(alias_))) scope.rename_source(None, alias_) + # TODO: handle optimization of multiple PIVOTs (and possibly UNPIVOTs) in the future + pivots = derived_table.args.get("pivots") + if pivots: + pivot = pivots[0] + if not pivot.alias: + pivot.set( + "alias", exp.TableAlias(this=exp.to_identifier(f"_q_{next(sequence)}")) + ) + for name, source in scope.sources.items(): if isinstance(source, exp.Table): if isinstance(source.this, exp.Identifier): @@ -66,6 +75,10 @@ def qualify_tables(expression, db=None, catalog=None, schema=None): ) ) + pivots = source.args.get("pivots") + if pivots and not pivots[0].alias: + pivots[0].set("alias", exp.to_identifier(next_name())) + if schema and isinstance(source.this, exp.ReadCSV): with csv_reader(source.this) as reader: header = next(reader) diff --git a/sqlglot/optimizer/scope.py b/sqlglot/optimizer/scope.py index 88c66cdd06..707406b00c 100644 --- a/sqlglot/optimizer/scope.py +++ b/sqlglot/optimizer/scope.py @@ -83,6 +83,7 @@ def clear_cache(self): self._columns = None self._external_columns = None self._join_hints = None + self._pivots = None def branch(self, expression, scope_type, chain_sources=None, **kwargs): """Branch from the current scope to a new, inner scope""" @@ -372,6 +373,17 @@ def join_hints(self): return [] return self._join_hints + @property + def pivots(self): + if not self._pivots: + self._pivots = [ + pivot + for node in self.tables + self.derived_tables 
+ for pivot in node.args.get("pivots", []) + ] + + return self._pivots + def source_columns(self, source_name): """ Get all columns in the current scope for a particular source. diff --git a/tests/fixtures/optimizer/optimizer.sql b/tests/fixtures/optimizer/optimizer.sql index abd28e8fd2..05503fbc0d 100644 --- a/tests/fixtures/optimizer/optimizer.sql +++ b/tests/fixtures/optimizer/optimizer.sql @@ -502,3 +502,55 @@ WHERE "unioned"."source_system" = 'bamboohr' OR "unioned"."source_system" = 'workday' QUALIFY ROW_NUMBER() OVER (PARTITION BY "unioned"."unique_filter_key" ORDER BY "unioned"."sort_order" DESC, 1) = 1; + +# title: pivoted source with explicit selections +# execute: false +SELECT * FROM (SELECT a, b, c FROM sc.tb) PIVOT (SUM(c) FOR b IN ('x','y','z')); +SELECT + "_q_1"."a" AS "a", + "_q_1"."x" AS "x", + "_q_1"."y" AS "y", + "_q_1"."z" AS "z" +FROM ( + SELECT + "tb"."a" AS "a", + "tb"."b" AS "b", + "tb"."c" AS "c" + FROM "sc"."tb" AS "tb" +) AS "_q_0" PIVOT( SUM("_q_0"."c") FOR "_q_0"."b" IN ('x', 'y', 'z')) AS "_q_1"; + +# title: pivoted source with implicit selections +# execute: false +SELECT * FROM (SELECT * FROM u) PIVOT (SUM(f) FOR h IN ('x', 'y')); +SELECT + "_q_1"."g" AS "g", + "_q_1"."x" AS "x", + "_q_1"."y" AS "y" +FROM ( + SELECT + "u"."f" AS "f", + "u"."g" AS "g", + "u"."h" AS "h" + FROM "u" AS "u" +) AS "_q_0" PIVOT( SUM("_q_0"."f") FOR "_q_0"."h" IN ('x', 'y')) AS "_q_1"; + +# title: selecting explicit qualified columns from pivoted source with explicit selections +# execute: false +SELECT piv.x, piv.y FROM (SELECT f, h FROM u) PIVOT (SUM(f) FOR h IN ('x', 'y')) AS piv; +SELECT + "piv"."x" AS "x", + "piv"."y" AS "y" +FROM ( + SELECT + "u"."f" AS "f", + "u"."h" AS "h" + FROM "u" AS "u" +) AS "_q_0" PIVOT( SUM("_q_0"."f") FOR "_q_0"."h" IN ('x', 'y')) AS "piv"; + +# title: selecting explicit unqualified columns from pivoted source with implicit selections +# execute: false +SELECT x, y FROM u PIVOT (SUM(f) FOR h IN ('x', 'y')); +SELECT + 
"_q_0"."x" AS "x", + "_q_0"."y" AS "y" +FROM "u" AS "u" PIVOT( SUM("u"."f") FOR "u"."h" IN ('x', 'y')) AS "_q_0"; diff --git a/tests/fixtures/optimizer/qualify_tables.sql b/tests/fixtures/optimizer/qualify_tables.sql index 0ad155a584..24d1b65884 100644 --- a/tests/fixtures/optimizer/qualify_tables.sql +++ b/tests/fixtures/optimizer/qualify_tables.sql @@ -16,9 +16,12 @@ WITH a AS (SELECT 1 FROM c.db.z AS z) SELECT 1 FROM a; SELECT (SELECT y.c FROM y AS y) FROM x; SELECT (SELECT y.c FROM c.db.y AS y) FROM c.db.x AS x; -------------------------- +SELECT * FROM x PIVOT (SUM(a) FOR b IN ('a', 'b')); +SELECT * FROM c.db.x AS x PIVOT(SUM(a) FOR b IN ('a', 'b')) AS _q_0; + +---------------------------- -- Expand join constructs -------------------------- +---------------------------- -- This is valid in Trino, so we treat the (tbl AS tbl) as a "join construct" per postgres' terminology. SELECT * FROM (tbl AS tbl) AS _q_0; diff --git a/tests/test_optimizer.py b/tests/test_optimizer.py index 8d27ae5c9c..e989672f16 100644 --- a/tests/test_optimizer.py +++ b/tests/test_optimizer.py @@ -145,6 +145,7 @@ def test_optimize(self): "x": {"a": "INT", "b": "INT"}, "y": {"b": "INT", "c": "INT"}, "z": {"a": "INT", "c": "INT"}, + "u": {"f": "INT", "g": "INT", "h": "TEXT"}, } self.check_file( From 82811a5b9caed8666b7445273b9dd3b2baba9442 Mon Sep 17 00:00:00 2001 From: George Sittas Date: Sun, 14 May 2023 13:12:04 +0300 Subject: [PATCH 02/12] Fixup --- sqlglot/optimizer/qualify_columns.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sqlglot/optimizer/qualify_columns.py b/sqlglot/optimizer/qualify_columns.py index c0a96a5bf8..7fc9a64bc4 100644 --- a/sqlglot/optimizer/qualify_columns.py +++ b/sqlglot/optimizer/qualify_columns.py @@ -6,7 +6,6 @@ from sqlglot import alias, exp from sqlglot.errors import OptimizeError from sqlglot.helper import seq_get -from sqlglot.optimizer.expand_laterals import expand_laterals as _expand_laterals from sqlglot.optimizer.scope import Scope, 
traverse_scope, walk_in_scope from sqlglot.schema import Schema, ensure_schema From 6551eabe29de472540f0e35c996f3d0bb854a1f6 Mon Sep 17 00:00:00 2001 From: George Sittas Date: Sun, 14 May 2023 13:37:27 +0300 Subject: [PATCH 03/12] Simplify --- sqlglot/optimizer/qualify_tables.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/sqlglot/optimizer/qualify_tables.py b/sqlglot/optimizer/qualify_tables.py index 3bfc583c21..e9a2725ff6 100644 --- a/sqlglot/optimizer/qualify_tables.py +++ b/sqlglot/optimizer/qualify_tables.py @@ -48,14 +48,11 @@ def qualify_tables(expression, db=None, catalog=None, schema=None): derived_table.set("alias", exp.TableAlias(this=exp.to_identifier(alias_))) scope.rename_source(None, alias_) - # TODO: handle optimization of multiple PIVOTs (and possibly UNPIVOTs) in the future pivots = derived_table.args.get("pivots") - if pivots: - pivot = pivots[0] - if not pivot.alias: - pivot.set( - "alias", exp.TableAlias(this=exp.to_identifier(f"_q_{next(sequence)}")) - ) + if pivots and not pivots[0].alias: + pivots[0].set( + "alias", exp.TableAlias(this=exp.to_identifier(f"_q_{next(sequence)}")) + ) for name, source in scope.sources.items(): if isinstance(source, exp.Table): From 5bd139831d2f7b1df68bd54a900ef7f075cecee8 Mon Sep 17 00:00:00 2001 From: George Sittas Date: Sun, 14 May 2023 14:13:41 +0300 Subject: [PATCH 04/12] Cleanup --- sqlglot/optimizer/qualify_columns.py | 2 +- sqlglot/optimizer/qualify_tables.py | 8 +++----- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/sqlglot/optimizer/qualify_columns.py b/sqlglot/optimizer/qualify_columns.py index 7fc9a64bc4..a1aebf0242 100644 --- a/sqlglot/optimizer/qualify_columns.py +++ b/sqlglot/optimizer/qualify_columns.py @@ -253,7 +253,7 @@ def _qualify_columns(scope, resolver): if scope.pivots and not column.find_ancestor(exp.Pivot): # If the column is under the Pivot expression, we need to qualify it # using the name of the pivoted source instead of the pivot's 
alias - column.set("table", scope.pivots[0].args["alias"]) + column.set("table", exp.to_identifier(scope.pivots[0].alias)) continue column_table = resolver.get_table(column_name) diff --git a/sqlglot/optimizer/qualify_tables.py b/sqlglot/optimizer/qualify_tables.py index e9a2725ff6..f553810b77 100644 --- a/sqlglot/optimizer/qualify_tables.py +++ b/sqlglot/optimizer/qualify_tables.py @@ -44,15 +44,13 @@ def qualify_tables(expression, db=None, catalog=None, schema=None): derived_table.this.replace(exp.select("*").from_(unnested.copy(), copy=False)) if not derived_table.args.get("alias"): - alias_ = f"_q_{next(sequence)}" + alias_ = next_name() derived_table.set("alias", exp.TableAlias(this=exp.to_identifier(alias_))) scope.rename_source(None, alias_) pivots = derived_table.args.get("pivots") if pivots and not pivots[0].alias: - pivots[0].set( - "alias", exp.TableAlias(this=exp.to_identifier(f"_q_{next(sequence)}")) - ) + pivots[0].set("alias", exp.TableAlias(this=exp.to_identifier(next_name()))) for name, source in scope.sources.items(): if isinstance(source, exp.Table): @@ -74,7 +72,7 @@ def qualify_tables(expression, db=None, catalog=None, schema=None): pivots = source.args.get("pivots") if pivots and not pivots[0].alias: - pivots[0].set("alias", exp.to_identifier(next_name())) + pivots[0].set("alias", exp.TableAlias(this=exp.to_identifier(next_name()))) if schema and isinstance(source.this, exp.ReadCSV): with csv_reader(source.this) as reader: From cea3a89ce4704b219e1d1e8b8d940970a924034c Mon Sep 17 00:00:00 2001 From: George Sittas Date: Sun, 14 May 2023 14:35:53 +0300 Subject: [PATCH 05/12] Fix pivot sql generation --- sqlglot/expressions.py | 1 - sqlglot/generator.py | 19 ++++++++----------- tests/fixtures/optimizer/optimizer.sql | 8 ++++---- 3 files changed, 12 insertions(+), 16 deletions(-) diff --git a/sqlglot/expressions.py b/sqlglot/expressions.py index 953c0fd225..bdd1e9b5da 100644 --- a/sqlglot/expressions.py +++ b/sqlglot/expressions.py @@ -2953,7 
+2953,6 @@ class Tag(Expression): class Pivot(Expression): arg_types = { - "this": False, "alias": False, "expressions": True, "field": True, diff --git a/sqlglot/generator.py b/sqlglot/generator.py index 3001a54861..c9dfb44a60 100644 --- a/sqlglot/generator.py +++ b/sqlglot/generator.py @@ -1158,9 +1158,10 @@ def table_sql(self, expression: exp.Table, sep: str = " AS ") -> str: alias = self.sql(expression, "alias") alias = f"{sep}{alias}" if alias else "" - hints = self.expressions(expression, key="hints", sep=", ", flat=True) + hints = self.expressions(expression, key="hints", flat=True) hints = f" WITH ({hints})" if hints and self.TABLE_HINTS else "" - pivots = self.expressions(expression, key="pivots", sep="") + pivots = self.expressions(expression, key="pivots", sep=" ", flat=True) + pivots = f" {pivots}" if pivots else "" joins = self.expressions(expression, key="joins", sep="") laterals = self.expressions(expression, key="laterals", sep="") system_time = expression.args.get("system_time") @@ -1197,14 +1198,13 @@ def tablesample_sql( return f"{this} {kind} {method}({bucket}{percent}{rows}{size}){seed}{alias}" def pivot_sql(self, expression: exp.Pivot) -> str: - this = self.sql(expression, "this") alias = self.sql(expression, "alias") alias = f" AS {alias}" if alias else "" unpivot = expression.args.get("unpivot") direction = "UNPIVOT" if unpivot else "PIVOT" - expressions = self.expressions(expression, key="expressions") + expressions = self.expressions(expression, flat=True) field = self.sql(expression, "field") - return f"{this} {direction}({expressions} FOR {field}){alias}" + return f"{direction}({expressions} FOR {field}){alias}" def tuple_sql(self, expression: exp.Tuple) -> str: return f"({self.expressions(expression, flat=True)})" @@ -1562,13 +1562,10 @@ def subquery_sql(self, expression: exp.Subquery, sep: str = " AS ") -> str: alias = self.sql(expression, "alias") alias = f"{sep}{alias}" if alias else "" - sql = self.query_modifiers( - expression, - 
self.wrap(expression), - alias, - self.expressions(expression, key="pivots", sep=" "), - ) + pivots = self.expressions(expression, key="pivots", sep=" ", flat=True) + pivots = f" {pivots}" if pivots else "" + sql = self.query_modifiers(expression, self.wrap(expression), alias, pivots) return self.prepend_ctes(expression, sql) def qualify_sql(self, expression: exp.Qualify) -> str: diff --git a/tests/fixtures/optimizer/optimizer.sql b/tests/fixtures/optimizer/optimizer.sql index 05503fbc0d..318a4cc984 100644 --- a/tests/fixtures/optimizer/optimizer.sql +++ b/tests/fixtures/optimizer/optimizer.sql @@ -517,7 +517,7 @@ FROM ( "tb"."b" AS "b", "tb"."c" AS "c" FROM "sc"."tb" AS "tb" -) AS "_q_0" PIVOT( SUM("_q_0"."c") FOR "_q_0"."b" IN ('x', 'y', 'z')) AS "_q_1"; +) AS "_q_0" PIVOT(SUM("_q_0"."c") FOR "_q_0"."b" IN ('x', 'y', 'z')) AS "_q_1"; # title: pivoted source with implicit selections # execute: false @@ -532,7 +532,7 @@ FROM ( "u"."g" AS "g", "u"."h" AS "h" FROM "u" AS "u" -) AS "_q_0" PIVOT( SUM("_q_0"."f") FOR "_q_0"."h" IN ('x', 'y')) AS "_q_1"; +) AS "_q_0" PIVOT(SUM("_q_0"."f") FOR "_q_0"."h" IN ('x', 'y')) AS "_q_1"; # title: selecting explicit qualified columns from pivoted source with explicit selections # execute: false @@ -545,7 +545,7 @@ FROM ( "u"."f" AS "f", "u"."h" AS "h" FROM "u" AS "u" -) AS "_q_0" PIVOT( SUM("_q_0"."f") FOR "_q_0"."h" IN ('x', 'y')) AS "piv"; +) AS "_q_0" PIVOT(SUM("_q_0"."f") FOR "_q_0"."h" IN ('x', 'y')) AS "piv"; # title: selecting explicit unqualified columns from pivoted source with implicit selections # execute: false @@ -553,4 +553,4 @@ SELECT x, y FROM u PIVOT (SUM(f) FOR h IN ('x', 'y')); SELECT "_q_0"."x" AS "x", "_q_0"."y" AS "y" -FROM "u" AS "u" PIVOT( SUM("u"."f") FOR "u"."h" IN ('x', 'y')) AS "_q_0"; +FROM "u" AS "u" PIVOT(SUM("u"."f") FOR "u"."h" IN ('x', 'y')) AS "_q_0"; From cc231cc1dae17c88539694fc06fa26cb5c3c8b9f Mon Sep 17 00:00:00 2001 From: George Sittas Date: Sun, 14 May 2023 22:27:10 +0300 Subject: [PATCH 
06/12] Fixed snowflake pivot column names, add another optimizer test --- sqlglot/dialects/snowflake.py | 2 +- sqlglot/parser.py | 11 ++++++----- tests/fixtures/optimizer/optimizer.sql | 10 ++++++++++ tests/test_parser.py | 2 +- 4 files changed, 18 insertions(+), 7 deletions(-) diff --git a/sqlglot/dialects/snowflake.py b/sqlglot/dialects/snowflake.py index 27acb27a47..0ea704afcd 100644 --- a/sqlglot/dialects/snowflake.py +++ b/sqlglot/dialects/snowflake.py @@ -183,7 +183,7 @@ class Snowflake(Dialect): } class Parser(parser.Parser): - QUOTED_PIVOT_COLUMNS = True + IDENTIFY_PIVOT_STRINGS = True FUNCTIONS = { **parser.Parser.FUNCTIONS, diff --git a/sqlglot/parser.py b/sqlglot/parser.py index 19352f8bf6..037374bd9b 100644 --- a/sqlglot/parser.py +++ b/sqlglot/parser.py @@ -777,8 +777,8 @@ class Parser(metaclass=_Parser): CONVERT_TYPE_FIRST = False - QUOTED_PIVOT_COLUMNS: t.Optional[bool] = None PREFIXED_PIVOT_COLUMNS = False + IDENTIFY_PIVOT_STRINGS = False LOG_BASE_FIRST = True LOG_DEFAULTS_TO_LN = False @@ -2465,14 +2465,15 @@ def _parse_pivot(self) -> t.Optional[exp.Expression]: names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) columns: t.List[exp.Expression] = [] - for col in pivot.args["field"].expressions: + for fld in pivot.args["field"].expressions: + field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name for name in names: if self.PREFIXED_PIVOT_COLUMNS: - name = f"{name}_{col.alias_or_name}" if name else col.alias_or_name + name = f"{name}_{field_name}" if name else field_name else: - name = f"{col.alias_or_name}_{name}" if name else col.alias_or_name + name = f"{field_name}_{name}" if name else field_name - columns.append(exp.to_identifier(name, quoted=self.QUOTED_PIVOT_COLUMNS)) + columns.append(exp.to_identifier(name)) pivot.set("columns", columns) diff --git a/tests/fixtures/optimizer/optimizer.sql b/tests/fixtures/optimizer/optimizer.sql index 318a4cc984..ae229d4cc9 100644 --- 
a/tests/fixtures/optimizer/optimizer.sql +++ b/tests/fixtures/optimizer/optimizer.sql @@ -554,3 +554,13 @@ SELECT "_q_0"."x" AS "x", "_q_0"."y" AS "y" FROM "u" AS "u" PIVOT(SUM("u"."f") FOR "u"."h" IN ('x', 'y')) AS "_q_0"; + +# title: selecting all columns from a pivoted source and generating snowflake +# execute: false +# dialect: snowflake +SELECT * FROM u PIVOT (SUM(f) FOR h IN ('x', 'y')); +SELECT + "_q_0"."g" AS "g", + "_q_0"."'x'" AS "'x'", + "_q_0"."'y'" AS "'y'" +FROM "u" AS "u" PIVOT(SUM("u"."f") FOR "u"."h" IN ('x', 'y')) AS "_q_0"; diff --git a/tests/test_parser.py b/tests/test_parser.py index b1d15e48fb..c7bc9b7537 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -426,7 +426,7 @@ def test_pivot_columns(self): nothing_aliased: { "bigquery": ["prop", "rudder"], "redshift": ["prop", "rudder"], - "snowflake": ['"prop"', '"rudder"'], + "snowflake": ['''"'prop'"''', '''"'rudder'"'''], "spark": ["prop", "rudder"], }, everything_aliased: { From 5232b857f936907772c8a513ebea37fb2a107e71 Mon Sep 17 00:00:00 2001 From: George Sittas Date: Sun, 14 May 2023 23:55:50 +0300 Subject: [PATCH 07/12] Fixed issue with pivoted cte source, added bigquery test --- sqlglot/optimizer/qualify_tables.py | 2 +- sqlglot/optimizer/scope.py | 9 +++++++-- tests/fixtures/optimizer/optimizer.sql | 17 +++++++++++++++++ 3 files changed, 25 insertions(+), 3 deletions(-) diff --git a/sqlglot/optimizer/qualify_tables.py b/sqlglot/optimizer/qualify_tables.py index f553810b77..447b118481 100644 --- a/sqlglot/optimizer/qualify_tables.py +++ b/sqlglot/optimizer/qualify_tables.py @@ -64,7 +64,7 @@ def qualify_tables(expression, db=None, catalog=None, schema=None): source = source.replace( alias( source, - name if name else next_name(), + name or source.name or next_name(), copy=True, table=True, ) diff --git a/sqlglot/optimizer/scope.py b/sqlglot/optimizer/scope.py index 707406b00c..f3ce4f759f 100644 --- a/sqlglot/optimizer/scope.py +++ b/sqlglot/optimizer/scope.py @@ -615,8 
+615,13 @@ def _traverse_tables(scope): source_name = expression.alias_or_name if table_name in scope.sources: - # This is a reference to a parent source (e.g. a CTE), not an actual table. - sources[source_name] = scope.sources[table_name] + # This is a reference to a parent source (e.g. a CTE), not an actual table, unless + # it is pivoted, because then we get back a new table and hence a new source. + pivots = expression.args.get("pivots") + if pivots: + sources[pivots[0].alias] = expression + else: + sources[source_name] = scope.sources[table_name] elif source_name in sources: sources[find_new_name(sources, table_name)] = expression else: diff --git a/tests/fixtures/optimizer/optimizer.sql b/tests/fixtures/optimizer/optimizer.sql index ae229d4cc9..ed75aae018 100644 --- a/tests/fixtures/optimizer/optimizer.sql +++ b/tests/fixtures/optimizer/optimizer.sql @@ -555,6 +555,23 @@ SELECT "_q_0"."y" AS "y" FROM "u" AS "u" PIVOT(SUM("u"."f") FOR "u"."h" IN ('x', 'y')) AS "_q_0"; +# title: selecting all columns from a pivoted CTE source, using alias for the aggregation and generating bigquery +# execute: false +# dialect: bigquery +WITH u_cte(f, g, h) AS (SELECT * FROM u) SELECT * FROM u_cte PIVOT(SUM(f) AS sum FOR h IN ('x', 'y')); +WITH `u_cte` AS ( + SELECT + `u`.`f` AS `f`, + `u`.`g` AS `g`, + `u`.`h` AS `h` + FROM `u` AS `u` +) +SELECT + `_q_0`.`g` AS `g`, + `_q_0`.`sum_x` AS `sum_x`, + `_q_0`.`sum_y` AS `sum_y` +FROM `u_cte` AS `u_cte` PIVOT(SUM(`u_cte`.`f`) AS `sum` FOR `u_cte`.`h` IN ('x', 'y')) AS `_q_0`; + # title: selecting all columns from a pivoted source and generating snowflake # execute: false # dialect: snowflake From f8baa737b6d9de316cf3da2a3ef2842e52802f88 Mon Sep 17 00:00:00 2001 From: George Sittas Date: Mon, 15 May 2023 19:46:08 +0300 Subject: [PATCH 08/12] Factor out some computations --- sqlglot/optimizer/qualify_columns.py | 36 ++++++++++++++-------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git 
a/sqlglot/optimizer/qualify_columns.py b/sqlglot/optimizer/qualify_columns.py index a1aebf0242..a70ac57f4d 100644 --- a/sqlglot/optimizer/qualify_columns.py +++ b/sqlglot/optimizer/qualify_columns.py @@ -296,8 +296,18 @@ def _expand_stars(scope, resolver, using_column_tables): coalesced_columns = set() # TODO: handle optimization of multiple PIVOTs (and possibly UNPIVOTs) in the future + pivot_columns = None + pivot_output_columns = None pivot = seq_get(scope.pivots, 0) + has_pivoted_source = pivot and not pivot.args.get("unpivot") + if has_pivoted_source: + pivot_columns = set(column.output_name for column in pivot.find_all(exp.Column)) + + pivot_output_columns = [col.output_name for col in pivot.args.get("columns", [])] + if not pivot_output_columns: + pivot_output_columns = [col.alias_or_name for col in pivot.expressions] + for expression in scope.selects: if isinstance(expression, exp.Star): tables = list(scope.selected_sources) @@ -318,8 +328,14 @@ def _expand_stars(scope, resolver, using_column_tables): columns = resolver.get_source_columns(table, only_visible=True) if columns and "*" not in columns: - if pivot and not pivot.args.get("unpivot"): - _add_pivot_columns(pivot, columns, new_selections) + if has_pivoted_source: + implicit_columns = list(set(columns) - pivot_columns) + new_selections.extend( + [ + exp.alias_(exp.column(name, table=pivot.alias), name, copy=False) + for name in implicit_columns + pivot_output_columns + ] + ) continue table_id = id(table) @@ -375,22 +391,6 @@ def _add_replace_columns(expression, tables, replace_columns): replace_columns[id(table)] = columns -def _add_pivot_columns(pivot, source_columns, columns): - pivot_output_columns = [col.output_name for col in pivot.args.get("columns", [])] - if not pivot_output_columns: - pivot_output_columns = [col.alias_or_name for col in pivot.expressions] - - pivot_columns = set(column.output_name for column in pivot.find_all(exp.Column)) - implicit_columns = list(set(source_columns) - 
pivot_columns) - - columns.extend( - [ - exp.alias_(exp.column(name, table=pivot.alias), name) - for name in implicit_columns + pivot_output_columns - ] - ) - - def _qualify_outputs(scope): """Ensure all output columns are aliased""" new_selections = [] From dcdadb4d8a4e7690396fc3c978013a781fd01e12 Mon Sep 17 00:00:00 2001 From: George Sittas Date: Mon, 15 May 2023 22:56:41 +0300 Subject: [PATCH 09/12] Cleanup --- sqlglot/optimizer/qualify_columns.py | 11 +++++------ sqlglot/optimizer/scope.py | 2 +- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/sqlglot/optimizer/qualify_columns.py b/sqlglot/optimizer/qualify_columns.py index a70ac57f4d..55dbe09db4 100644 --- a/sqlglot/optimizer/qualify_columns.py +++ b/sqlglot/optimizer/qualify_columns.py @@ -302,7 +302,8 @@ def _expand_stars(scope, resolver, using_column_tables): has_pivoted_source = pivot and not pivot.args.get("unpivot") if has_pivoted_source: - pivot_columns = set(column.output_name for column in pivot.find_all(exp.Column)) + # Names of the columns referenced inside the PIVOT; they are excluded from the star expansion + pivot_columns = set(col.output_name for col in pivot.find_all(exp.Column)) pivot_output_columns = [col.output_name for col in pivot.args.get("columns", [])] if not pivot_output_columns: @@ -329,12 +330,10 @@ def _expand_stars(scope, resolver, using_column_tables): if columns and "*" not in columns: if has_pivoted_source: - implicit_columns = list(set(columns) - pivot_columns) + implicit_columns = [col for col in columns if col not in pivot_columns] new_selections.extend( - [ - exp.alias_(exp.column(name, table=pivot.alias), name, copy=False) - for name in implicit_columns + pivot_output_columns - ] + exp.alias_(exp.column(name, table=pivot.alias), name, copy=False) + for name in implicit_columns + pivot_output_columns ) continue diff --git a/sqlglot/optimizer/scope.py b/sqlglot/optimizer/scope.py index f3ce4f759f..e2e31787ae 100644 --- a/sqlglot/optimizer/scope.py +++ b/sqlglot/optimizer/scope.py @@ -379,7 +379,7
@@ def pivots(self): self._pivots = [ pivot for node in self.tables + self.derived_tables - for pivot in node.args.get("pivots", []) + for pivot in node.args.get("pivots") or [] ] return self._pivots From 54b0b0b3407ddc620f53202f9f825b8dc5495b93 Mon Sep 17 00:00:00 2001 From: George Sittas Date: Tue, 16 May 2023 01:10:43 +0300 Subject: [PATCH 10/12] Add transform to unalias pivot in spark, more tests --- sqlglot/dialects/spark2.py | 49 +++++++++++++++++++++++++- sqlglot/expressions.py | 8 ++--- sqlglot/transforms.py | 23 +++++------- tests/dialects/test_spark.py | 12 +++++++ tests/fixtures/optimizer/optimizer.sql | 15 ++++++++ 5 files changed, 88 insertions(+), 19 deletions(-) diff --git a/sqlglot/dialects/spark2.py b/sqlglot/dialects/spark2.py index 584671f947..c72fa6868c 100644 --- a/sqlglot/dialects/spark2.py +++ b/sqlglot/dialects/spark2.py @@ -53,6 +53,52 @@ def _unix_to_time_sql(self: Hive.Generator, expression: exp.UnixToTime) -> str: raise ValueError("Improper scale for timestamp") +def _unalias_pivots(expression: exp.Expression) -> exp.Expression: + """ + Spark doesn't allow PIVOT aliases, so we need to remove them and possibly wrap a + pivoted source in a subquery with the same alias to preserve the query's semantics. 
+ + Example: + >>> from sqlglot import parse_one + >>> expr = parse_one("SELECT piv.x FROM tbl PIVOT (SUM(a) FOR b IN ('x')) piv") + >>> print(_unalias_pivots(expr).sql(dialect="spark")) + SELECT piv.x FROM (SELECT * FROM tbl PIVOT(SUM(a) FOR b IN ('x'))) AS piv + """ + if isinstance(expression, exp.From) and expression.this.args.get("pivots"): + pivot = expression.this.args["pivots"][0] + if pivot.alias: + alias = pivot.args["alias"].pop() + return exp.From( + this=expression.this.replace( + exp.select("*").from_(expression.this.copy()).subquery(alias=alias) + ) + ) + + return expression + + +def _unqualify_pivot_columns(expression: exp.Expression) -> exp.Expression: + """ + Spark doesn't allow the column referenced in the PIVOT's field to be qualified, + so we need to unqualify it. + + Example: + >>> from sqlglot import parse_one + >>> expr = parse_one("SELECT * FROM tbl PIVOT (SUM(tbl.sales) FOR tbl.quarter IN ('Q1', 'Q2'))") + >>> print(_unqualify_pivot_columns(expr).sql(dialect="spark")) + SELECT * FROM tbl PIVOT(SUM(tbl.sales) FOR quarter IN ('Q1', 'Q2')) + """ + if isinstance(expression, exp.Pivot): + expression.args["field"].transform( + lambda node: exp.column(node.output_name, quoted=node.this.quoted) + if isinstance(node, exp.Column) + else node, + copy=False, + ) + + return expression + + class Spark2(Hive): class Parser(Hive.Parser): FUNCTIONS = { @@ -188,11 +234,12 @@ class Generator(Hive.Generator): exp.DayOfWeek: rename_func("DAYOFWEEK"), exp.DayOfYear: rename_func("DAYOFYEAR"), exp.FileFormatProperty: lambda self, e: f"USING {e.name.upper()}", + exp.From: transforms.preprocess([_unalias_pivots]), exp.Hint: lambda self, e: f" /*+ {self.expressions(e).strip()} */", exp.LogicalAnd: rename_func("BOOL_AND"), exp.LogicalOr: rename_func("BOOL_OR"), exp.Map: _map_sql, - exp.Pivot: transforms.preprocess([transforms.unqualify_pivot_columns]), + exp.Pivot: transforms.preprocess([_unqualify_pivot_columns]), exp.Reduce: rename_func("AGGREGATE"), exp.StrToDate:
_str_to_date, exp.StrToTime: lambda self, e: f"TO_TIMESTAMP({self.sql(e, 'this')}, {self.format_time(e)})", diff --git a/sqlglot/expressions.py b/sqlglot/expressions.py index bdd1e9b5da..5e9326a125 100644 --- a/sqlglot/expressions.py +++ b/sqlglot/expressions.py @@ -2026,10 +2026,10 @@ def subquery(self, alias=None, copy=True) -> Subquery: Alias: the subquery """ instance = _maybe_copy(self, copy) - return Subquery( - this=instance, - alias=TableAlias(this=to_identifier(alias)) if alias else None, - ) + if not isinstance(alias, Expression): + alias = TableAlias(this=to_identifier(alias)) if alias else None + + return Subquery(this=instance, alias=alias) def limit(self, expression, dialect=None, copy=True, **opts) -> Select: raise NotImplementedError diff --git a/sqlglot/transforms.py b/sqlglot/transforms.py index 7917bc3e27..eae07f0495 100644 --- a/sqlglot/transforms.py +++ b/sqlglot/transforms.py @@ -63,16 +63,17 @@ def eliminate_distinct_on(expression: exp.Expression) -> exp.Expression: distinct_cols = expression.args["distinct"].pop().args["on"].expressions outer_selects = expression.selects row_number = find_new_name(expression.named_selects, "_row_number") - window = exp.Window( - this=exp.RowNumber(), - partition_by=distinct_cols, - ) + window = exp.Window(this=exp.RowNumber(), partition_by=distinct_cols) order = expression.args.get("order") + if order: window.set("order", order.pop().copy()) + window = exp.alias_(window, row_number) expression.select(window, copy=False) + return exp.select(*outer_selects).from_(expression.subquery()).where(f'"{row_number}" = 1') + return expression @@ -104,6 +105,7 @@ def eliminate_qualify(expression: exp.Expression) -> exp.Expression: alias = find_new_name(expression.named_selects, "_w") expression.select(exp.alias_(expr, alias), copy=False) column = exp.column(alias) + if isinstance(expr.parent, exp.Qualify): qualify_filters = column else: @@ -123,6 +125,7 @@ def remove_precision_parameterized_types(expression: 
exp.Expression) -> exp.Expr """ for node in expression.find_all(exp.DataType): node.set("expressions", [e for e in node.expressions if isinstance(e, exp.DataType)]) + return expression @@ -147,6 +150,7 @@ def unnest_to_explode(expression: exp.Expression) -> exp.Expression: alias=exp.TableAlias(this=alias.this, columns=[column]), # type: ignore ), ) + return expression @@ -226,6 +230,7 @@ def remove_target_from_merge(expression: exp.Expression) -> exp.Expression: else node, copy=False, ) + return expression @@ -242,16 +247,6 @@ def remove_within_group_for_percentiles(expression: exp.Expression) -> exp.Expre return expression -def unqualify_pivot_columns(expression: exp.Expression) -> exp.Expression: - if isinstance(expression, exp.Pivot): - expression.args["field"].transform( - lambda node: exp.column(node.output_name) if isinstance(node, exp.Column) else node, - copy=False, - ) - - return expression - - def preprocess( transforms: t.List[t.Callable[[exp.Expression], exp.Expression]], ) -> t.Callable[[Generator, exp.Expression], str]: diff --git a/tests/dialects/test_spark.py b/tests/dialects/test_spark.py index be03b4e4b8..cf81976913 100644 --- a/tests/dialects/test_spark.py +++ b/tests/dialects/test_spark.py @@ -214,6 +214,18 @@ def test_spark(self): self.validate_identity("TRIM(TRAILING 'SL' FROM 'SSparkSQLS')") self.validate_identity("SPLIT(str, pattern, lim)") + self.validate_all( + "SELECT piv.Q1 FROM (SELECT * FROM produce PIVOT(SUM(sales) FOR quarter IN ('Q1', 'Q2'))) AS piv", + read={ + "snowflake": "SELECT piv.Q1 FROM produce PIVOT(SUM(sales) FOR quarter IN ('Q1', 'Q2')) piv", + }, + ) + self.validate_all( + "SELECT piv.Q1 FROM (SELECT * FROM (SELECT * FROM produce) PIVOT(SUM(sales) FOR quarter IN ('Q1', 'Q2'))) AS piv", + read={ + "snowflake": "SELECT piv.Q1 FROM (SELECT * FROM produce) PIVOT(SUM(sales) FOR quarter IN ('Q1', 'Q2')) piv", + }, + ) self.validate_all( "SELECT * FROM produce PIVOT(SUM(produce.sales) FOR quarter IN ('Q1', 'Q2'))", read={ diff 
--git a/tests/fixtures/optimizer/optimizer.sql b/tests/fixtures/optimizer/optimizer.sql index ed75aae018..d9597e5395 100644 --- a/tests/fixtures/optimizer/optimizer.sql +++ b/tests/fixtures/optimizer/optimizer.sql @@ -581,3 +581,18 @@ SELECT "_q_0"."'x'" AS "'x'", "_q_0"."'y'" AS "'y'" FROM "u" AS "u" PIVOT(SUM("u"."f") FOR "u"."h" IN ('x', 'y')) AS "_q_0"; + +# title: selecting all columns from a pivoted source and generating spark +# note: spark doesn't allow pivot aliases or qualified columns for the pivot's "field" (`h`) +# execute: false +# dialect: spark +SELECT * FROM u PIVOT (SUM(f) FOR h IN ('x', 'y')); +SELECT + `_q_0`.`g` AS `g`, + `_q_0`.`x` AS `x`, + `_q_0`.`y` AS `y` +FROM ( + SELECT + * + FROM `u` AS `u` PIVOT(SUM(`u`.`f`) FOR `h` IN ('x', 'y')) +) AS `_q_0`; From 930ed1783c6312676358b4e5cf3335964d941348 Mon Sep 17 00:00:00 2001 From: George Sittas Date: Tue, 16 May 2023 01:12:49 +0300 Subject: [PATCH 11/12] Typo --- sqlglot/dialects/spark2.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sqlglot/dialects/spark2.py b/sqlglot/dialects/spark2.py index c72fa6868c..7e2ad2b892 100644 --- a/sqlglot/dialects/spark2.py +++ b/sqlglot/dialects/spark2.py @@ -53,7 +53,7 @@ def _unix_to_time_sql(self: Hive.Generator, expression: exp.UnixToTime) -> str: raise ValueError("Improper scale for timestamp") -def _unalias_pivots(expression: exp.Expression) -> exp.Expression: +def _unalias_pivot(expression: exp.Expression) -> exp.Expression: """ Spark doesn't allow PIVOT aliases, so we need to remove them and possibly wrap a pivoted source in a subquery with the same alias to preserve the query's semantics. 
@@ -61,7 +61,7 @@ def _unalias_pivots(expression: exp.Expression) -> exp.Expression: Example: >>> from sqlglot import parse_one >>> expr = parse_one("SELECT piv.x FROM tbl PIVOT (SUM(a) FOR b IN ('x')) piv") - >>> print(_unalias_pivots(expr).sql(dialect="spark")) + >>> print(_unalias_pivot(expr).sql(dialect="spark")) SELECT piv.x FROM (SELECT * FROM tbl PIVOT(SUM(a) FOR b IN ('x'))) AS piv """ if isinstance(expression, exp.From) and expression.this.args.get("pivots"): @@ -234,7 +234,7 @@ class Generator(Hive.Generator): exp.DayOfWeek: rename_func("DAYOFWEEK"), exp.DayOfYear: rename_func("DAYOFYEAR"), exp.FileFormatProperty: lambda self, e: f"USING {e.name.upper()}", - exp.From: transforms.preprocess([_unalias_pivots]), + exp.From: transforms.preprocess([_unalias_pivot]), exp.Hint: lambda self, e: f" /*+ {self.expressions(e).strip()} */", exp.LogicalAnd: rename_func("BOOL_AND"), exp.LogicalOr: rename_func("BOOL_OR"), From cbe8c5dddc7e9aa1f3e25aab05775af523108f55 Mon Sep 17 00:00:00 2001 From: George Sittas Date: Tue, 16 May 2023 01:33:59 +0300 Subject: [PATCH 12/12] Comment fixup --- sqlglot/optimizer/qualify_columns.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sqlglot/optimizer/qualify_columns.py b/sqlglot/optimizer/qualify_columns.py index 55dbe09db4..e15380acd6 100644 --- a/sqlglot/optimizer/qualify_columns.py +++ b/sqlglot/optimizer/qualify_columns.py @@ -302,7 +302,6 @@ def _expand_stars(scope, resolver, using_column_tables): has_pivoted_source = pivot and not pivot.args.get("unpivot") if has_pivoted_source: - # We're using a dictionary here in order to preserve order pivot_columns = set(col.output_name for col in pivot.find_all(exp.Column)) pivot_output_columns = [col.output_name for col in pivot.args.get("columns", [])]