Skip to content

Commit

Permalink
Revert: "Feat!(schema, snowflake): normalize snowflake identifiers to…
Browse files Browse the repository at this point in the history
… uppercase" (tobymao#1680)

* Revert "Feat!(schema, snowflake): normalize snowflake identifiers to uppercase (tobymao#1666)"

This reverts commit 028fa35.

* Keep schema associated changes, snowflake logic excluded
  • Loading branch information
georgesittas authored and adrianisk committed Jun 21, 2023
1 parent f54d230 commit d8edd2c
Show file tree
Hide file tree
Showing 13 changed files with 110 additions and 179 deletions.
5 changes: 0 additions & 5 deletions sqlglot/dialects/dialect.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,6 @@
E = t.TypeVar("E", bound=exp.Expression)


# Only Snowflake is currently known to resolve unquoted identifiers as uppercase.
# https://docs.snowflake.com/en/sql-reference/identifiers-syntax
RESOLVES_IDENTIFIERS_AS_UPPERCASE = {"snowflake"}


class Dialects(str, Enum):
DIALECT = ""

Expand Down
15 changes: 3 additions & 12 deletions sqlglot/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@

if t.TYPE_CHECKING:
from sqlglot import exp
from sqlglot.dialects.dialect import DialectType
from sqlglot.expressions import Expression

T = t.TypeVar("T")
Expand Down Expand Up @@ -422,26 +421,18 @@ def first(it: t.Iterable[T]) -> T:
return next(i for i in it)


def should_identify(text: str, identify: str | bool, dialect: DialectType = None) -> bool:
def should_identify(text: str, identify: str | bool) -> bool:
"""Checks if text should be identified given an identify option.
Args:
text: the text to check.
identify:
"always" or `True`: always returns true.
"safe": true if there is no uppercase or lowercase character in `text`, depending on `dialect`.
dialect: the dialect to use in order to decide whether a text should be identified.
identify: "always" | True - always returns true, "safe" - true if no upper case
Returns:
Whether or not a string should be identified.
"""
if identify is True or identify == "always":
return True

if identify == "safe":
from sqlglot.dialects.dialect import RESOLVES_IDENTIFIERS_AS_UPPERCASE

unsafe = str.islower if dialect in RESOLVES_IDENTIFIERS_AS_UPPERCASE else str.isupper
return not any(unsafe(char) for char in text)

return not any(char.isupper() for char in text)
return False
4 changes: 2 additions & 2 deletions sqlglot/lineage.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from sqlglot import Schema, exp, maybe_parse
from sqlglot.optimizer import Scope, build_scope, optimize
from sqlglot.optimizer.normalize_identifiers import normalize_identifiers
from sqlglot.optimizer.lower_identities import lower_identities
from sqlglot.optimizer.qualify_columns import qualify_columns
from sqlglot.optimizer.qualify_tables import qualify_tables

Expand Down Expand Up @@ -40,7 +40,7 @@ def lineage(
sql: str | exp.Expression,
schema: t.Optional[t.Dict | Schema] = None,
sources: t.Optional[t.Dict[str, str | exp.Subqueryable]] = None,
rules: t.Sequence[t.Callable] = (normalize_identifiers, qualify_tables, qualify_columns),
rules: t.Sequence[t.Callable] = (lower_identities, qualify_tables, qualify_columns),
dialect: DialectType = None,
) -> Node:
"""Build the lineage graph for a column of a SQL query.
Expand Down
12 changes: 3 additions & 9 deletions sqlglot/optimizer/canonicalize.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,12 @@
from __future__ import annotations

import itertools
import typing as t

from sqlglot import exp
from sqlglot.helper import should_identify

if t.TYPE_CHECKING:
from sqlglot.dialects.dialect import DialectType


def canonicalize(
expression: exp.Expression, identify: str = "safe", dialect: DialectType = None
) -> exp.Expression:
def canonicalize(expression: exp.Expression, identify: str = "safe") -> exp.Expression:
"""Converts a sql expression into a standard form.
This method relies on annotate_types because many of the
Expand All @@ -22,15 +16,15 @@ def canonicalize(
expression: The expression to canonicalize.
identify: Whether or not to force identify identifier.
"""
exp.replace_children(expression, canonicalize, identify=identify, dialect=dialect)
exp.replace_children(expression, canonicalize, identify=identify)

expression = add_text_to_concat(expression)
expression = coerce_type(expression)
expression = remove_redundant_casts(expression)
expression = ensure_bool_predicates(expression)

if isinstance(expression, exp.Identifier):
if should_identify(expression.this, identify, dialect=dialect):
if should_identify(expression.this, identify):
expression.set("quoted", True)

return expression
Expand Down
88 changes: 88 additions & 0 deletions sqlglot/optimizer/lower_identities.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
from sqlglot import exp


def lower_identities(expression):
    """
    Lower-case every unquoted identifier in `expression`, except output aliases.

    Quoted identifiers are left alone. Provided the schema itself is all lower
    case, this makes identifier resolution effectively case-insensitive.

    Example:
        >>> import sqlglot
        >>> expression = sqlglot.parse_one('SELECT Bar.A AS A FROM "Foo".Bar')
        >>> lower_identities(expression).sql()
        'SELECT bar.a AS A FROM "Foo".bar'

    Args:
        expression (sqlglot.Expression): expression whose identifiers are lower-cased

    Returns:
        sqlglot.Expression: the same expression object that was passed in
    """
    # Projections keep their output aliases, so they are processed separately.
    _lower_selects(expression)

    # ORDER BY and HAVING may refer to output aliases; they get dedicated handling too.
    _lower_order(expression)
    _lower_having(expression)

    # Arg keys that have already been processed and must not be walked again.
    handled = {"expressions", "order", "having"}

    if isinstance(expression, exp.Subquery):
        # Root-level subquery, e.g. (SELECT A AS A FROM X) LIMIT 1
        lower_identities(expression.this)
        handled.add("this")

    if isinstance(expression, exp.Union):
        # Set operation, e.g. SELECT A AS A FROM X UNION SELECT A AS A FROM X
        lower_identities(expression.left)
        lower_identities(expression.right)
        handled.update(("this", "expression"))

    # Everything that was not special-cased above is lowered wholesale.
    for arg_key, child in expression.iter_expressions():
        if arg_key not in handled:
            child.transform(_lower, copy=False)

    return expression


def _lower_selects(expression):
    """Lower-case identifiers inside each projection, leaving output aliases as written."""
    for projection in expression.expressions:
        # `unalias()` yields the aliased expression, so the alias itself is never visited.
        aliased = projection.unalias()
        aliased.transform(_lower, copy=False)


def _lower_order(expression):
    """Lower-case identifiers in the ORDER BY clause, skipping references to output aliases."""
    order = expression.args.get("order")

    if not order:
        return

    # Names introduced by `... AS <alias>` in the projection list.
    alias_names = set()
    for projection in expression.expressions:
        if isinstance(projection, exp.Alias):
            alias_names.add(projection.alias)

    for term in order.expressions:
        target = term.this
        refers_to_alias = (
            isinstance(target, exp.Column)
            and not target.table
            and target.name in alias_names
        )

        # An unqualified column matching an output alias must keep its casing.
        if refers_to_alias:
            continue

        term.transform(_lower, copy=False)


def _lower_having(expression):
    """Lower-case identifiers found inside aggregate functions of the HAVING clause."""
    having = expression.args.get("having")

    if having:
        # Only aggregate calls are touched, so references to output aliases
        # elsewhere in the HAVING clause keep their casing.
        for aggregate in having.find_all(exp.AggFunc):
            aggregate.transform(_lower, copy=False)


def _lower(node):
    """Lower-case `node` in place if it is an unquoted identifier; always return `node`."""
    if not isinstance(node, exp.Identifier) or node.quoted:
        # Non-identifiers and quoted identifiers pass through untouched.
        return node

    node.set("this", node.this.lower())
    return node
104 changes: 0 additions & 104 deletions sqlglot/optimizer/normalize_identifiers.py

This file was deleted.

6 changes: 3 additions & 3 deletions sqlglot/optimizer/optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@
from sqlglot.optimizer.eliminate_joins import eliminate_joins
from sqlglot.optimizer.eliminate_subqueries import eliminate_subqueries
from sqlglot.optimizer.isolate_table_selects import isolate_table_selects
from sqlglot.optimizer.lower_identities import lower_identities
from sqlglot.optimizer.merge_subqueries import merge_subqueries
from sqlglot.optimizer.normalize import normalize
from sqlglot.optimizer.normalize_identifiers import normalize_identifiers
from sqlglot.optimizer.optimize_joins import optimize_joins
from sqlglot.optimizer.pushdown_predicates import pushdown_predicates
from sqlglot.optimizer.pushdown_projections import pushdown_projections
Expand All @@ -24,7 +24,7 @@
from sqlglot.schema import ensure_schema

RULES = (
normalize_identifiers,
lower_identities,
qualify_tables,
isolate_table_selects,
qualify_columns,
Expand Down Expand Up @@ -77,7 +77,7 @@ def optimize(
sqlglot.Expression: optimized expression
"""
schema = ensure_schema(schema or sqlglot.schema, dialect=dialect)
possible_kwargs = {"db": db, "catalog": catalog, "schema": schema, "dialect": dialect, **kwargs}
possible_kwargs = {"db": db, "catalog": catalog, "schema": schema, **kwargs}
expression = exp.maybe_parse(expression, dialect=dialect, copy=True)

for rule in rules:
Expand Down
8 changes: 1 addition & 7 deletions sqlglot/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@

import sqlglot
from sqlglot import expressions as exp
from sqlglot.dialects.dialect import RESOLVES_IDENTIFIERS_AS_UPPERCASE
from sqlglot.errors import ParseError, SchemaError
from sqlglot.helper import dict_depth
from sqlglot.trie import in_trie, new_trie
Expand Down Expand Up @@ -330,12 +329,7 @@ def _normalize_name(self, name: str | exp.Identifier, dialect: DialectType = Non
except ParseError:
return name if isinstance(name, str) else name.name

name = identifier.name

if identifier.quoted:
return name

return name.upper() if dialect in RESOLVES_IDENTIFIERS_AS_UPPERCASE else name.lower()
return identifier.name if identifier.quoted else identifier.name.lower()

def _depth(self) -> int:
# The columns themselves are a mapping, but we don't want to include those
Expand Down
Original file line number Diff line number Diff line change
@@ -1,37 +1,21 @@
SELECT a FROM x;
SELECT a FROM x;

# dialect: snowflake
SELECT A FROM X;
SELECT A FROM X;

SELECT "A" FROM "X";
SELECT "A" FROM "X";

SELECT a AS A FROM x;
SELECT a AS A FROM x;

# dialect: snowflake
SELECT A AS a FROM X;
SELECT A AS a FROM X;

SELECT * FROM x;
SELECT * FROM x;

SELECT A FROM x;
SELECT a FROM x;

# dialect: snowflake
SELECT a FROM X;
SELECT A FROM X;

SELECT a FROM X;
SELECT a FROM x;

# dialect: snowflake
SELECT A FROM x;
SELECT A FROM X;

SELECT A AS A FROM (SELECT a AS A FROM x);
SELECT a AS A FROM (SELECT a AS a FROM x);

Expand Down
Loading

0 comments on commit d8edd2c

Please sign in to comment.