1
0
Fork 0

Merging upstream version 16.4.0.

Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
Daniel Baumann 2025-02-13 20:04:59 +01:00
parent 8a4abed982
commit 71f21d9752
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
90 changed files with 35638 additions and 33343 deletions

View file

@ -4,6 +4,7 @@ import typing as t
from enum import Enum
from sqlglot import exp
from sqlglot._typing import E
from sqlglot.generator import Generator
from sqlglot.helper import flatten, seq_get
from sqlglot.parser import Parser
@ -11,14 +12,6 @@ from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import new_trie
if t.TYPE_CHECKING:
from sqlglot._typing import E
# Only Snowflake is currently known to resolve unquoted identifiers as uppercase.
# https://docs.snowflake.com/en/sql-reference/identifiers-syntax
RESOLVES_IDENTIFIERS_AS_UPPERCASE = {"snowflake"}
class Dialects(str, Enum):
DIALECT = ""
@ -117,6 +110,9 @@ class _Dialect(type):
"IDENTIFIER_ESCAPE": klass.tokenizer_class.IDENTIFIER_ESCAPES[0],
}
if enum not in ("", "bigquery"):
dialect_properties["SELECT_KINDS"] = ()
# Pass required dialect properties to the tokenizer, parser and generator classes
for subclass in (klass.tokenizer_class, klass.parser_class, klass.generator_class):
for name, value in dialect_properties.items():
@ -126,6 +122,8 @@ class _Dialect(type):
if not klass.STRICT_STRING_CONCAT:
klass.parser_class.BITWISE[TokenType.DPIPE] = exp.SafeDPipe
klass.generator_class.can_identify = klass.can_identify
return klass
@ -139,6 +137,10 @@ class Dialect(metaclass=_Dialect):
# Determines whether or not the table alias comes after tablesample
ALIAS_POST_TABLESAMPLE = False
# Determines whether or not unquoted identifiers are resolved as uppercase
# When set to None, it means that the dialect treats all identifiers as case-insensitive
RESOLVES_IDENTIFIERS_AS_UPPERCASE: t.Optional[bool] = False
# Determines whether or not an unquoted identifier can start with a digit
IDENTIFIERS_CAN_START_WITH_DIGIT = False
@ -213,6 +215,66 @@ class Dialect(metaclass=_Dialect):
return expression
@classmethod
def normalize_identifier(cls, expression: E) -> E:
"""
Normalizes an unquoted identifier to either lower or upper case, thus essentially
making it case-insensitive. If a dialect treats all identifiers as case-insensitive,
they will be normalized regardless of being quoted or not.
"""
if isinstance(expression, exp.Identifier) and (
not expression.quoted or cls.RESOLVES_IDENTIFIERS_AS_UPPERCASE is None
):
expression.set(
"this",
expression.this.upper()
if cls.RESOLVES_IDENTIFIERS_AS_UPPERCASE
else expression.this.lower(),
)
return expression
@classmethod
def case_sensitive(cls, text: str) -> bool:
"""Checks if text contains any case sensitive characters, based on the dialect's rules."""
if cls.RESOLVES_IDENTIFIERS_AS_UPPERCASE is None:
return False
unsafe = str.islower if cls.RESOLVES_IDENTIFIERS_AS_UPPERCASE else str.isupper
return any(unsafe(char) for char in text)
@classmethod
def can_identify(cls, text: str, identify: str | bool = "safe") -> bool:
"""Checks if text can be identified given an identify option.
Args:
text: The text to check.
identify:
"always" or `True`: Always returns true.
"safe": True if the identifier is case-insensitive.
Returns:
Whether or not the given text can be identified.
"""
if identify is True or identify == "always":
return True
if identify == "safe":
return not cls.case_sensitive(text)
return False
@classmethod
def quote_identifier(cls, expression: E, identify: bool = True) -> E:
if isinstance(expression, exp.Identifier):
name = expression.this
expression.set(
"quoted",
identify or cls.case_sensitive(name) or not exp.SAFE_IDENTIFIER_RE.match(name),
)
return expression
def parse(self, sql: str, **opts) -> t.List[t.Optional[exp.Expression]]:
return self.parser(**opts).parse(self.tokenize(sql), sql)