1
0
Fork 0

Merging upstream version 20.11.0.

Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
Daniel Baumann 2025-02-13 21:19:58 +01:00
parent 1bce3d0317
commit e71ccc03da
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
141 changed files with 66644 additions and 54334 deletions

View file

@ -12,9 +12,7 @@ from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie
if t.TYPE_CHECKING:
from typing_extensions import Literal
from sqlglot._typing import E
from sqlglot._typing import E, Lit
from sqlglot.dialects.dialect import Dialect, DialectType
logger = logging.getLogger("sqlglot")
@ -148,6 +146,11 @@ class Parser(metaclass=_Parser):
TokenType.ENUM16,
}
AGGREGATE_TYPE_TOKENS = {
TokenType.AGGREGATEFUNCTION,
TokenType.SIMPLEAGGREGATEFUNCTION,
}
TYPE_TOKENS = {
TokenType.BIT,
TokenType.BOOLEAN,
@ -241,6 +244,7 @@ class Parser(metaclass=_Parser):
TokenType.NULL,
*ENUM_TYPE_TOKENS,
*NESTED_TYPE_TOKENS,
*AGGREGATE_TYPE_TOKENS,
}
SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
@ -653,9 +657,11 @@ class Parser(metaclass=_Parser):
PLACEHOLDER_PARSERS = {
TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
TokenType.PARAMETER: lambda self: self._parse_parameter(),
TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
else None,
TokenType.COLON: lambda self: (
self.expression(exp.Placeholder, this=self._prev.text)
if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
else None
),
}
RANGE_PARSERS = {
@ -705,6 +711,9 @@ class Parser(metaclass=_Parser):
"IMMUTABLE": lambda self: self.expression(
exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
),
"INHERITS": lambda self: self.expression(
exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
),
"INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
"JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
"LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
@ -822,6 +831,7 @@ class Parser(metaclass=_Parser):
ALTER_PARSERS = {
"ADD": lambda self: self._parse_alter_table_add(),
"ALTER": lambda self: self._parse_alter_table_alter(),
"CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
"DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
"DROP": lambda self: self._parse_alter_table_drop(),
"RENAME": lambda self: self._parse_alter_table_rename(),
@ -973,6 +983,9 @@ class Parser(metaclass=_Parser):
MODIFIERS_ATTACHED_TO_UNION = True
UNION_MODIFIERS = {"order", "limit", "offset"}
# parses no parenthesis if statements as commands
NO_PAREN_IF_COMMANDS = True
__slots__ = (
"error_level",
"error_message_context",
@ -1207,7 +1220,20 @@ class Parser(metaclass=_Parser):
if index != self._index:
self._advance(index - self._index)
def _warn_unsupported(self) -> None:
    """Emit a warning that the SQL being parsed contains unsupported syntax.

    A truncated excerpt of the offending SQL (up to ``error_message_context``
    characters) is included in the warning. No-op for empty/trivial input.
    """
    if len(self._tokens) <= 1:
        return

    # We use _find_sql because self.sql may comprise multiple chunks, and we're only
    # interested in emitting a warning for the one being currently processed.
    sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

    logger.warning(
        f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
    )
def _parse_command(self) -> exp.Command:
    """Fall back to parsing the statement as an opaque Command expression.

    Warns that the syntax is unsupported, then wraps the previous token's
    (upper-cased) text and any trailing string in an `exp.Command` node.
    """
    self._warn_unsupported()
    command_name = self._prev.text.upper()
    return self.expression(exp.Command, this=command_name, expression=self._parse_string())
@ -1329,8 +1355,10 @@ class Parser(metaclass=_Parser):
start = self._prev
comments = self._prev_comments
replace = start.text.upper() == "REPLACE" or self._match_pair(
TokenType.OR, TokenType.REPLACE
replace = (
start.token_type == TokenType.REPLACE
or self._match_pair(TokenType.OR, TokenType.REPLACE)
or self._match_pair(TokenType.OR, TokenType.ALTER)
)
unique = self._match(TokenType.UNIQUE)
@ -1440,6 +1468,9 @@ class Parser(metaclass=_Parser):
exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
)
if self._curr:
return self._parse_as_command(start)
return self.expression(
exp.Create,
comments=comments,
@ -1516,11 +1547,13 @@ class Parser(metaclass=_Parser):
return self.expression(
exp.FileFormatProperty,
this=self.expression(
exp.InputOutputFormat, input_format=input_format, output_format=output_format
)
if input_format or output_format
else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
this=(
self.expression(
exp.InputOutputFormat, input_format=input_format, output_format=output_format
)
if input_format or output_format
else self._parse_var_or_string() or self._parse_number() or self._parse_id_var()
),
)
def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
@ -1632,8 +1665,15 @@ class Parser(metaclass=_Parser):
return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))
def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
    """Parse a CLUSTER BY clause into an `exp.Cluster` node.

    Args:
        wrapped: when True, the ordered expressions are expected inside
            parentheses, e.g. ``CLUSTER BY (a, b)`` as used by the
            ALTER TABLE parser; otherwise a bare CSV list is parsed.
    """
    # Note: this text previously contained both the old (no-arg) and the new
    # (wrapped-aware) definition of this method back to back; only the newer,
    # backward-compatible one is kept.
    csv_parser = self._parse_wrapped_csv if wrapped else self._parse_csv
    return self.expression(exp.Cluster, expressions=csv_parser(self._parse_ordered))
def _parse_clustered_by(self) -> exp.ClusteredByProperty:
self._match_text_seq("BY")
@ -2681,6 +2721,8 @@ class Parser(metaclass=_Parser):
else:
columns = None
include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
return self.expression(
exp.Index,
this=index,
@ -2690,6 +2732,7 @@ class Parser(metaclass=_Parser):
unique=unique,
primary=primary,
amp=amp,
include=include,
partition_by=self._parse_partition_by(),
where=self._parse_where(),
)
@ -3380,8 +3423,8 @@ class Parser(metaclass=_Parser):
def _parse_comparison(self) -> t.Optional[exp.Expression]:
    # Parse left-associative comparison operators over the next-higher
    # precedence rule (_parse_range).
    return self._parse_tokens(self._parse_range, self.COMPARISON)
def _parse_range(self) -> t.Optional[exp.Expression]:
this = self._parse_bitwise()
def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
this = this or self._parse_bitwise()
negate = self._match(TokenType.NOT)
if self._match_set(self.RANGE_PARSERS):
@ -3535,14 +3578,21 @@ class Parser(metaclass=_Parser):
return self._parse_tokens(self._parse_factor, self.TERM)
def _parse_factor(self) -> t.Optional[exp.Expression]:
    """Parse a factor-precedence expression (multiplication/division level).

    Operands are parsed with ``_parse_exponent`` when the dialect defines an
    exponent operator (``self.EXPONENT``), otherwise with ``_parse_unary``.
    Binary operators are folded left-associatively, and every `exp.Div`
    produced here is annotated with the dialect's ``TYPED_DIVISION`` and
    ``SAFE_DIVISION`` flags.
    """
    # Note: this text previously contained both the old _parse_tokens-based
    # body and the new explicit-loop body; only the newer one is kept.
    parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
    this = parse_method()

    while self._match_set(self.FACTOR):
        this = self.expression(
            self.FACTOR[self._prev.token_type],
            this=this,
            comments=self._prev_comments,
            expression=parse_method(),
        )
        if isinstance(this, exp.Div):
            # Tag division nodes with the dialect's division semantics so
            # generators can translate them correctly.
            this.args["typed"] = self.dialect.TYPED_DIVISION
            this.args["safe"] = self.dialect.SAFE_DIVISION

    return this
def _parse_exponent(self) -> t.Optional[exp.Expression]:
    # Exponent operators bind tighter than factor operators, so they are
    # parsed over unary expressions.
    return self._parse_tokens(self._parse_unary, self.EXPONENT)
@ -3617,6 +3667,7 @@ class Parser(metaclass=_Parser):
return exp.DataType.build(type_name, udt=True)
else:
self._retreat(self._index - 1)
return None
else:
return None
@ -3631,6 +3682,7 @@ class Parser(metaclass=_Parser):
nested = type_token in self.NESTED_TYPE_TOKENS
is_struct = type_token in self.STRUCT_TYPE_TOKENS
is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
expressions = None
maybe_func = False
@ -3645,6 +3697,18 @@ class Parser(metaclass=_Parser):
)
elif type_token in self.ENUM_TYPE_TOKENS:
expressions = self._parse_csv(self._parse_equality)
elif is_aggregate:
func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
any_token=False, tokens=(TokenType.VAR,)
)
if not func_or_ident or not self._match(TokenType.COMMA):
return None
expressions = self._parse_csv(
lambda: self._parse_types(
check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
)
)
expressions.insert(0, func_or_ident)
else:
expressions = self._parse_csv(self._parse_type_size)
@ -4413,6 +4477,10 @@ class Parser(metaclass=_Parser):
self._match_r_paren()
else:
index = self._index - 1
if self.NO_PAREN_IF_COMMANDS and index == 0:
return self._parse_as_command(self._prev)
condition = self._parse_conjunction()
if not condition:
@ -4624,12 +4692,10 @@ class Parser(metaclass=_Parser):
return None
# Typing overloads for _parse_json_object: the `agg` flag selects between the
# JSONObject and JSONObjectAgg return types. Note: this text previously
# contained both the old multi-line `Literal`-typed stubs and the new
# one-line `Lit`-typed stubs; only the newer form is kept.
@t.overload
def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

@t.overload
def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...
def _parse_json_object(self, agg=False):
star = self._parse_star()
@ -4974,11 +5040,12 @@ class Parser(metaclass=_Parser):
if alias:
this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)
column = this.this
# Moves the comment next to the alias in `expr /* comment */ AS alias`
if not this.comments and this.this.comments:
this.comments = this.this.comments
this.this.comments = None
if not this.comments and column and column.comments:
this.comments = column.comments
column.comments = None
return this
@ -5244,7 +5311,7 @@ class Parser(metaclass=_Parser):
if self._match_text_seq("CHECK"):
expression = self._parse_wrapped(self._parse_conjunction)
enforced = self._match_text_seq("ENFORCED")
enforced = self._match_text_seq("ENFORCED") or False
return self.expression(
exp.AddConstraint, this=this, expression=expression, enforced=enforced
@ -5278,6 +5345,8 @@ class Parser(metaclass=_Parser):
return self.expression(exp.AlterColumn, this=column, drop=True)
if self._match_pair(TokenType.SET, TokenType.DEFAULT):
return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())
if self._match(TokenType.COMMENT):
return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())
self._match_text_seq("SET", "DATA")
return self.expression(
@ -5298,7 +5367,18 @@ class Parser(metaclass=_Parser):
self._retreat(index)
return self._parse_csv(self._parse_drop_column)
def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]:
    """Parse the tail of an ``ALTER TABLE ... RENAME`` statement.

    Handles both ``RENAME COLUMN [IF EXISTS] <old> TO <new>`` (returns
    `exp.RenameColumn`, or None if the column clause is malformed) and
    ``RENAME [TO] <table>`` (returns `exp.RenameTable`).
    """
    # Note: this text previously also contained the stale pre-merge signature
    # (`-> exp.RenameTable`); only the newer definition is kept.
    if not self._match(TokenType.COLUMN):
        # RENAME [TO] <table>
        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    # RENAME COLUMN [IF EXISTS] <old> TO <new>
    exists = self._parse_exists()
    old_column = self._parse_column()
    to = self._match_text_seq("TO")
    new_column = self._parse_column()

    if old_column is None or to is None or new_column is None:
        return None

    return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)
@ -5319,7 +5399,7 @@ class Parser(metaclass=_Parser):
if parser:
actions = ensure_list(parser(self))
if not self._curr:
if not self._curr and actions:
return self.expression(
exp.AlterTable,
this=this,
@ -5467,6 +5547,7 @@ class Parser(metaclass=_Parser):
self._advance()
text = self._find_sql(start, self._prev)
size = len(start.text)
self._warn_unsupported()
return exp.Command(this=text[:size], expression=text[size:])
def _parse_dict_property(self, this: str) -> exp.DictProperty:
@ -5634,7 +5715,7 @@ class Parser(metaclass=_Parser):
if advance:
self._advance()
return True
return False
return None
def _match_text_seq(self, *texts, advance=True):
index = self._index
@ -5643,7 +5724,7 @@ class Parser(metaclass=_Parser):
self._advance()
else:
self._retreat(index)
return False
return None
if not advance:
self._retreat(index)
@ -5651,14 +5732,12 @@ class Parser(metaclass=_Parser):
return True
# Typing overloads for _replace_columns_with_dots: a non-None input maps to a
# non-None result. Note: this text previously contained both the old stub
# bodies (`...` on its own line) and the new one-line stubs; only the newer
# form is kept.
@t.overload
def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: ...

@t.overload
def _replace_columns_with_dots(
    self, this: t.Optional[exp.Expression]
) -> t.Optional[exp.Expression]: ...
def _replace_columns_with_dots(self, this):
if isinstance(this, exp.Dot):