1
0
Fork 0

Merging upstream version 7.1.3.

Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
Daniel Baumann 2025-02-13 14:46:58 +01:00
parent 964bd62de9
commit e6b3d2fe54
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
42 changed files with 1430 additions and 253 deletions

View file

@ -135,11 +135,13 @@ class Parser:
TokenType.BOTH,
TokenType.BUCKET,
TokenType.CACHE,
TokenType.CALL,
TokenType.COLLATE,
TokenType.COMMIT,
TokenType.CONSTRAINT,
TokenType.DEFAULT,
TokenType.DELETE,
TokenType.DESCRIBE,
TokenType.DETERMINISTIC,
TokenType.EXECUTE,
TokenType.ENGINE,
@ -160,6 +162,7 @@ class Parser:
TokenType.LAZY,
TokenType.LANGUAGE,
TokenType.LEADING,
TokenType.LOCAL,
TokenType.LOCATION,
TokenType.MATERIALIZED,
TokenType.NATURAL,
@ -176,6 +179,7 @@ class Parser:
TokenType.REFERENCES,
TokenType.RETURNS,
TokenType.ROWS,
TokenType.SCHEMA,
TokenType.SCHEMA_COMMENT,
TokenType.SEED,
TokenType.SEMI,
@ -294,6 +298,11 @@ class Parser:
COLUMN_OPERATORS = {
TokenType.DOT: None,
TokenType.DCOLON: lambda self, this, to: self.expression(
exp.Cast,
this=this,
to=to,
),
TokenType.ARROW: lambda self, this, path: self.expression(
exp.JSONExtract,
this=this,
@ -342,8 +351,10 @@ class Parser:
STATEMENT_PARSERS = {
TokenType.CREATE: lambda self: self._parse_create(),
TokenType.DESCRIBE: lambda self: self._parse_describe(),
TokenType.DROP: lambda self: self._parse_drop(),
TokenType.INSERT: lambda self: self._parse_insert(),
TokenType.LOAD_DATA: lambda self: self._parse_load_data(),
TokenType.UPDATE: lambda self: self._parse_update(),
TokenType.DELETE: lambda self: self._parse_delete(),
TokenType.CACHE: lambda self: self._parse_cache(),
@ -449,7 +460,14 @@ class Parser:
MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)
CREATABLES = {TokenType.TABLE, TokenType.VIEW, TokenType.FUNCTION, TokenType.INDEX, TokenType.PROCEDURE}
CREATABLES = {
TokenType.TABLE,
TokenType.VIEW,
TokenType.FUNCTION,
TokenType.INDEX,
TokenType.PROCEDURE,
TokenType.SCHEMA,
}
STRICT_CAST = True
@ -650,7 +668,7 @@ class Parser:
materialized = self._match(TokenType.MATERIALIZED)
kind = self._match_set(self.CREATABLES) and self._prev.text
if not kind:
self.raise_error("Expected TABLE, VIEW, INDEX, FUNCTION, or PROCEDURE")
self.raise_error(f"Expected {self.CREATABLES}")
return
return self.expression(
@ -677,7 +695,7 @@ class Parser:
create_token = self._match_set(self.CREATABLES) and self._prev
if not create_token:
self.raise_error("Expected TABLE, VIEW, INDEX, FUNCTION, or PROCEDURE")
self.raise_error(f"Expected {self.CREATABLES}")
return
exists = self._parse_exists(not_=True)
@ -692,7 +710,7 @@ class Parser:
expression = self._parse_select_or_expression()
elif create_token.token_type == TokenType.INDEX:
this = self._parse_index()
elif create_token.token_type in (TokenType.TABLE, TokenType.VIEW):
elif create_token.token_type in (TokenType.TABLE, TokenType.VIEW, TokenType.SCHEMA):
this = self._parse_table(schema=True)
properties = self._parse_properties()
if self._match(TokenType.ALIAS):
@ -836,19 +854,74 @@ class Parser:
return self.expression(exp.Properties, expressions=properties)
return None
def _parse_describe(self):
    """Parse the target of a DESCRIBE statement into an ``exp.Describe`` node.

    Returns:
        The expression built by ``self.expression(exp.Describe, ...)`` whose
        ``this`` argument is whatever ``_parse_id_var`` returns.
    """
    # The TABLE keyword is optional here: the match result is intentionally ignored.
    self._match(TokenType.TABLE)
    target = self._parse_id_var()
    return self.expression(exp.Describe, this=target)
def _parse_insert(self):
overwrite = self._match(TokenType.OVERWRITE)
self._match(TokenType.INTO)
self._match(TokenType.TABLE)
local = self._match(TokenType.LOCAL)
if self._match_text("DIRECTORY"):
this = self.expression(
exp.Directory,
this=self._parse_var_or_string(),
local=local,
row_format=self._parse_row_format(),
)
else:
self._match(TokenType.INTO)
self._match(TokenType.TABLE)
this = self._parse_table(schema=True)
return self.expression(
exp.Insert,
this=self._parse_table(schema=True),
this=this,
exists=self._parse_exists(),
partition=self._parse_partition(),
expression=self._parse_select(nested=True),
overwrite=overwrite,
)
def _parse_row_format(self):
    """Parse a ``ROW FORMAT [DELIMITED] ...`` clause into an ``exp.RowFormat`` node.

    Returns:
        None when the ROW / FORMAT token pair is not matched; otherwise an
        ``exp.RowFormat`` expression carrying one keyword argument per
        sub-clause that was present.
    """
    if not self._match_pair(TokenType.ROW, TokenType.FORMAT):
        return None

    # DELIMITED is optional; the result of the match is not used.
    self._match_text("DELIMITED")

    # (argument name, keyword sequence) pairs, tried once each in this order.
    sub_clauses = (
        ("fields", ("FIELDS", "TERMINATED", "BY")),
        ("escaped", ("ESCAPED", "BY")),
        ("collection_items", ("COLLECTION", "ITEMS", "TERMINATED", "BY")),
        ("map_keys", ("MAP", "KEYS", "TERMINATED", "BY")),
        ("lines", ("LINES", "TERMINATED", "BY")),
        ("null", ("NULL", "DEFINED", "AS")),
    )

    options = {}
    for arg_name, keywords in sub_clauses:
        if self._match_text(*keywords):
            options[arg_name] = self._parse_string()

    return self.expression(exp.RowFormat, **options)
def _parse_load_data(self):
    """Parse the body of a LOAD DATA statement into an ``exp.LoadData`` node.

    Reads, in order: an optional LOCAL token, an optional INPATH keyword and
    the path string, an optional OVERWRITE token, an optional INTO TABLE pair,
    the target table, a partition, and the optional INPUTFORMAT / SERDE
    strings.

    Returns:
        The expression built by ``self.expression(exp.LoadData, ...)``.
    """
    is_local = self._match(TokenType.LOCAL)
    self._match_text("INPATH")
    path = self._parse_string()
    is_overwrite = self._match(TokenType.OVERWRITE)
    # INTO TABLE is consumed when present; the match result is not needed.
    self._match_pair(TokenType.INTO, TokenType.TABLE)

    # Parsed in the same order the tokens are expected to appear.
    table = self._parse_table(schema=True)
    partition = self._parse_partition()

    # Each stays False (the failed match result) when its keyword is absent.
    input_format = self._match_text("INPUTFORMAT")
    if input_format:
        input_format = self._parse_string()

    serde = self._match_text("SERDE")
    if serde:
        serde = self._parse_string()

    return self.expression(
        exp.LoadData,
        this=table,
        local=is_local,
        overwrite=is_overwrite,
        inpath=path,
        partition=partition,
        input_format=input_format,
        serde=serde,
    )
def _parse_delete(self):
self._match(TokenType.FROM)
@ -1484,6 +1557,14 @@ class Parser:
if self._match_set(self.RANGE_PARSERS):
this = self.RANGE_PARSERS[self._prev.token_type](self, this)
elif self._match(TokenType.ISNULL):
this = self.expression(exp.Is, this=this, expression=exp.Null())
# Postgres supports ISNULL and NOTNULL for conditions.
# https://blog.andreiavram.ro/postgresql-null-composite-type/
if self._match(TokenType.NOTNULL):
this = self.expression(exp.Is, this=this, expression=exp.Null())
this = self.expression(exp.Not, this=this)
if negate:
this = self.expression(exp.Not, this=this)
@ -1582,12 +1663,6 @@ class Parser:
return self._parse_column()
return type_token
while self._match(TokenType.DCOLON):
type_token = self._parse_types()
if not type_token:
self.raise_error("Expected type")
this = self.expression(exp.Cast, this=this, to=type_token)
return this
def _parse_types(self):
@ -1601,6 +1676,11 @@ class Parser:
is_struct = type_token == TokenType.STRUCT
expressions = None
if not nested and self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
return exp.DataType(
this=exp.DataType.Type.ARRAY, expressions=[exp.DataType.build(type_token.value)], nested=True
)
if self._match(TokenType.L_BRACKET):
self._retreat(index)
return None
@ -1611,7 +1691,7 @@ class Parser:
elif nested:
expressions = self._parse_csv(self._parse_types)
else:
expressions = self._parse_csv(self._parse_type)
expressions = self._parse_csv(self._parse_conjunction)
if not expressions:
self._retreat(index)
@ -1677,8 +1757,17 @@ class Parser:
this = self._parse_bracket(this)
while self._match_set(self.COLUMN_OPERATORS):
op = self.COLUMN_OPERATORS.get(self._prev.token_type)
field = self._parse_star() or self._parse_function() or self._parse_id_var()
op_token = self._prev.token_type
op = self.COLUMN_OPERATORS.get(op_token)
if op_token == TokenType.DCOLON:
field = self._parse_types()
if not field:
self.raise_error("Expected type")
elif op:
field = exp.Literal.string(self._advance() or self._prev.text)
else:
field = self._parse_star() or self._parse_function() or self._parse_id_var()
if isinstance(field, exp.Func):
# bigquery allows function calls like x.y.count(...)
@ -1687,7 +1776,7 @@ class Parser:
this = self._replace_columns_with_dots(this)
if op:
this = op(self, this, exp.Literal.string(field.name))
this = op(self, this, field)
elif isinstance(this, exp.Column) and not this.table:
this = self.expression(exp.Column, this=field, table=this.this)
else:
@ -1808,11 +1897,10 @@ class Parser:
if not self._match(TokenType.ARROW):
self._retreat(index)
distinct = self._match(TokenType.DISTINCT)
this = self._parse_conjunction()
if distinct:
this = self.expression(exp.Distinct, this=this)
if self._match(TokenType.DISTINCT):
this = self.expression(exp.Distinct, expressions=self._parse_csv(self._parse_conjunction))
else:
this = self._parse_conjunction()
if self._match(TokenType.IGNORE_NULLS):
this = self.expression(exp.IgnoreNulls, this=this)
@ -2112,6 +2200,8 @@ class Parser:
this = self.expression(exp.Filter, this=this, expression=self._parse_where())
self._match_r_paren()
# T-SQL allows the OVER (...) syntax after WITHIN GROUP.
# https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
if self._match(TokenType.WITHIN_GROUP):
self._match_l_paren()
this = self.expression(
@ -2120,7 +2210,6 @@ class Parser:
expression=self._parse_order(),
)
self._match_r_paren()
return this
# SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
# Some dialects choose to implement and some do not.
@ -2366,6 +2455,16 @@ class Parser:
if not self._match(TokenType.R_PAREN):
self.raise_error("Expecting )")
def _match_text(self, *texts):
    """Match a sequence of keywords against the upcoming tokens by their text.

    Each keyword is compared case-insensitively with the text of the current
    token. The original only upper-cased the token text, so a keyword passed
    in lower or mixed case could never match; both sides are normalized now.

    Args:
        *texts: Keyword strings in the order they must appear.

    Returns:
        True when every keyword matched, with the parser advanced once per
        keyword. False on the first mismatch (or when no current token is
        left), after ``_retreat`` has been called with the index recorded on
        entry.
    """
    start = self._index
    for text in texts:
        token = self._curr
        if not token or token.text.upper() != text.upper():
            # Undo any partial progress so the caller can try another rule.
            self._retreat(start)
            return False
        self._advance()
    return True
def _replace_columns_with_dots(self, this):
if isinstance(this, exp.Dot):
exp.replace_children(this, self._replace_columns_with_dots)