1
0
Fork 0

Merging upstream version 25.8.1.

Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
Daniel Baumann 2025-02-13 21:52:10 +01:00
parent 1d73cb497c
commit 50df8dea29
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
61 changed files with 50550 additions and 50354 deletions

View file

@@ -322,6 +322,7 @@ class BigQuery(Dialect):
"ANY TYPE": TokenType.VARIANT,
"BEGIN": TokenType.COMMAND,
"BEGIN TRANSACTION": TokenType.BEGIN,
"BYTEINT": TokenType.INT,
"BYTES": TokenType.BINARY,
"CURRENT_DATETIME": TokenType.CURRENT_DATETIME,
"DATETIME": TokenType.TIMESTAMP,

View file

@@ -81,6 +81,14 @@ def _build_count_if(args: t.List) -> exp.CountIf | exp.CombinedAggFunc:
return exp.CombinedAggFunc(this="countIf", expressions=args, parts=("count", "If"))
def _build_str_to_date(args: t.List) -> exp.Cast | exp.Anonymous:
if len(args) == 3:
return exp.Anonymous(this="STR_TO_DATE", expressions=args)
strtodate = exp.StrToDate.from_arg_list(args)
return exp.cast(strtodate, exp.DataType.build(exp.DataType.Type.DATETIME))
def _datetime_delta_sql(name: str) -> t.Callable[[Generator, DATETIME_DELTA], str]:
def _delta_sql(self: Generator, expression: DATETIME_DELTA) -> str:
if not expression.unit:
@@ -181,6 +189,7 @@ class ClickHouse(Dialect):
"MAP": parser.build_var_map,
"MATCH": exp.RegexpLike.from_arg_list,
"RANDCANONICAL": exp.Rand.from_arg_list,
"STR_TO_DATE": _build_str_to_date,
"TUPLE": exp.Struct.from_arg_list,
"TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None),
"TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None),
@@ -836,6 +845,24 @@ class ClickHouse(Dialect):
"NAMED COLLECTION",
}
def strtodate_sql(self, expression: exp.StrToDate) -> str:
strtodate_sql = self.function_fallback_sql(expression)
if not isinstance(expression.parent, exp.Cast):
# StrToDate returns DATEs in other dialects (eg. postgres), so
# this branch aims to improve the transpilation to clickhouse
return f"CAST({strtodate_sql} AS DATE)"
return strtodate_sql
def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
this = expression.this
if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"):
return self.sql(this)
return super().cast_sql(expression, safe_prefix=safe_prefix)
def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str:
this = self.json_path_part(expression.this)
return str(int(this) + 1) if is_int(this) else this

View file

@@ -158,7 +158,7 @@ def _struct_sql(self: DuckDB.Generator, expression: exp.Struct) -> str:
def _datatype_sql(self: DuckDB.Generator, expression: exp.DataType) -> str:
if expression.is_type("array"):
return f"{self.expressions(expression, flat=True)}[]"
return f"{self.expressions(expression, flat=True)}[{self.expressions(expression, key='values', flat=True)}]"
# Type TIMESTAMP / TIME WITH TIME ZONE does not support any modifiers
if expression.is_type("timestamptz", "timetz"):
@@ -186,9 +186,14 @@ def _unix_to_time_sql(self: DuckDB.Generator, expression: exp.UnixToTime) -> str
return self.func("TO_TIMESTAMP", exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)))
WRAPPED_JSON_EXTRACT_EXPRESSIONS = (exp.Binary, exp.Bracket, exp.In)
def _arrow_json_extract_sql(self: DuckDB.Generator, expression: JSON_EXTRACT_TYPE) -> str:
arrow_sql = arrow_json_extract_sql(self, expression)
if not expression.same_parent and isinstance(expression.parent, (exp.Binary, exp.Bracket)):
if not expression.same_parent and isinstance(
expression.parent, WRAPPED_JSON_EXTRACT_EXPRESSIONS
):
arrow_sql = self.wrap(arrow_sql)
return arrow_sql
@@ -238,14 +243,15 @@ class DuckDB(Dialect):
"POSITIONAL": TokenType.POSITIONAL,
"SIGNED": TokenType.INT,
"STRING": TokenType.TEXT,
"UBIGINT": TokenType.UBIGINT,
"UINTEGER": TokenType.UINT,
"USMALLINT": TokenType.USMALLINT,
"UTINYINT": TokenType.UTINYINT,
"SUMMARIZE": TokenType.SUMMARIZE,
"TIMESTAMP_S": TokenType.TIMESTAMP_S,
"TIMESTAMP_MS": TokenType.TIMESTAMP_MS,
"TIMESTAMP_NS": TokenType.TIMESTAMP_NS,
"TIMESTAMP_US": TokenType.TIMESTAMP,
"UBIGINT": TokenType.UBIGINT,
"UINTEGER": TokenType.UINT,
"USMALLINT": TokenType.USMALLINT,
"UTINYINT": TokenType.UTINYINT,
"VARCHAR": TokenType.TEXT,
}
KEYWORDS.pop("/*+")
@@ -744,10 +750,9 @@ class DuckDB(Dialect):
def generateseries_sql(self, expression: exp.GenerateSeries) -> str:
# GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b)
if expression.args.get("is_end_exclusive"):
expression.set("is_end_exclusive", None)
return rename_func("RANGE")(self, expression)
return super().generateseries_sql(expression)
return self.function_fallback_sql(expression)
def bracket_sql(self, expression: exp.Bracket) -> str:
this = expression.this

View file

@@ -168,16 +168,13 @@ def _serial_to_generated(expression: exp.Expression) -> exp.Expression:
def _build_generate_series(args: t.List) -> exp.GenerateSeries:
# The goal is to convert step values like '1 day' or INTERVAL '1 day' into INTERVAL '1' day
# Note: postgres allows calls with just two arguments -- the "step" argument defaults to 1
step = seq_get(args, 2)
if step is None:
# Postgres allows calls with just two arguments -- the "step" argument defaults to 1
return exp.GenerateSeries.from_arg_list(args)
if step.is_string:
args[2] = exp.to_interval(step.this)
elif isinstance(step, exp.Interval) and not step.args.get("unit"):
args[2] = exp.to_interval(step.this.this)
if step is not None:
if step.is_string:
args[2] = exp.to_interval(step.this)
elif isinstance(step, exp.Interval) and not step.args.get("unit"):
args[2] = exp.to_interval(step.this.this)
return exp.GenerateSeries.from_arg_list(args)

View file

@@ -393,9 +393,6 @@ class Presto(Dialect):
TRANSFORMS = {
**generator.Generator.TRANSFORMS,
exp.AnyValue: rename_func("ARBITRARY"),
exp.ApproxDistinct: lambda self, e: self.func(
"APPROX_DISTINCT", e.this, e.args.get("accuracy")
),
exp.ApproxQuantile: rename_func("APPROX_PERCENTILE"),
exp.ArgMax: rename_func("MAX_BY"),
exp.ArgMin: rename_func("MIN_BY"),

View file

@@ -223,7 +223,7 @@ class SQLite(Dialect):
exp.select(exp.alias_("value", column_alias)).from_(expression).subquery()
)
else:
sql = super().generateseries_sql(expression)
sql = self.function_fallback_sql(expression)
return sql

View file

@@ -322,6 +322,15 @@ def _build_with_arg_as_text(
return _parse
def _build_json_query(args: t.List, dialect: Dialect) -> exp.JSONExtract:
if len(args) == 1:
# The default value for path is '$'. As a result, if you don't provide a
# value for path, JSON_QUERY returns the input expression.
args.append(exp.Literal.string("$"))
return parser.build_extract_json_with_path(exp.JSONExtract)(args, dialect)
def _json_extract_sql(
self: TSQL.Generator, expression: exp.JSONExtract | exp.JSONExtractScalar
) -> str:
@@ -510,7 +519,7 @@ class TSQL(Dialect):
"GETDATE": exp.CurrentTimestamp.from_arg_list,
"HASHBYTES": _build_hashbytes,
"ISNULL": exp.Coalesce.from_arg_list,
"JSON_QUERY": parser.build_extract_json_with_path(exp.JSONExtract),
"JSON_QUERY": _build_json_query,
"JSON_VALUE": parser.build_extract_json_with_path(exp.JSONExtractScalar),
"LEN": _build_with_arg_as_text(exp.Length),
"LEFT": _build_with_arg_as_text(exp.Left),
@@ -790,6 +799,7 @@ class TSQL(Dialect):
PARSE_JSON_NAME = None
EXPRESSIONS_WITHOUT_NESTED_CTES = {
exp.Create,
exp.Delete,
exp.Insert,
exp.Intersect,
@@ -989,31 +999,32 @@ class TSQL(Dialect):
kind = expression.kind
exists = expression.args.pop("exists", None)
if kind == "VIEW":
expression.this.set("catalog", None)
sql = super().create_sql(expression)
like_property = expression.find(exp.LikeProperty)
if like_property:
ctas_expression = like_property.this
else:
ctas_expression = expression.expression
if kind == "VIEW":
expression.this.set("catalog", None)
with_ = expression.args.get("with")
if ctas_expression and with_:
# We've already preprocessed the Create expression to bubble up any nested CTEs,
# but CREATE VIEW actually requires the WITH clause to come after it so we need
# to amend the AST by moving the CTEs to the CREATE VIEW statement's query.
ctas_expression.set("with", with_.pop())
sql = super().create_sql(expression)
table = expression.find(exp.Table)
# Convert CTAS statement to SELECT .. INTO ..
if kind == "TABLE" and ctas_expression:
ctas_with = ctas_expression.args.get("with")
if ctas_with:
ctas_with = ctas_with.pop()
if isinstance(ctas_expression, exp.UNWRAPPED_QUERIES):
ctas_expression = ctas_expression.subquery()
select_into = exp.select("*").from_(exp.alias_(ctas_expression, "temp", table=True))
select_into.set("into", exp.Into(this=table))
select_into.set("with", ctas_with)
if like_property:
select_into.limit(0, copy=False)

View file

@@ -1439,6 +1439,11 @@ class Describe(Expression):
arg_types = {"this": True, "style": False, "kind": False, "expressions": False}
# https://duckdb.org/docs/guides/meta/summarize.html
class Summarize(Expression):
arg_types = {"this": True, "table": False}
class Kill(Expression):
arg_types = {"this": True, "kind": False}

View file

@@ -105,12 +105,6 @@ class Generator(metaclass=_Generator):
exp.InlineLengthColumnConstraint: lambda self, e: f"INLINE LENGTH {self.sql(e, 'this')}",
exp.InputModelProperty: lambda self, e: f"INPUT{self.sql(e, 'this')}",
exp.IntervalSpan: lambda self, e: f"{self.sql(e, 'this')} TO {self.sql(e, 'expression')}",
exp.JSONExtract: lambda self, e: self.func(
"JSON_EXTRACT", e.this, e.expression, *e.expressions
),
exp.JSONExtractScalar: lambda self, e: self.func(
"JSON_EXTRACT_SCALAR", e.this, e.expression, *e.expressions
),
exp.LanguageProperty: lambda self, e: self.naked_property(e),
exp.LocationProperty: lambda self, e: self.naked_property(e),
exp.LogProperty: lambda _, e: f"{'NO ' if e.args.get('no') else ''}LOG",
@@ -146,7 +140,6 @@ class Generator(metaclass=_Generator):
exp.TemporaryProperty: lambda *_: "TEMPORARY",
exp.TagColumnConstraint: lambda self, e: f"TAG ({self.expressions(e, flat=True)})",
exp.TitleColumnConstraint: lambda self, e: f"TITLE {self.sql(e, 'this')}",
exp.Timestamp: lambda self, e: self.func("TIMESTAMP", e.this, e.args.get("zone")),
exp.ToMap: lambda self, e: f"MAP {self.sql(e, 'this')}",
exp.ToTableProperty: lambda self, e: f"TO {self.sql(e.this)}",
exp.TransformModelProperty: lambda self, e: self.func("TRANSFORM", *e.expressions),
@@ -1846,7 +1839,7 @@ class Generator(metaclass=_Generator):
return f"{this} {kind} {expr}"
def tuple_sql(self, expression: exp.Tuple) -> str:
return f"({self.expressions(expression, flat=True)})"
return f"({self.expressions(expression, dynamic=True, new_line=True, skip_first=True, skip_last=True)})"
def update_sql(self, expression: exp.Update) -> str:
this = self.sql(expression, "this")
@@ -2994,9 +2987,6 @@ class Generator(metaclass=_Generator):
zone = self.sql(expression, "this")
return f"CURRENT_DATE({zone})" if zone else "CURRENT_DATE"
def currenttimestamp_sql(self, expression: exp.CurrentTimestamp) -> str:
return self.func("CURRENT_TIMESTAMP", expression.this)
def collate_sql(self, expression: exp.Collate) -> str:
if self.COLLATE_IS_FUNC:
return self.function_fallback_sql(expression)
@@ -3354,7 +3344,9 @@ class Generator(metaclass=_Generator):
return f"{self.normalize_func(name)}{prefix}{self.format_args(*args)}{suffix}"
def format_args(self, *args: t.Optional[str | exp.Expression]) -> str:
arg_sqls = tuple(self.sql(arg) for arg in args if arg is not None)
arg_sqls = tuple(
self.sql(arg) for arg in args if arg is not None and not isinstance(arg, bool)
)
if self.pretty and self.too_wide(arg_sqls):
return self.indent("\n" + ",\n".join(arg_sqls) + "\n", skip_first=True, skip_last=True)
return ", ".join(arg_sqls)
@@ -3397,12 +3389,8 @@ class Generator(metaclass=_Generator):
return sep.join(sql for sql in (self.sql(e) for e in expressions) if sql)
num_sqls = len(expressions)
# These are calculated once in case we have the leading_comma / pretty option set, correspondingly
if self.pretty and not self.leading_comma:
stripped_sep = sep.strip()
result_sqls = []
for i, e in enumerate(expressions):
sql = self.sql(e, comment=False)
if not sql:
@@ -3415,7 +3403,7 @@ class Generator(metaclass=_Generator):
result_sqls.append(f"{sep if i > 0 else ''}{prefix}{sql}{comments}")
else:
result_sqls.append(
f"{prefix}{sql}{stripped_sep if i + 1 < num_sqls else ''}{comments}"
f"{prefix}{sql}{(sep.rstrip() if comments else sep) if i + 1 < num_sqls else ''}{comments}"
)
else:
result_sqls.append(f"{prefix}{sql}{comments}{sep if i + 1 < num_sqls else ''}")
@@ -3424,7 +3412,7 @@ class Generator(metaclass=_Generator):
if new_line:
result_sqls.insert(0, "")
result_sqls.append("")
result_sql = "\n".join(result_sqls)
result_sql = "\n".join(s.rstrip() for s in result_sqls)
else:
result_sql = "".join(result_sqls)
return (
@@ -3761,10 +3749,6 @@ class Generator(metaclass=_Generator):
return self.function_fallback_sql(expression)
def generateseries_sql(self, expression: exp.GenerateSeries) -> str:
expression.set("is_end_exclusive", None)
return self.function_fallback_sql(expression)
def struct_sql(self, expression: exp.Struct) -> str:
expression.set(
"expressions",
@@ -4027,9 +4011,6 @@ class Generator(metaclass=_Generator):
return self.func(self.PARSE_JSON_NAME, expression.this, expression.expression)
def length_sql(self, expression: exp.Length) -> str:
return self.func("LENGTH", expression.this)
def rand_sql(self, expression: exp.Rand) -> str:
lower = self.sql(expression, "lower")
upper = self.sql(expression, "upper")
@@ -4038,17 +4019,6 @@ class Generator(metaclass=_Generator):
return f"({upper} - {lower}) * {self.func('RAND', expression.this)} + {lower}"
return self.func("RAND", expression.this)
def strtodate_sql(self, expression: exp.StrToDate) -> str:
return self.func("STR_TO_DATE", expression.this, expression.args.get("format"))
def strtotime_sql(self, expression: exp.StrToTime) -> str:
return self.func(
"STR_TO_TIME",
expression.this,
expression.args.get("format"),
expression.args.get("zone"),
)
def changes_sql(self, expression: exp.Changes) -> str:
information = self.sql(expression, "information")
information = f"INFORMATION => {information}"
@@ -4067,3 +4037,7 @@ class Generator(metaclass=_Generator):
fill_pattern = "' '"
return self.func(f"{prefix}PAD", expression.this, expression.expression, fill_pattern)
def summarize_sql(self, expression: exp.Summarize) -> str:
table = " TABLE" if expression.args.get("table") else ""
return f"SUMMARIZE{table} {self.sql(expression.this)}"

View file

@@ -179,8 +179,9 @@ def apply_index_offset(
if not expression.type:
annotate_types(expression)
if t.cast(exp.DataType, expression.type).this in exp.DataType.INTEGER_TYPES:
logger.warning("Applying array index offset (%s)", offset)
logger.info("Applying array index offset (%s)", offset)
expression = simplify(expression + offset)
return [expression]

View file

@@ -393,6 +393,7 @@ class Parser(metaclass=_Parser):
TokenType.COMMIT,
TokenType.CONSTRAINT,
TokenType.COPY,
TokenType.CUBE,
TokenType.DEFAULT,
TokenType.DELETE,
TokenType.DESC,
@@ -673,7 +674,7 @@ class Parser(metaclass=_Parser):
exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
exp.Column: lambda self: self._parse_column(),
exp.Condition: lambda self: self._parse_assignment(),
exp.DataType: lambda self: self._parse_types(allow_identifiers=False),
exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True),
exp.Expression: lambda self: self._parse_expression(),
exp.From: lambda self: self._parse_from(joins=True),
exp.Group: lambda self: self._parse_group(),
@@ -2825,12 +2826,14 @@ class Parser(metaclass=_Parser):
this = self._parse_derived_table_values()
elif from_:
this = exp.select("*").from_(from_.this, copy=False)
elif self._match(TokenType.SUMMARIZE):
table = self._match(TokenType.TABLE)
this = self._parse_select() or self._parse_string() or self._parse_table()
return self.expression(exp.Summarize, this=this, table=table)
else:
this = None
if parse_set_operation:
return self._parse_set_operations(this)
return this
return self._parse_set_operations(this) if parse_set_operation else this
def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
if not skip_with_token and not self._match(TokenType.WITH):
@@ -3825,7 +3828,7 @@ class Parser(metaclass=_Parser):
while True:
expressions = self._parse_csv(
lambda: None
if self._match(TokenType.ROLLUP, advance=False)
if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False)
else self._parse_assignment()
)
if expressions:
@@ -4613,7 +4616,11 @@ class Parser(metaclass=_Parser):
matched_array = False
values = self._parse_csv(self._parse_assignment) or None
if values and not schema:
if (
values
and not schema
and this.is_type(exp.DataType.Type.ARRAY, exp.DataType.Type.MAP)
):
self._retreat(index)
break

View file

@@ -364,6 +364,7 @@ class TokenType(AutoName):
STORAGE_INTEGRATION = auto()
STRAIGHT_JOIN = auto()
STRUCT = auto()
SUMMARIZE = auto()
TABLE_SAMPLE = auto()
TAG = auto()
TEMPORARY = auto()