
Merging upstream version 10.5.2.

Signed-off-by: Daniel Baumann <daniel@debian.org>
Author: Daniel Baumann, 2025-02-13 15:03:38 +01:00
Parent: 77197f1e44
Commit: e0f3bbb5f3
Signed by: daniel (GPG key ID: FBB4F0E80A80222F)
58 changed files with 1480 additions and 383 deletions

sqlglot/dialects/bigquery.py

@@ -2,7 +2,7 @@
 from __future__ import annotations
 
-from sqlglot import exp, generator, parser, tokens
+from sqlglot import exp, generator, parser, tokens, transforms
 from sqlglot.dialects.dialect import (
     Dialect,
     datestrtodate_sql,
@@ -46,8 +46,9 @@ def _date_add_sql(data_type, kind):
 
 def _derived_table_values_to_unnest(self, expression):
     if not isinstance(expression.unnest().parent, exp.From):
+        expression = transforms.remove_precision_parameterized_types(expression)
         return self.values_sql(expression)
-    rows = [list(tuple_exp.find_all(exp.Literal)) for tuple_exp in expression.find_all(exp.Tuple)]
+    rows = [tuple_exp.expressions for tuple_exp in expression.find_all(exp.Tuple)]
     structs = []
     for row in rows:
         aliases = [
@@ -118,6 +119,7 @@ class BigQuery(Dialect):
             "BEGIN TRANSACTION": TokenType.BEGIN,
             "CURRENT_DATETIME": TokenType.CURRENT_DATETIME,
             "CURRENT_TIME": TokenType.CURRENT_TIME,
+            "DECLARE": TokenType.COMMAND,
             "GEOGRAPHY": TokenType.GEOGRAPHY,
             "FLOAT64": TokenType.DOUBLE,
             "INT64": TokenType.BIGINT,
@@ -166,6 +168,7 @@ class BigQuery(Dialect):
     class Generator(generator.Generator):
         TRANSFORMS = {
             **generator.Generator.TRANSFORMS,  # type: ignore
+            **transforms.REMOVE_PRECISION_PARAMETERIZED_TYPES,  # type: ignore
             exp.ArraySize: rename_func("ARRAY_LENGTH"),
             exp.DateAdd: _date_add_sql("DATE", "ADD"),
             exp.DateSub: _date_add_sql("DATE", "SUB"),
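
In practice, the new transform strips precision and scale arguments from parameterized types, which BigQuery types do not accept. A minimal sketch (hypothetical column name; the exact type mapping can vary by sqlglot version):

import sqlglot

# DECIMAL(10, 2) carries precision parameters that BigQuery types don't take;
# REMOVE_PRECISION_PARAMETERIZED_TYPES drops them during generation.
print(sqlglot.transpile("SELECT CAST(x AS DECIMAL(10, 2))", write="bigquery")[0])
# Expected along the lines of: SELECT CAST(x AS NUMERIC)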

sqlglot/dialects/clickhouse.py

@@ -1,5 +1,7 @@
 from __future__ import annotations
 
+import typing as t
+
 from sqlglot import exp, generator, parser, tokens
 from sqlglot.dialects.dialect import Dialect, inline_array_sql, var_map_sql
 from sqlglot.parser import parse_var_map
@@ -22,6 +24,7 @@ class ClickHouse(Dialect):
         KEYWORDS = {
             **tokens.Tokenizer.KEYWORDS,
             "ASOF": TokenType.ASOF,
+            "GLOBAL": TokenType.GLOBAL,
             "DATETIME64": TokenType.DATETIME,
             "FINAL": TokenType.FINAL,
             "FLOAT32": TokenType.FLOAT,
@@ -37,14 +40,32 @@ class ClickHouse(Dialect):
         FUNCTIONS = {
             **parser.Parser.FUNCTIONS,  # type: ignore
             "MAP": parse_var_map,
+            "QUANTILE": lambda params, args: exp.Quantile(this=args, quantile=params),
+            "QUANTILES": lambda params, args: exp.Quantiles(parameters=params, expressions=args),
+            "QUANTILEIF": lambda params, args: exp.QuantileIf(parameters=params, expressions=args),
         }
 
+        RANGE_PARSERS = {
+            **parser.Parser.RANGE_PARSERS,
+            TokenType.GLOBAL: lambda self, this: self._match(TokenType.IN)
+            and self._parse_in(this, is_global=True),
+        }
+
         JOIN_KINDS = {*parser.Parser.JOIN_KINDS, TokenType.ANY, TokenType.ASOF}  # type: ignore
 
         TABLE_ALIAS_TOKENS = {*parser.Parser.TABLE_ALIAS_TOKENS} - {TokenType.ANY}  # type: ignore
 
-        def _parse_table(self, schema=False):
-            this = super()._parse_table(schema)
+        def _parse_in(
+            self, this: t.Optional[exp.Expression], is_global: bool = False
+        ) -> exp.Expression:
+            this = super()._parse_in(this)
+            this.set("is_global", is_global)
+            return this
+
+        def _parse_table(
+            self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None
+        ) -> t.Optional[exp.Expression]:
+            this = super()._parse_table(schema=schema, alias_tokens=alias_tokens)
 
             if self._match(TokenType.FINAL):
                 this = self.expression(exp.Final, this=this)
@@ -76,6 +97,16 @@ class ClickHouse(Dialect):
             exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL",
             exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)),
             exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)),
+            exp.Quantile: lambda self, e: f"quantile{self._param_args_sql(e, 'quantile', 'this')}",
+            exp.Quantiles: lambda self, e: f"quantiles{self._param_args_sql(e, 'parameters', 'expressions')}",
+            exp.QuantileIf: lambda self, e: f"quantileIf{self._param_args_sql(e, 'parameters', 'expressions')}",
         }
 
         EXPLICIT_UNION = True
 
+        def _param_args_sql(
+            self, expression: exp.Expression, params_name: str, args_name: str
+        ) -> str:
+            params = self.format_args(self.expressions(expression, params_name))
+            args = self.format_args(self.expressions(expression, args_name))
+            return f"({params})({args})"
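
These hooks cover ClickHouse's parameterized aggregates, which take two argument lists, and its GLOBAL IN operator. A round-trip sketch (hypothetical table and column names; assumes a sqlglot build with these changes):

import sqlglot

# quantile(level)(expr) uses one list for parameters and one for arguments;
# _param_args_sql renders both lists back out.
print(sqlglot.transpile("SELECT quantile(0.5)(sales) FROM orders", read="clickhouse", write="clickhouse")[0])

# GLOBAL now tokenizes ahead of IN, so GLOBAL IN parses as an In node with is_global set.
print(sqlglot.transpile("SELECT * FROM t WHERE id GLOBAL IN (SELECT id FROM u)", read="clickhouse", write="clickhouse")[0])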

sqlglot/dialects/dialect.py

@@ -381,3 +381,20 @@ def timestrtotime_sql(self, expression: exp.TimeStrToTime) -> str:
 
 def datestrtodate_sql(self, expression: exp.DateStrToDate) -> str:
     return f"CAST({self.sql(expression, 'this')} AS DATE)"
+
+
+def trim_sql(self, expression):
+    target = self.sql(expression, "this")
+    trim_type = self.sql(expression, "position")
+    remove_chars = self.sql(expression, "expression")
+    collation = self.sql(expression, "collation")
+
+    # Use TRIM/LTRIM/RTRIM syntax if the expression isn't database-specific
+    if not remove_chars and not collation:
+        return self.trim_sql(expression)
+
+    trim_type = f"{trim_type} " if trim_type else ""
+    remove_chars = f"{remove_chars} " if remove_chars else ""
+    from_part = "FROM " if trim_type or remove_chars else ""
+    collation = f" COLLATE {collation}" if collation else ""
+    return f"TRIM({trim_type}{remove_chars}{from_part}{target}{collation})"
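
The helper keeps the expanded TRIM(... FROM ...) form only when a character list or collation is present; otherwise it defers to the generator's default TRIM/LTRIM/RTRIM handling. A sketch (hypothetical column name):

import sqlglot

# LEADING 'x' populates the position and expression args, so the FROM form is preserved.
print(sqlglot.transpile("SELECT TRIM(LEADING 'x' FROM col)", read="postgres", write="oracle")[0])
# Expected along the lines of: SELECT TRIM(LEADING 'x' FROM col)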

sqlglot/dialects/hive.py

@@ -175,14 +175,6 @@ class Hive(Dialect):
         ESCAPES = ["\\"]
         ENCODE = "utf-8"
 
-        NUMERIC_LITERALS = {
-            "L": "BIGINT",
-            "S": "SMALLINT",
-            "Y": "TINYINT",
-            "D": "DOUBLE",
-            "F": "FLOAT",
-            "BD": "DECIMAL",
-        }
         KEYWORDS = {
             **tokens.Tokenizer.KEYWORDS,
             "ADD ARCHIVE": TokenType.COMMAND,
@@ -191,9 +183,21 @@ class Hive(Dialect):
             "ADD FILES": TokenType.COMMAND,
             "ADD JAR": TokenType.COMMAND,
             "ADD JARS": TokenType.COMMAND,
+            "MSCK REPAIR": TokenType.COMMAND,
             "WITH SERDEPROPERTIES": TokenType.SERDE_PROPERTIES,
         }
 
+        NUMERIC_LITERALS = {
+            "L": "BIGINT",
+            "S": "SMALLINT",
+            "Y": "TINYINT",
+            "D": "DOUBLE",
+            "F": "FLOAT",
+            "BD": "DECIMAL",
+        }
+
+        IDENTIFIER_CAN_START_WITH_DIGIT = True
+
     class Parser(parser.Parser):
         STRICT_CAST = False
@@ -315,6 +319,7 @@ class Hive(Dialect):
             exp.RowFormatSerdeProperty: lambda self, e: f"ROW FORMAT SERDE {self.sql(e, 'this')}",
             exp.SerdeProperties: lambda self, e: self.properties(e, prefix="WITH SERDEPROPERTIES"),
             exp.NumberToStr: rename_func("FORMAT_NUMBER"),
+            exp.LastDateOfMonth: rename_func("LAST_DAY"),
         }
 
         WITH_PROPERTIES = {exp.Property}
@@ -342,4 +347,6 @@ class Hive(Dialect):
                 and not expression.expressions
             ):
                 expression = exp.DataType.build("text")
+            elif expression.this in exp.DataType.TEMPORAL_TYPES:
+                expression = exp.DataType.build(expression.this)
             return super().datatype_sql(expression)
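
Hive's temporal types take no precision argument, so the new branch rebuilds the bare type before generation. A sketch (hypothetical column name; assumes the default parser accepts the parameterized form):

import sqlglot

# TIMESTAMP(6) is rebuilt as plain TIMESTAMP for Hive.
print(sqlglot.transpile("SELECT CAST(x AS TIMESTAMP(6))", write="hive")[0])
# Expected along the lines of: SELECT CAST(x AS TIMESTAMP)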

sqlglot/dialects/oracle.py

@@ -1,7 +1,7 @@
 from __future__ import annotations
 
 from sqlglot import exp, generator, parser, tokens, transforms
-from sqlglot.dialects.dialect import Dialect, no_ilike_sql, rename_func
+from sqlglot.dialects.dialect import Dialect, no_ilike_sql, rename_func, trim_sql
 from sqlglot.helper import csv
 from sqlglot.tokens import TokenType
@@ -64,6 +64,7 @@ class Oracle(Dialect):
             **transforms.UNALIAS_GROUP,  # type: ignore
             exp.ILike: no_ilike_sql,
             exp.Limit: _limit_sql,
+            exp.Trim: trim_sql,
             exp.Matches: rename_func("DECODE"),
             exp.StrToTime: lambda self, e: f"TO_TIMESTAMP({self.sql(e, 'this')}, {self.format_time(e)})",
             exp.TimeToStr: lambda self, e: f"TO_CHAR({self.sql(e, 'this')}, {self.format_time(e)})",
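
Oracle now routes exp.Trim through the shared trim_sql helper instead of the generator default. A round-trip sketch (hypothetical identifiers):

import sqlglot

# With remove-characters present, the expanded TRIM form survives the round trip.
print(sqlglot.transpile("SELECT TRIM(BOTH '*' FROM name) FROM t", read="oracle", write="oracle")[0])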

sqlglot/dialects/postgres.py

@@ -10,6 +10,7 @@ from sqlglot.dialects.dialect import (
     no_tablesample_sql,
     no_trycast_sql,
     str_position_sql,
+    trim_sql,
 )
 from sqlglot.helper import seq_get
 from sqlglot.tokens import TokenType
@@ -81,23 +82,6 @@ def _substring_sql(self, expression):
     return f"SUBSTRING({this}{from_part}{for_part})"
 
 
-def _trim_sql(self, expression):
-    target = self.sql(expression, "this")
-    trim_type = self.sql(expression, "position")
-    remove_chars = self.sql(expression, "expression")
-    collation = self.sql(expression, "collation")
-
-    # Use TRIM/LTRIM/RTRIM syntax if the expression isn't postgres-specific
-    if not remove_chars and not collation:
-        return self.trim_sql(expression)
-
-    trim_type = f"{trim_type} " if trim_type else ""
-    remove_chars = f"{remove_chars} " if remove_chars else ""
-    from_part = "FROM " if trim_type or remove_chars else ""
-    collation = f" COLLATE {collation}" if collation else ""
-    return f"TRIM({trim_type}{remove_chars}{from_part}{target}{collation})"
-
-
 def _string_agg_sql(self, expression):
     expression = expression.copy()
     separator = expression.args.get("separator") or exp.Literal.string(",")
@@ -248,7 +232,6 @@ class Postgres(Dialect):
             "COMMENT ON": TokenType.COMMAND,
             "DECLARE": TokenType.COMMAND,
             "DO": TokenType.COMMAND,
-            "DOUBLE PRECISION": TokenType.DOUBLE,
             "GENERATED": TokenType.GENERATED,
             "GRANT": TokenType.COMMAND,
             "HSTORE": TokenType.HSTORE,
@@ -318,7 +301,7 @@ class Postgres(Dialect):
             exp.Substring: _substring_sql,
             exp.TimeToStr: lambda self, e: f"TO_CHAR({self.sql(e, 'this')}, {self.format_time(e)})",
             exp.TableSample: no_tablesample_sql,
-            exp.Trim: _trim_sql,
+            exp.Trim: trim_sql,
             exp.TryCast: no_trycast_sql,
             exp.UnixToTime: lambda self, e: f"TO_TIMESTAMP({self.sql(e, 'this')})",
             exp.DataType: _datatype_sql,
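
For Postgres the behavior is unchanged: _trim_sql moved verbatim into dialect.py as trim_sql (only "postgres-specific" in the comment became "database-specific"). A sketch confirming the default path (hypothetical column name):

import sqlglot

# A bare TRIM has no remove-characters or collation, so it falls through
# to the generator's default TRIM handling, as before.
print(sqlglot.transpile("SELECT TRIM(col)", read="postgres", write="postgres")[0])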

sqlglot/dialects/snowflake.py

@@ -195,7 +195,6 @@ class Snowflake(Dialect):
         KEYWORDS = {
             **tokens.Tokenizer.KEYWORDS,
             "QUALIFY": TokenType.QUALIFY,
-            "DOUBLE PRECISION": TokenType.DOUBLE,
             "TIMESTAMP_LTZ": TokenType.TIMESTAMPLTZ,
             "TIMESTAMP_NTZ": TokenType.TIMESTAMP,
             "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
@@ -294,3 +293,10 @@ class Snowflake(Dialect):
                 )
                 return self.no_identify(lambda: super(self.__class__, self).select_sql(expression))
             return super().select_sql(expression)
+
+        def describe_sql(self, expression: exp.Describe) -> str:
+            # Default to table if kind is unknown
+            kind_value = expression.args.get("kind") or "TABLE"
+            kind = f" {kind_value}" if kind_value else ""
+            this = f" {self.sql(expression, 'this')}"
+            return f"DESCRIBE{kind}{this}"
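
With describe_sql in place, a Describe node with no explicit kind renders with TABLE. A minimal sketch using sqlglot's expression builder (hypothetical table name):

from sqlglot import exp
from sqlglot.dialects.snowflake import Snowflake

# kind is unset here, so the generator falls back to TABLE.
describe = exp.Describe(this=exp.to_table("my_table"))
print(Snowflake().generate(describe))
# Expected along the lines of: DESCRIBE TABLE my_table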

sqlglot/dialects/tsql.py

@@ -75,6 +75,20 @@ def _parse_format(args):
     )
 
 
+def _parse_eomonth(args):
+    date = seq_get(args, 0)
+    month_lag = seq_get(args, 1)
+    unit = DATE_DELTA_INTERVAL.get("month")
+
+    if month_lag is None:
+        return exp.LastDateOfMonth(this=date)
+
+    # Remove the month lag argument in the parser, as it's compared with the number of arguments of the resulting class
+    args.remove(month_lag)
+
+    return exp.LastDateOfMonth(this=exp.DateAdd(this=date, expression=month_lag, unit=unit))
+
+
 def generate_date_delta_with_unit_sql(self, e):
     func = "DATEADD" if isinstance(e, exp.DateAdd) else "DATEDIFF"
     return f"{func}({self.format_args(e.text('unit'), e.expression, e.this)})"
@@ -256,12 +270,14 @@ class TSQL(Dialect):
             "DATEDIFF": parse_date_delta(exp.DateDiff, unit_mapping=DATE_DELTA_INTERVAL),
             "DATENAME": _format_time_lambda(exp.TimeToStr, full_format_mapping=True),
             "DATEPART": _format_time_lambda(exp.TimeToStr),
-            "GETDATE": exp.CurrentDate.from_arg_list,
+            "GETDATE": exp.CurrentTimestamp.from_arg_list,
+            "SYSDATETIME": exp.CurrentTimestamp.from_arg_list,
             "IIF": exp.If.from_arg_list,
             "LEN": exp.Length.from_arg_list,
             "REPLICATE": exp.Repeat.from_arg_list,
             "JSON_VALUE": exp.JSONExtractScalar.from_arg_list,
             "FORMAT": _parse_format,
+            "EOMONTH": _parse_eomonth,
         }
 
         VAR_LENGTH_DATATYPES = {
@@ -271,6 +287,9 @@ class TSQL(Dialect):
             DataType.Type.NCHAR,
         }
 
+        # https://learn.microsoft.com/en-us/azure/synapse-analytics/sql-data-warehouse/sql-data-warehouse-tables-temporary#create-a-temporary-table
+        TABLE_PREFIX_TOKENS = {TokenType.HASH}
+
         def _parse_convert(self, strict):
             to = self._parse_types()
             self._match(TokenType.COMMA)
@@ -323,6 +342,7 @@ class TSQL(Dialect):
             exp.DateAdd: generate_date_delta_with_unit_sql,
             exp.DateDiff: generate_date_delta_with_unit_sql,
             exp.CurrentDate: rename_func("GETDATE"),
+            exp.CurrentTimestamp: rename_func("GETDATE"),
             exp.If: rename_func("IIF"),
             exp.NumberToStr: _format_sql,
             exp.TimeToStr: _format_sql,
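
Together these make GETDATE round-trip as a timestamp rather than a date, and wire EOMONTH through LastDateOfMonth. A sketch (hypothetical identifiers; output can vary by sqlglot version):

import sqlglot

# EOMONTH maps to LastDateOfMonth, which Hive renders as LAST_DAY.
print(sqlglot.transpile("SELECT EOMONTH(order_date) FROM orders", read="tsql", write="hive")[0])
# Expected along the lines of: SELECT LAST_DAY(order_date) FROM orders

# GETDATE now parses as CurrentTimestamp (and still renders as GETDATE in T-SQL).
print(sqlglot.transpile("SELECT GETDATE()", read="tsql", write="postgres")[0])
# Expected along the lines of: SELECT CURRENT_TIMESTAMP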