Merging upstream version 10.6.3.

Signed-off-by: Daniel Baumann <daniel@debian.org>
2025-02-13 15:09:58 +01:00 · 2025-02-13 15:09:58 +01:00 · ece6881255
commit ece6881255
parent d03a55eda6
48 changed files with 906 additions and 266 deletions
--- a/sqlglot/dialects/bigquery.py
+++ b/sqlglot/dialects/bigquery.py
@ -222,14 +222,6 @@ class BigQuery(Dialect):
            exp.DataType.Type.NVARCHAR: "STRING",
        }

-        ROOT_PROPERTIES = {
-            exp.LanguageProperty,
-            exp.ReturnsProperty,
-            exp.VolatilityProperty,
-        }
-
-        WITH_PROPERTIES = {exp.Property}
-
        EXPLICIT_UNION = True

        def array_sql(self, expression: exp.Array) -> str:
--- a/sqlglot/dialects/dialect.py
+++ b/sqlglot/dialects/dialect.py
@ -122,9 +122,15 @@ class Dialect(metaclass=_Dialect):
    def get_or_raise(cls, dialect):
        if not dialect:
            return cls
+        if isinstance(dialect, _Dialect):
+            return dialect
+        if isinstance(dialect, Dialect):
+            return dialect.__class__
+
        result = cls.get(dialect)
        if not result:
            raise ValueError(f"Unknown dialect '{dialect}'")
+
        return result

    @classmethod
@ -196,6 +202,10 @@ class Dialect(metaclass=_Dialect):
        )


+if t.TYPE_CHECKING:
+    DialectType = t.Union[str, Dialect, t.Type[Dialect], None]
+
+
 def rename_func(name):
    def _rename(self, expression):
        args = flatten(expression.args.values())
--- a/sqlglot/dialects/drill.py
+++ b/sqlglot/dialects/drill.py
@ -137,7 +137,10 @@ class Drill(Dialect):
            exp.DataType.Type.DATETIME: "TIMESTAMP",
        }

-        ROOT_PROPERTIES = {exp.PartitionedByProperty}
+        PROPERTIES_LOCATION = {
+            **generator.Generator.PROPERTIES_LOCATION,  # type: ignore
+            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA_ROOT,
+        }

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,  # type: ignore
--- a/sqlglot/dialects/duckdb.py
+++ b/sqlglot/dialects/duckdb.py
@ -20,10 +20,6 @@ from sqlglot.helper import seq_get
 from sqlglot.tokens import TokenType


-def _unix_to_time(self, expression):
-    return f"TO_TIMESTAMP(CAST({self.sql(expression, 'this')} AS BIGINT))"
-
-
 def _str_to_time_sql(self, expression):
    return f"STRPTIME({self.sql(expression, 'this')}, {self.format_time(expression)})"

@ -113,7 +109,7 @@ class DuckDB(Dialect):
            "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
            "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
            "STRUCT_PACK": exp.Struct.from_arg_list,
-            "TO_TIMESTAMP": exp.TimeStrToTime.from_arg_list,
+            "TO_TIMESTAMP": exp.UnixToTime.from_arg_list,
            "UNNEST": exp.Explode.from_arg_list,
        }

@ -162,9 +158,9 @@ class DuckDB(Dialect):
            exp.TsOrDiToDi: lambda self, e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)",
            exp.TsOrDsAdd: _ts_or_ds_add,
            exp.TsOrDsToDate: _ts_or_ds_to_date_sql,
-            exp.UnixToStr: lambda self, e: f"STRFTIME({_unix_to_time(self, e)}, {self.format_time(e)})",
-            exp.UnixToTime: _unix_to_time,
-            exp.UnixToTimeStr: lambda self, e: f"CAST({_unix_to_time(self, e)} AS TEXT)",
+            exp.UnixToStr: lambda self, e: f"STRFTIME(TO_TIMESTAMP({self.sql(e, 'this')}), {self.format_time(e)})",
+            exp.UnixToTime: rename_func("TO_TIMESTAMP"),
+            exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)",
        }

        TYPE_MAPPING = {
--- a/sqlglot/dialects/hive.py
+++ b/sqlglot/dialects/hive.py
@ -322,17 +322,11 @@ class Hive(Dialect):
            exp.LastDateOfMonth: rename_func("LAST_DAY"),
        }

-        WITH_PROPERTIES = {exp.Property}
-
-        ROOT_PROPERTIES = {
-            exp.PartitionedByProperty,
-            exp.FileFormatProperty,
-            exp.SchemaCommentProperty,
-            exp.LocationProperty,
-            exp.TableFormatProperty,
-            exp.RowFormatDelimitedProperty,
-            exp.RowFormatSerdeProperty,
-            exp.SerdeProperties,
+        PROPERTIES_LOCATION = {
+            **generator.Generator.PROPERTIES_LOCATION,  # type: ignore
+            exp.FileFormatProperty: exp.Properties.Location.POST_SCHEMA_ROOT,
+            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA_ROOT,
+            exp.TableFormatProperty: exp.Properties.Location.POST_SCHEMA_ROOT,
        }

        def with_properties(self, properties):
--- a/sqlglot/dialects/mysql.py
+++ b/sqlglot/dialects/mysql.py
@ -1,7 +1,5 @@
 from __future__ import annotations

-import typing as t
-
 from sqlglot import exp, generator, parser, tokens
 from sqlglot.dialects.dialect import (
    Dialect,
@ -98,6 +96,8 @@ def _date_add_sql(kind):


 class MySQL(Dialect):
+    time_format = "'%Y-%m-%d %T'"
+
    # https://prestodb.io/docs/current/functions/datetime.html#mysql-date-functions
    time_mapping = {
        "%M": "%B",
@ -110,6 +110,7 @@ class MySQL(Dialect):
        "%u": "%W",
        "%k": "%-H",
        "%l": "%-I",
+        "%T": "%H:%M:%S",
    }

    class Tokenizer(tokens.Tokenizer):
@ -428,6 +429,7 @@ class MySQL(Dialect):
            )

    class Generator(generator.Generator):
+        LOCKING_READS_SUPPORTED = True
        NULL_ORDERING_SUPPORTED = False

        TRANSFORMS = {
@ -449,23 +451,12 @@ class MySQL(Dialect):
            exp.StrPosition: strposition_to_locate_sql,
        }

-        ROOT_PROPERTIES = {
-            exp.EngineProperty,
-            exp.AutoIncrementProperty,
-            exp.CharacterSetProperty,
-            exp.CollateProperty,
-            exp.SchemaCommentProperty,
-            exp.LikeProperty,
-        }
-
        TYPE_MAPPING = generator.Generator.TYPE_MAPPING.copy()
        TYPE_MAPPING.pop(exp.DataType.Type.MEDIUMTEXT)
        TYPE_MAPPING.pop(exp.DataType.Type.LONGTEXT)
        TYPE_MAPPING.pop(exp.DataType.Type.MEDIUMBLOB)
        TYPE_MAPPING.pop(exp.DataType.Type.LONGBLOB)

-        WITH_PROPERTIES: t.Set[t.Type[exp.Property]] = set()
-
        def show_sql(self, expression):
            this = f" {expression.name}"
            full = " FULL" if expression.args.get("full") else ""
--- a/sqlglot/dialects/oracle.py
+++ b/sqlglot/dialects/oracle.py
@ -44,6 +44,8 @@ class Oracle(Dialect):
        }

    class Generator(generator.Generator):
+        LOCKING_READS_SUPPORTED = True
+
        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,  # type: ignore
            exp.DataType.Type.TINYINT: "NUMBER",
@ -69,6 +71,7 @@ class Oracle(Dialect):
            exp.StrToTime: lambda self, e: f"TO_TIMESTAMP({self.sql(e, 'this')}, {self.format_time(e)})",
            exp.TimeToStr: lambda self, e: f"TO_CHAR({self.sql(e, 'this')}, {self.format_time(e)})",
            exp.UnixToTime: lambda self, e: f"TO_DATE('1970-01-01','YYYY-MM-DD') + ({self.sql(e, 'this')} / 86400)",
+            exp.Substring: rename_func("SUBSTR"),
        }

        def query_modifiers(self, expression, *sqls):
@ -90,6 +93,7 @@ class Oracle(Dialect):
                self.sql(expression, "order"),
                self.sql(expression, "offset"),  # offset before limit in oracle
                self.sql(expression, "limit"),
+                self.sql(expression, "lock"),
                sep="",
            )

--- a/sqlglot/dialects/postgres.py
+++ b/sqlglot/dialects/postgres.py
@ -148,6 +148,22 @@ def _serial_to_generated(expression):
    return expression


+def _generate_series(args):
+    # The goal is to convert step values like '1 day' or INTERVAL '1 day' into INTERVAL '1' day
+    step = seq_get(args, 2)
+
+    if step is None:
+        # Postgres allows calls with just two arguments -- the "step" argument defaults to 1
+        return exp.GenerateSeries.from_arg_list(args)
+
+    if step.is_string:
+        args[2] = exp.to_interval(step.this)
+    elif isinstance(step, exp.Interval) and not step.args.get("unit"):
+        args[2] = exp.to_interval(step.this.this)
+
+    return exp.GenerateSeries.from_arg_list(args)
+
+
 def _to_timestamp(args):
    # TO_TIMESTAMP accepts either a single double argument or (text, text)
    if len(args) == 1:
@ -195,29 +211,6 @@ class Postgres(Dialect):
        HEX_STRINGS = [("x'", "'"), ("X'", "'")]
        BYTE_STRINGS = [("e'", "'"), ("E'", "'")]

-        CREATABLES = (
-            "AGGREGATE",
-            "CAST",
-            "CONVERSION",
-            "COLLATION",
-            "DEFAULT CONVERSION",
-            "CONSTRAINT",
-            "DOMAIN",
-            "EXTENSION",
-            "FOREIGN",
-            "FUNCTION",
-            "OPERATOR",
-            "POLICY",
-            "ROLE",
-            "RULE",
-            "SEQUENCE",
-            "TEXT",
-            "TRIGGER",
-            "TYPE",
-            "UNLOGGED",
-            "USER",
-        )
-
        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "~~": TokenType.LIKE,
@ -243,8 +236,6 @@ class Postgres(Dialect):
            "TEMP": TokenType.TEMPORARY,
            "UUID": TokenType.UUID,
            "CSTRING": TokenType.PSEUDO_TYPE,
-            **{f"CREATE {kind}": TokenType.COMMAND for kind in CREATABLES},
-            **{f"DROP {kind}": TokenType.COMMAND for kind in CREATABLES},
        }
        QUOTES = ["'", "$$"]
        SINGLE_TOKENS = {
@ -257,8 +248,10 @@ class Postgres(Dialect):

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,  # type: ignore
+            "NOW": exp.CurrentTimestamp.from_arg_list,
            "TO_TIMESTAMP": _to_timestamp,
            "TO_CHAR": format_time_lambda(exp.TimeToStr, "postgres"),
+            "GENERATE_SERIES": _generate_series,
        }

        BITWISE = {
@ -272,6 +265,8 @@ class Postgres(Dialect):
        }

    class Generator(generator.Generator):
+        LOCKING_READS_SUPPORTED = True
+
        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,  # type: ignore
            exp.DataType.Type.TINYINT: "SMALLINT",
--- a/sqlglot/dialects/presto.py
+++ b/sqlglot/dialects/presto.py
@ -105,6 +105,29 @@ def _ts_or_ds_add_sql(self, expression):
    return f"DATE_ADD({unit}, {e}, DATE_PARSE(SUBSTR({this}, 1, 10), {Presto.date_format}))"


+def _sequence_sql(self, expression):
+    start = expression.args["start"]
+    end = expression.args["end"]
+    step = expression.args.get("step", 1)  # Postgres defaults to 1 for generate_series
+
+    target_type = None
+
+    if isinstance(start, exp.Cast):
+        target_type = start.to
+    elif isinstance(end, exp.Cast):
+        target_type = end.to
+
+    if target_type and target_type.this == exp.DataType.Type.TIMESTAMP:
+        to = target_type.copy()
+
+        if target_type is start.to:
+            end = exp.Cast(this=end, to=to)
+        else:
+            start = exp.Cast(this=start, to=to)
+
+    return f"SEQUENCE({self.format_args(start, end, step)})"
+
+
 def _ensure_utf8(charset):
    if charset.name.lower() != "utf-8":
        raise UnsupportedError(f"Unsupported charset {charset}")
@ -145,7 +168,7 @@ def _from_unixtime(args):
 class Presto(Dialect):
    index_offset = 1
    null_ordering = "nulls_are_last"
-    time_format = "'%Y-%m-%d %H:%i:%S'"
+    time_format = MySQL.time_format  # type: ignore
    time_mapping = MySQL.time_mapping  # type: ignore

    class Tokenizer(tokens.Tokenizer):
@ -197,7 +220,10 @@ class Presto(Dialect):
    class Generator(generator.Generator):
        STRUCT_DELIMITER = ("(", ")")

-        ROOT_PROPERTIES = {exp.SchemaCommentProperty}
+        PROPERTIES_LOCATION = {
+            **generator.Generator.PROPERTIES_LOCATION,  # type: ignore
+            exp.LocationProperty: exp.Properties.Location.UNSUPPORTED,
+        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,  # type: ignore
@ -223,6 +249,7 @@ class Presto(Dialect):
            exp.BitwiseOr: lambda self, e: f"BITWISE_OR({self.sql(e, 'this')}, {self.sql(e, 'expression')})",
            exp.BitwiseRightShift: lambda self, e: f"BITWISE_ARITHMETIC_SHIFT_RIGHT({self.sql(e, 'this')}, {self.sql(e, 'expression')})",
            exp.BitwiseXor: lambda self, e: f"BITWISE_XOR({self.sql(e, 'this')}, {self.sql(e, 'expression')})",
+            exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
            exp.DataType: _datatype_sql,
            exp.DateAdd: lambda self, e: f"""DATE_ADD({self.sql(e, 'unit') or "'day'"}, {self.sql(e, 'expression')}, {self.sql(e, 'this')})""",
            exp.DateDiff: lambda self, e: f"""DATE_DIFF({self.sql(e, 'unit') or "'day'"}, {self.sql(e, 'expression')}, {self.sql(e, 'this')})""",
@ -231,6 +258,7 @@ class Presto(Dialect):
            exp.Decode: _decode_sql,
            exp.DiToDate: lambda self, e: f"CAST(DATE_PARSE(CAST({self.sql(e, 'this')} AS VARCHAR), {Presto.dateint_format}) AS DATE)",
            exp.Encode: _encode_sql,
+            exp.GenerateSeries: _sequence_sql,
            exp.Hex: rename_func("TO_HEX"),
            exp.If: if_sql,
            exp.ILike: no_ilike_sql,
--- a/sqlglot/dialects/redshift.py
+++ b/sqlglot/dialects/redshift.py
@ -61,14 +61,9 @@ class Redshift(Postgres):
            exp.DataType.Type.INT: "INTEGER",
        }

-        ROOT_PROPERTIES = {
-            exp.DistKeyProperty,
-            exp.SortKeyProperty,
-            exp.DistStyleProperty,
-        }
-
-        WITH_PROPERTIES = {
-            exp.LikeProperty,
+        PROPERTIES_LOCATION = {
+            **Postgres.Generator.PROPERTIES_LOCATION,  # type: ignore
+            exp.LikeProperty: exp.Properties.Location.POST_SCHEMA_WITH,
        }

        TRANSFORMS = {
--- a/sqlglot/dialects/snowflake.py
+++ b/sqlglot/dialects/snowflake.py
@ -234,15 +234,6 @@ class Snowflake(Dialect):
            "replace": "RENAME",
        }

-        ROOT_PROPERTIES = {
-            exp.PartitionedByProperty,
-            exp.ReturnsProperty,
-            exp.LanguageProperty,
-            exp.SchemaCommentProperty,
-            exp.ExecuteAsProperty,
-            exp.VolatilityProperty,
-        }
-
        def except_op(self, expression):
            if not expression.args.get("distinct", False):
                self.unsupported("EXCEPT with All is not supported in Snowflake")
--- a/sqlglot/dialects/spark.py
+++ b/sqlglot/dialects/spark.py
@ -73,6 +73,19 @@ class Spark(Hive):
            ),
            "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list,
            "IIF": exp.If.from_arg_list,
+            "AGGREGATE": exp.Reduce.from_arg_list,
+            "DAYOFWEEK": lambda args: exp.DayOfWeek(
+                this=exp.TsOrDsToDate(this=seq_get(args, 0)),
+            ),
+            "DAYOFMONTH": lambda args: exp.DayOfMonth(
+                this=exp.TsOrDsToDate(this=seq_get(args, 0)),
+            ),
+            "DAYOFYEAR": lambda args: exp.DayOfYear(
+                this=exp.TsOrDsToDate(this=seq_get(args, 0)),
+            ),
+            "WEEKOFYEAR": lambda args: exp.WeekOfYear(
+                this=exp.TsOrDsToDate(this=seq_get(args, 0)),
+            ),
        }

        FUNCTION_PARSERS = {
@ -105,6 +118,14 @@ class Spark(Hive):
            exp.DataType.Type.BIGINT: "LONG",
        }

+        PROPERTIES_LOCATION = {
+            **Hive.Generator.PROPERTIES_LOCATION,  # type: ignore
+            exp.EngineProperty: exp.Properties.Location.UNSUPPORTED,
+            exp.AutoIncrementProperty: exp.Properties.Location.UNSUPPORTED,
+            exp.CharacterSetProperty: exp.Properties.Location.UNSUPPORTED,
+            exp.CollateProperty: exp.Properties.Location.UNSUPPORTED,
+        }
+
        TRANSFORMS = {
            **Hive.Generator.TRANSFORMS,  # type: ignore
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
@ -126,11 +147,27 @@ class Spark(Hive):
            exp.VariancePop: rename_func("VAR_POP"),
            exp.DateFromParts: rename_func("MAKE_DATE"),
            exp.LogicalOr: rename_func("BOOL_OR"),
+            exp.DayOfWeek: rename_func("DAYOFWEEK"),
+            exp.DayOfMonth: rename_func("DAYOFMONTH"),
+            exp.DayOfYear: rename_func("DAYOFYEAR"),
+            exp.WeekOfYear: rename_func("WEEKOFYEAR"),
+            exp.AtTimeZone: lambda self, e: f"FROM_UTC_TIMESTAMP({self.sql(e, 'this')}, {self.sql(e, 'zone')})",
        }
        TRANSFORMS.pop(exp.ArraySort)
        TRANSFORMS.pop(exp.ILike)

        WRAP_DERIVED_VALUES = False

+        def cast_sql(self, expression: exp.Cast) -> str:
+            if isinstance(expression.this, exp.Cast) and expression.this.is_type(
+                exp.DataType.Type.JSON
+            ):
+                schema = f"'{self.sql(expression, 'to')}'"
+                return f"FROM_JSON({self.format_args(self.sql(expression.this, 'this'), schema)})"
+            if expression.to.is_type(exp.DataType.Type.JSON):
+                return f"TO_JSON({self.sql(expression, 'this')})"
+
+            return super(Spark.Generator, self).cast_sql(expression)
+
    class Tokenizer(Hive.Tokenizer):
        HEX_STRINGS = [("X'", "'")]
--- a/sqlglot/dialects/tableau.py
+++ b/sqlglot/dialects/tableau.py
@ -31,6 +31,5 @@ class Tableau(Dialect):
    class Parser(parser.Parser):
        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,  # type: ignore
-            "IFNULL": exp.Coalesce.from_arg_list,
            "COUNTD": lambda args: exp.Count(this=exp.Distinct(expressions=args)),
        }
--- a/sqlglot/dialects/teradata.py
+++ b/sqlglot/dialects/teradata.py
@ -76,6 +76,14 @@ class Teradata(Dialect):
            )

    class Generator(generator.Generator):
+        PROPERTIES_LOCATION = {
+            **generator.Generator.PROPERTIES_LOCATION,  # type: ignore
+            exp.PartitionedByProperty: exp.Properties.Location.POST_INDEX,
+        }
+
+        def partitionedbyproperty_sql(self, expression: exp.PartitionedByProperty) -> str:
+            return f"PARTITION BY {self.sql(expression, 'this')}"
+
        # FROM before SET in Teradata UPDATE syntax
        # https://docs.teradata.com/r/Enterprise_IntelliFlex_VMware/Teradata-VantageTM-SQL-Data-Manipulation-Language-17.20/Statement-Syntax/UPDATE/UPDATE-Syntax-Basic-Form-FROM-Clause
        def update_sql(self, expression: exp.Update) -> str:
--- a/sqlglot/dialects/tsql.py
+++ b/sqlglot/dialects/tsql.py
@ -412,6 +412,8 @@ class TSQL(Dialect):
            return self.expression(exp.UserDefinedFunction, this=this, expressions=expressions)

    class Generator(generator.Generator):
+        LOCKING_READS_SUPPORTED = True
+
        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,  # type: ignore
            exp.DataType.Type.BOOLEAN: "BIT",