1
0
Fork 0

Merging upstream version 25.16.1.

Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
Daniel Baumann 2025-02-13 21:52:32 +01:00
parent 7688e2bdf8
commit bad79d1f7c
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
110 changed files with 75353 additions and 68092 deletions

View file

@ -133,6 +133,10 @@ class _Dialect(type):
klass.INVERSE_FORMAT_MAPPING = {v: k for k, v in klass.FORMAT_MAPPING.items()}
klass.INVERSE_FORMAT_TRIE = new_trie(klass.INVERSE_FORMAT_MAPPING)
klass.INVERSE_CREATABLE_KIND_MAPPING = {
v: k for k, v in klass.CREATABLE_KIND_MAPPING.items()
}
base = seq_get(bases, 0)
base_tokenizer = (getattr(base, "tokenizer_class", Tokenizer),)
base_jsonpath_tokenizer = (getattr(base, "jsonpath_tokenizer_class", JSONPathTokenizer),)
@ -183,6 +187,9 @@ class _Dialect(type):
if enum not in ("", "bigquery"):
klass.generator_class.SELECT_KINDS = ()
if enum not in ("", "clickhouse"):
klass.generator_class.SUPPORTS_NULLABLE_TYPES = False
if enum not in ("", "athena", "presto", "trino"):
klass.generator_class.TRY_SUPPORTED = False
klass.generator_class.SUPPORTS_UESCAPE = False
@ -369,6 +376,24 @@ class Dialect(metaclass=_Dialect):
Whether ORDER BY ALL is supported (expands to all the selected columns) as in DuckDB, Spark3/Databricks
"""
HAS_DISTINCT_ARRAY_CONSTRUCTORS = False
"""
Whether the ARRAY constructor is context-sensitive, i.e. in Redshift ARRAY[1, 2, 3] != ARRAY(1, 2, 3)
as the former is of type INT[] vs the latter which is SUPER
"""
SUPPORTS_FIXED_SIZE_ARRAYS = False
"""
Whether expressions such as x::INT[5] should be parsed as fixed-size array defs/casts e.g. in DuckDB. In
dialects which don't support fixed size arrays such as Snowflake, this should be interpreted as a subscript/index operator
"""
CREATABLE_KIND_MAPPING: dict[str, str] = {}
"""
Helper for dialects that use a different name for the same creatable kind. For example, the Clickhouse
equivalent of CREATE SCHEMA is CREATE DATABASE.
"""
# --- Autofilled ---
tokenizer_class = Tokenizer
@ -385,6 +410,8 @@ class Dialect(metaclass=_Dialect):
INVERSE_FORMAT_MAPPING: t.Dict[str, str] = {}
INVERSE_FORMAT_TRIE: t.Dict = {}
INVERSE_CREATABLE_KIND_MAPPING: dict[str, str] = {}
ESCAPED_SEQUENCES: t.Dict[str, str] = {}
# Delimiters for string literals and identifiers
@ -635,6 +662,9 @@ class Dialect(metaclass=_Dialect):
exp.GenerateDateArray: lambda self, e: self._annotate_with_type(
e, exp.DataType.build("ARRAY<DATE>")
),
exp.GenerateTimestampArray: lambda self, e: self._annotate_with_type(
e, exp.DataType.build("ARRAY<TIMESTAMP>")
),
exp.If: lambda self, e: self._annotate_by_args(e, "true", "false"),
exp.Interval: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.INTERVAL),
exp.Least: lambda self, e: self._annotate_by_args(e, "expressions"),
@ -1214,7 +1244,13 @@ def right_to_substring_sql(self: Generator, expression: exp.Left) -> str:
def timestrtotime_sql(self: Generator, expression: exp.TimeStrToTime) -> str:
return self.sql(exp.cast(expression.this, exp.DataType.Type.TIMESTAMP))
datatype = (
exp.DataType.Type.TIMESTAMPTZ
if expression.args.get("zone")
else exp.DataType.Type.TIMESTAMP
)
return self.sql(exp.cast(expression.this, datatype, dialect=self.dialect))
def datestrtodate_sql(self: Generator, expression: exp.DateStrToDate) -> str:
@ -1464,14 +1500,19 @@ def merge_without_target_sql(self: Generator, expression: exp.Merge) -> str:
targets.add(normalize(alias.this))
for when in expression.expressions:
when.transform(
lambda node: (
exp.column(node.this)
if isinstance(node, exp.Column) and normalize(node.args.get("table")) in targets
else node
),
copy=False,
)
# only remove the target names from the THEN clause
# they're still valid in the <condition> part of WHEN MATCHED / WHEN NOT MATCHED
# ref: https://github.com/TobikoData/sqlmesh/issues/2934
then = when.args.get("then")
if then:
then.transform(
lambda node: (
exp.column(node.this)
if isinstance(node, exp.Column) and normalize(node.args.get("table")) in targets
else node
),
copy=False,
)
return self.merge_sql(expression)
@ -1590,9 +1631,9 @@ def sha256_sql(self: Generator, expression: exp.SHA2) -> str:
return self.func(f"SHA{expression.text('length') or '256'}", expression.this)
def sequence_sql(self: Generator, expression: exp.GenerateSeries):
start = expression.args["start"]
end = expression.args["end"]
def sequence_sql(self: Generator, expression: exp.GenerateSeries | exp.GenerateDateArray) -> str:
start = expression.args.get("start")
end = expression.args.get("end")
step = expression.args.get("step")
if isinstance(start, exp.Cast):
@ -1602,8 +1643,8 @@ def sequence_sql(self: Generator, expression: exp.GenerateSeries):
else:
target_type = None
if target_type and target_type.is_type("timestamp"):
if target_type is start.to:
if start and end and target_type and target_type.is_type("date", "timestamp"):
if isinstance(start, exp.Cast) and target_type is start.to:
end = exp.cast(end, target_type)
else:
start = exp.cast(start, target_type)