Adding upstream version 25.32.0.

Signed-off-by: Daniel Baumann <daniel@debian.org>
2025-02-13 21:57:29 +01:00 · 2025-02-13 21:57:29 +01:00 · 24751d63a1
commit 24751d63a1
parent ec2e441f55
74 changed files with 2284 additions and 1814 deletions
--- a/sqlglot/dialects/snowflake.py
+++ b/sqlglot/dialects/snowflake.py
@@ -198,43 +198,58 @@ def _flatten_structured_types_unless_iceberg(expression: exp.Expression) -> exp.
    return expression


-def _unnest_generate_date_array(expression: exp.Expression) -> exp.Expression:
+def _unnest_generate_date_array(unnest: exp.Unnest) -> None:
+    generate_date_array = unnest.expressions[0]
+    start = generate_date_array.args.get("start")
+    end = generate_date_array.args.get("end")
+    step = generate_date_array.args.get("step")
+
+    if not start or not end or not isinstance(step, exp.Interval) or step.name != "1":
+        return
+
+    unit = step.args.get("unit")
+
+    unnest_alias = unnest.args.get("alias")
+    if unnest_alias:
+        unnest_alias = unnest_alias.copy()
+        sequence_value_name = seq_get(unnest_alias.columns, 0) or "value"
+    else:
+        sequence_value_name = "value"
+
+    # We'll add the next sequence value to the starting date and project the result
+    date_add = _build_date_time_add(exp.DateAdd)(
+        [unit, exp.cast(sequence_value_name, "int"), exp.cast(start, "date")]
+    ).as_(sequence_value_name)
+
+    # We use DATEDIFF to compute the number of sequence values needed
+    number_sequence = Snowflake.Parser.FUNCTIONS["ARRAY_GENERATE_RANGE"](
+        [exp.Literal.number(0), _build_datediff([unit, start, end]) + 1]
+    )
+
+    unnest.set("expressions", [number_sequence])
+    unnest.replace(exp.select(date_add).from_(unnest.copy()).subquery(unnest_alias))
+
+
+def _transform_generate_date_array(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
-        for unnest in expression.find_all(exp.Unnest):
-            if (
-                isinstance(unnest.parent, (exp.From, exp.Join))
-                and len(unnest.expressions) == 1
-                and isinstance(unnest.expressions[0], exp.GenerateDateArray)
-            ):
-                generate_date_array = unnest.expressions[0]
-                start = generate_date_array.args.get("start")
-                end = generate_date_array.args.get("end")
-                step = generate_date_array.args.get("step")
+        for generate_date_array in expression.find_all(exp.GenerateDateArray):
+            parent = generate_date_array.parent

-                if not start or not end or not isinstance(step, exp.Interval) or step.name != "1":
-                    continue
-
-                unit = step.args.get("unit")
-
-                unnest_alias = unnest.args.get("alias")
-                if unnest_alias:
-                    unnest_alias = unnest_alias.copy()
-                    sequence_value_name = seq_get(unnest_alias.columns, 0) or "value"
-                else:
-                    sequence_value_name = "value"
-
-                # We'll add the next sequence value to the starting date and project the result
-                date_add = _build_date_time_add(exp.DateAdd)(
-                    [unit, exp.cast(sequence_value_name, "int"), exp.cast(start, "date")]
-                ).as_(sequence_value_name)
-
-                # We use DATEDIFF to compute the number of sequence values needed
-                number_sequence = Snowflake.Parser.FUNCTIONS["ARRAY_GENERATE_RANGE"](
-                    [exp.Literal.number(0), _build_datediff([unit, start, end]) + 1]
+            # If GENERATE_DATE_ARRAY is used directly as an array (e.g passed into ARRAY_LENGTH), the transformed Snowflake
+            # query is the following (it'll be unnested properly on the next iteration due to copy):
+            # SELECT ref(GENERATE_DATE_ARRAY(...)) -> SELECT ref((SELECT ARRAY_AGG(*) FROM UNNEST(GENERATE_DATE_ARRAY(...))))
+            if not isinstance(parent, exp.Unnest):
+                unnest = exp.Unnest(expressions=[generate_date_array.copy()])
+                generate_date_array.replace(
+                    exp.select(exp.ArrayAgg(this=exp.Star())).from_(unnest).subquery()
                )

-                unnest.set("expressions", [number_sequence])
-                unnest.replace(exp.select(date_add).from_(unnest.copy()).subquery(unnest_alias))
+            if (
+                isinstance(parent, exp.Unnest)
+                and isinstance(parent.parent, (exp.From, exp.Join))
+                and len(parent.expressions) == 1
+            ):
+                _unnest_generate_date_array(parent)

    return expression

@@ -465,6 +480,7 @@ class Snowflake(Dialect):
        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "LOCATION": lambda self: self._parse_location_property(),
+            "TAG": lambda self: self._parse_tag(),
        }

        TYPE_CONVERTERS = {
@@ -546,6 +562,12 @@ class Snowflake(Dialect):

            return self.expression(exp.Not, this=this)

+        def _parse_tag(self) -> exp.Tags:
+            return self.expression(
+                exp.Tags,
+                expressions=self._parse_wrapped_csv(self._parse_property),
+            )
+
        def _parse_with_constraint(self) -> t.Optional[exp.Expression]:
            if self._prev.token_type != TokenType.WITH:
                self._retreat(self._index - 1)
@@ -565,13 +587,16 @@ class Snowflake(Dialect):
                    this=policy.to_dot() if isinstance(policy, exp.Column) else policy,
                )
            if self._match(TokenType.TAG):
-                return self.expression(
-                    exp.TagColumnConstraint,
-                    expressions=self._parse_wrapped_csv(self._parse_property),
-                )
+                return self._parse_tag()

            return None

+        def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
+            if self._match(TokenType.TAG):
+                return self._parse_tag()
+
+            return super()._parse_with_property()
+
        def _parse_create(self) -> exp.Create | exp.Command:
            expression = super()._parse_create()
            if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES:
@@ -893,7 +918,7 @@ class Snowflake(Dialect):
                    transforms.eliminate_distinct_on,
                    transforms.explode_to_unnest(),
                    transforms.eliminate_semi_and_anti_joins,
-                    _unnest_generate_date_array,
+                    _transform_generate_date_array,
                ]
            ),
            exp.SafeDivide: lambda self, e: no_safe_divide_sql(self, e, "IFF"),