
Adding upstream version 25.5.1.

Signed-off-by: Daniel Baumann <daniel@debian.org>
Daniel Baumann 2025-02-13 21:41:00 +01:00
parent 147b6e06e8
commit 4e506fbac7
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
136 changed files with 80990 additions and 72541 deletions

sqlglot/dialects/duckdb.py

@@ -15,11 +15,13 @@ from sqlglot.dialects.dialect import (
    build_default_decimal_type,
    date_trunc_to_time,
    datestrtodate_sql,
    no_datetime_sql,
    encode_decode_sql,
    build_formatted_time,
    inline_array_unless_query,
    no_comment_column_constraint_sql,
    no_safe_divide_sql,
    no_time_sql,
    no_timestamp_sql,
    pivot_column_names,
    regexp_extract_sql,
@@ -218,6 +220,7 @@ class DuckDB(Dialect):
            "TIMESTAMP_US": TokenType.TIMESTAMP,
            "VARCHAR": TokenType.TEXT,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
@@ -407,6 +410,7 @@ class DuckDB(Dialect):
                "DATE_DIFF", f"'{e.args.get('unit') or 'DAY'}'", e.expression, e.this
            ),
            exp.DateStrToDate: datestrtodate_sql,
            exp.Datetime: no_datetime_sql,
            exp.DateToDi: lambda self,
            e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)",
            exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False),
@@ -429,7 +433,6 @@ class DuckDB(Dialect):
                exp.cast(e.expression, exp.DataType.Type.TIMESTAMP, copy=True),
                exp.cast(e.this, exp.DataType.Type.TIMESTAMP, copy=True),
            ),
            exp.ParseJSON: rename_func("JSON"),
            exp.PercentileCont: rename_func("QUANTILE_CONT"),
            exp.PercentileDisc: rename_func("QUANTILE_DISC"),
            # DuckDB doesn't allow qualified columns inside of PIVOT expressions.
@@ -450,13 +453,12 @@ class DuckDB(Dialect):
            exp.Split: rename_func("STR_SPLIT"),
            exp.SortArray: _sort_array_sql,
            exp.StrPosition: str_position_sql,
            exp.StrToDate: lambda self, e: f"CAST({str_to_time_sql(self, e)} AS DATE)",
            exp.StrToTime: str_to_time_sql,
            exp.StrToUnix: lambda self, e: self.func(
                "EPOCH", self.func("STRPTIME", e.this, self.format_time(e))
            ),
            exp.Struct: _struct_sql,
            exp.TimeAdd: _date_delta_sql,
            exp.Time: no_time_sql,
            exp.Timestamp: no_timestamp_sql,
            exp.TimestampDiff: lambda self, e: self.func(
                "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this
@@ -608,6 +610,24 @@ class DuckDB(Dialect):
        PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA
        PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE

        def strtotime_sql(self, expression: exp.StrToTime) -> str:
            if expression.args.get("safe"):
                formatted_time = self.format_time(expression)
                return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS TIMESTAMP)"
            return str_to_time_sql(self, expression)

        def strtodate_sql(self, expression: exp.StrToDate) -> str:
            if expression.args.get("safe"):
                formatted_time = self.format_time(expression)
                return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)"
            return f"CAST({str_to_time_sql(self, expression)} AS DATE)"

        def parsejson_sql(self, expression: exp.ParseJSON) -> str:
            arg = expression.this
            if expression.args.get("safe"):
                return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null()))
            return self.func("JSON", arg)

        def timefromparts_sql(self, expression: exp.TimeFromParts) -> str:
            nano = expression.args.get("nano")
            if nano is not None:
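The three overrides above only take the TRY_STRPTIME / json_valid path when the expression node carries a truthy "safe" argument (as set by source dialects whose parse functions return NULL on failure); otherwise they fall back to the previous STRPTIME / JSON output. A minimal sketch of that behaviour, building a node directly instead of parsing any particular dialect's SQL (the column name and format string are illustrative, not from this commit):

from sqlglot import exp

# Hypothetical node: a StrToTime flagged as "safe", with a made-up column
# and strftime-style format string.
node = exp.StrToTime(
    this=exp.column("raw_ts"),
    format=exp.Literal.string("%Y-%m-%d"),
    safe=True,
)

# With "safe" set, strtotime_sql should render the TRY_STRPTIME form, e.g.
#   CAST(TRY_STRPTIME(raw_ts, '%Y-%m-%d') AS TIMESTAMP)
# without it, the older STRPTIME-based output is produced.
print(node.sql(dialect="duckdb"))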
@@ -728,3 +748,33 @@ class DuckDB(Dialect):
            this = self.sql(expression, "this").rstrip(")")

            return f"{this}{expression_sql})"

        def length_sql(self, expression: exp.Length) -> str:
            arg = expression.this

            # Dialects like BQ and Snowflake also accept binary values as args, so
            # DDB will attempt to infer the type or resort to case/when resolution
            if not expression.args.get("binary") or arg.is_string:
                return self.func("LENGTH", arg)

            if not arg.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                arg = annotate_types(arg)

            if arg.is_type(*exp.DataType.TEXT_TYPES):
                return self.func("LENGTH", arg)

            # We need these casts to make duckdb's static type checker happy
            blob = exp.cast(arg, exp.DataType.Type.VARBINARY)
            varchar = exp.cast(arg, exp.DataType.Type.VARCHAR)

            case = (
                exp.case(self.func("TYPEOF", arg))
                .when(
                    "'VARCHAR'", exp.Anonymous(this="LENGTH", expressions=[varchar])
                )  # anonymous to break length_sql recursion
                .when("'BLOB'", self.func("OCTET_LENGTH", blob))
            )

            return self.sql(case)
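The length_sql override degrades gracefully: when the argument is flagged as possibly binary and annotate_types cannot narrow it to a text type, generation falls back to a TYPEOF-driven CASE so the choice between LENGTH and OCTET_LENGTH is made at query time. A rough sketch under those assumptions (the column name is made up and the rendered SQL shown is indicative only):

from sqlglot import exp

# Hypothetical node: a LENGTH call marked as possibly binary whose argument
# type is unknown, so it cannot be resolved to a text type statically.
node = exp.Length(this=exp.column("payload"), binary=True)

# Per length_sql above, the DuckDB output should take roughly this shape:
#   CASE TYPEOF(payload)
#     WHEN 'VARCHAR' THEN LENGTH(CAST(payload AS TEXT))
#     WHEN 'BLOB' THEN OCTET_LENGTH(CAST(payload AS BLOB))
#   END
print(node.sql(dialect="duckdb"))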