Adding upstream version 23.16.0.

Signed-off-by: Daniel Baumann <daniel@debian.org>
2025-02-13 21:34:56 +01:00 · 2025-02-13 21:34:56 +01:00 · b6ae88ec81
commit b6ae88ec81
parent 9d7e0ff7aa
93 changed files with 64106 additions and 59061 deletions
--- a/sqlglot/dialects/duckdb.py
+++ b/sqlglot/dialects/duckdb.py
@@ -5,12 +5,14 @@ import typing as t
 from sqlglot import exp, generator, parser, tokens, transforms
 from sqlglot.dialects.dialect import (
    Dialect,
+    JSON_EXTRACT_TYPE,
    NormalizationStrategy,
    approx_count_distinct_sql,
    arg_max_or_min_no_count,
    arrow_json_extract_sql,
    binary_from_function,
    bool_xor_sql,
+    build_default_decimal_type,
    date_trunc_to_time,
    datestrtodate_sql,
    encode_decode_sql,
@@ -155,6 +157,13 @@ def _unix_to_time_sql(self: DuckDB.Generator, expression: exp.UnixToTime) -> str
    return self.func("TO_TIMESTAMP", exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)))


+def _arrow_json_extract_sql(self: DuckDB.Generator, expression: JSON_EXTRACT_TYPE) -> str:
+    arrow_sql = arrow_json_extract_sql(self, expression)
+    if not expression.same_parent and isinstance(expression.parent, exp.Binary):
+        arrow_sql = self.wrap(arrow_sql)
+    return arrow_sql
+
+
 class DuckDB(Dialect):
    NULL_ORDERING = "nulls_are_last"
    SUPPORTS_USER_DEFINED_TYPES = False
@@ -304,6 +313,22 @@ class DuckDB(Dialect):
            ),
        }

+        TYPE_CONVERTER = {
+            # https://duckdb.org/docs/sql/data_types/numeric
+            exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3),
+        }
+
+        def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
+            # https://duckdb.org/docs/sql/samples.html
+            sample = super()._parse_table_sample(as_modifier=as_modifier)
+            if sample and not sample.args.get("method"):
+                if sample.args.get("size"):
+                    sample.set("method", exp.var("RESERVOIR"))
+                else:
+                    sample.set("method", exp.var("SYSTEM"))
+
+            return sample
+
        def _parse_bracket(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
@@ -320,24 +345,6 @@ class DuckDB(Dialect):
            args = self._parse_wrapped_csv(self._parse_conjunction)
            return self.expression(exp.Map, keys=seq_get(args, 0), values=seq_get(args, 1))

-        def _parse_types(
-            self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
-        ) -> t.Optional[exp.Expression]:
-            this = super()._parse_types(
-                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
-            )
-
-            # DuckDB treats NUMERIC and DECIMAL without precision as DECIMAL(18, 3)
-            # See: https://duckdb.org/docs/sql/data_types/numeric
-            if (
-                isinstance(this, exp.DataType)
-                and this.is_type("numeric", "decimal")
-                and not this.expressions
-            ):
-                return exp.DataType.build("DECIMAL(18, 3)")
-
-            return this
-
        def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
            return self._parse_field_def()

@@ -368,6 +375,7 @@ class DuckDB(Dialect):
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        COPY_HAS_INTO_KEYWORD = False
+        STAR_EXCEPT = "EXCLUDE"

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
@@ -406,11 +414,12 @@ class DuckDB(Dialect):
            exp.IntDiv: lambda self, e: self.binary(e, "//"),
            exp.IsInf: rename_func("ISINF"),
            exp.IsNan: rename_func("ISNAN"),
-            exp.JSONExtract: arrow_json_extract_sql,
-            exp.JSONExtractScalar: arrow_json_extract_sql,
+            exp.JSONExtract: _arrow_json_extract_sql,
+            exp.JSONExtractScalar: _arrow_json_extract_sql,
            exp.JSONFormat: _json_format_sql,
            exp.LogicalOr: rename_func("BOOL_OR"),
            exp.LogicalAnd: rename_func("BOOL_AND"),
+            exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
            exp.MonthsBetween: lambda self, e: self.func(
                "DATEDIFF",
                "'month'",
@@ -449,7 +458,7 @@ class DuckDB(Dialect):
            exp.TimestampDiff: lambda self, e: self.func(
                "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this
            ),
-            exp.TimestampTrunc: timestamptrunc_sql,
+            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: lambda self, e: self.func(
@@ -499,8 +508,6 @@ class DuckDB(Dialect):
            exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS",
        }

-        STAR_MAPPING = {**generator.Generator.STAR_MAPPING, "except": "EXCLUDE"}
-
        UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren)

        # DuckDB doesn't generally support CREATE TABLE .. properties
@@ -550,6 +557,15 @@ class DuckDB(Dialect):
                # This sample clause only applies to a single source, not the entire resulting relation
                tablesample_keyword = "TABLESAMPLE"

+            if expression.args.get("size"):
+                method = expression.args.get("method")
+                if method and method.name.upper() != "RESERVOIR":
+                    self.unsupported(
+                        f"Sampling method {method} is not supported with a discrete sample count, "
+                        "defaulting to reservoir sampling"
+                    )
+                    expression.set("method", exp.var("RESERVOIR"))
+
            return super().tablesample_sql(
                expression, sep=sep, tablesample_keyword=tablesample_keyword
            )