
Adding upstream version 6.3.1.

Signed-off-by: Daniel Baumann <daniel@debian.org>
Author: Daniel Baumann 2025-02-13 14:44:19 +01:00
parent 24cf9d8984
commit 291e0c125c
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
41 changed files with 1558 additions and 267 deletions

sqlglot/dialects/bigquery.py

@@ -135,6 +135,7 @@ class BigQuery(Dialect):
exp.DateSub: _date_add_sql("DATE", "SUB"),
exp.DatetimeAdd: _date_add_sql("DATETIME", "ADD"),
exp.DatetimeSub: _date_add_sql("DATETIME", "SUB"),
exp.DateDiff: lambda self, e: f"DATE_DIFF({self.sql(e, 'this')}, {self.sql(e, 'expression')}, {self.sql(e.args.get('unit', 'DAY'))})",
exp.ILike: no_ilike_sql,
exp.TimeAdd: _date_add_sql("TIME", "ADD"),
exp.TimeSub: _date_add_sql("TIME", "SUB"),
@@ -172,12 +173,11 @@ class BigQuery(Dialect):
exp.AnonymousProperty,
}
EXPLICIT_UNION = True
def in_unnest_op(self, unnest):
return self.sql(unnest)
def union_op(self, expression):
return f"UNION{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"
def except_op(self, expression):
if not expression.args.get("distinct", False):
self.unsupported("EXCEPT without DISTINCT is not supported in BigQuery")

sqlglot/dialects/clickhouse.py

@@ -1,10 +1,16 @@
from sqlglot import exp
from sqlglot.dialects.dialect import Dialect, inline_array_sql
from sqlglot.dialects.dialect import Dialect, inline_array_sql, var_map_sql
from sqlglot.generator import Generator
from sqlglot.parser import Parser
from sqlglot.helper import csv
from sqlglot.parser import Parser, parse_var_map
from sqlglot.tokens import Tokenizer, TokenType
def _lower_func(sql):
index = sql.index("(")
return sql[:index].lower() + sql[index:]
class ClickHouse(Dialect):
normalize_functions = None
null_ordering = "nulls_are_last"
@@ -14,17 +20,23 @@ class ClickHouse(Dialect):
KEYWORDS = {
**Tokenizer.KEYWORDS,
"NULLABLE": TokenType.NULLABLE,
"FINAL": TokenType.FINAL,
"DATETIME64": TokenType.DATETIME,
"INT8": TokenType.TINYINT,
"INT16": TokenType.SMALLINT,
"INT32": TokenType.INT,
"INT64": TokenType.BIGINT,
"FLOAT32": TokenType.FLOAT,
"FLOAT64": TokenType.DOUBLE,
"TUPLE": TokenType.STRUCT,
}
class Parser(Parser):
FUNCTIONS = {
**Parser.FUNCTIONS,
"MAP": parse_var_map,
}
def _parse_table(self, schema=False):
this = super()._parse_table(schema)
@@ -39,10 +51,25 @@ class ClickHouse(Dialect):
TYPE_MAPPING = {
**Generator.TYPE_MAPPING,
exp.DataType.Type.NULLABLE: "Nullable",
exp.DataType.Type.DATETIME: "DateTime64",
exp.DataType.Type.MAP: "Map",
exp.DataType.Type.ARRAY: "Array",
exp.DataType.Type.STRUCT: "Tuple",
exp.DataType.Type.TINYINT: "Int8",
exp.DataType.Type.SMALLINT: "Int16",
exp.DataType.Type.INT: "Int32",
exp.DataType.Type.BIGINT: "Int64",
exp.DataType.Type.FLOAT: "Float32",
exp.DataType.Type.DOUBLE: "Float64",
}
TRANSFORMS = {
**Generator.TRANSFORMS,
exp.Array: inline_array_sql,
exp.StrPosition: lambda self, e: f"position({csv(self.sql(e, 'this'), self.sql(e, 'substr'), self.sql(e, 'position'))})",
exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL",
exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)),
exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)),
}
EXPLICIT_UNION = True
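
The parser/generator pair above means Hive-style variadic maps can hop dialects; a hedged sketch, assuming sqlglot 6.3.x:

import sqlglot

# parse_var_map turns MAP(k1, v1, k2, v2) into exp.VarMap; the ClickHouse
# generator renders it via var_map_sql and _lower_func lower-cases the name.
sql = "SELECT MAP('a', 1, 'b', 2)"
print(sqlglot.transpile(sql, read="hive", write="clickhouse")[0])
# expected: SELECT map('a', 1, 'b', 2)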

sqlglot/dialects/dialect.py

@@ -77,7 +77,6 @@ class Dialect(metaclass=_Dialect):
alias_post_tablesample = False
normalize_functions = "upper"
null_ordering = "nulls_are_small"
wrap_derived_values = True
date_format = "'%Y-%m-%d'"
dateint_format = "'%Y%m%d'"
@@ -170,7 +169,6 @@ class Dialect(metaclass=_Dialect):
"alias_post_tablesample": self.alias_post_tablesample,
"normalize_functions": self.normalize_functions,
"null_ordering": self.null_ordering,
"wrap_derived_values": self.wrap_derived_values,
**opts,
}
)
@@ -271,6 +269,21 @@ def struct_extract_sql(self, expression):
return f"{this}.{struct_key}"
def var_map_sql(self, expression):
keys = expression.args["keys"]
values = expression.args["values"]
if not isinstance(keys, exp.Array) or not isinstance(values, exp.Array):
self.unsupported("Cannot convert array columns into map.")
return f"MAP({self.sql(keys)}, {self.sql(values)})"
args = []
for key, value in zip(keys.expressions, values.expressions):
args.append(self.sql(key))
args.append(self.sql(value))
return f"MAP({csv(*args)})"
def format_time_lambda(exp_class, dialect, default=None):
"""Helper used for time expressions.

sqlglot/dialects/hive.py

@@ -11,40 +11,14 @@ from sqlglot.dialects.dialect import (
no_trycast_sql,
rename_func,
struct_extract_sql,
var_map_sql,
)
from sqlglot.generator import Generator
from sqlglot.helper import csv, list_get
from sqlglot.parser import Parser
from sqlglot.parser import Parser, parse_var_map
from sqlglot.tokens import Tokenizer
def _parse_map(args):
keys = []
values = []
for i in range(0, len(args), 2):
keys.append(args[i])
values.append(args[i + 1])
return HiveMap(
keys=exp.Array(expressions=keys),
values=exp.Array(expressions=values),
)
def _map_sql(self, expression):
keys = expression.args["keys"]
values = expression.args["values"]
if not isinstance(keys, exp.Array) or not isinstance(values, exp.Array):
self.unsupported("Cannot convert array columns into map use SparkSQL instead.")
return f"MAP({self.sql(keys)}, {self.sql(values)})"
args = []
for key, value in zip(keys.expressions, values.expressions):
args.append(self.sql(key))
args.append(self.sql(value))
return f"MAP({csv(*args)})"
def _array_sort(self, expression):
if expression.expression:
self.unsupported("Hive SORT_ARRAY does not support a comparator")
@@ -122,10 +96,6 @@ def _index_sql(self, expression):
return f"{this} ON TABLE {table} {columns}"
class HiveMap(exp.Map):
is_var_len_args = True
class Hive(Dialect):
alias_post_tablesample = True
@@ -206,7 +176,7 @@ class Hive(Dialect):
position=list_get(args, 2),
),
"LOG": (lambda args: exp.Log.from_arg_list(args) if len(args) > 1 else exp.Ln.from_arg_list(args)),
"MAP": _parse_map,
"MAP": parse_var_map,
"MONTH": lambda args: exp.Month(this=exp.TsOrDsToDate.from_arg_list(args)),
"PERCENTILE": exp.Quantile.from_arg_list,
"PERCENTILE_APPROX": exp.ApproxQuantile.from_arg_list,
@@ -245,8 +215,8 @@ class Hive(Dialect):
exp.Join: _unnest_to_explode_sql,
exp.JSONExtract: rename_func("GET_JSON_OBJECT"),
exp.JSONExtractScalar: rename_func("GET_JSON_OBJECT"),
exp.Map: _map_sql,
HiveMap: _map_sql,
exp.Map: var_map_sql,
exp.VarMap: var_map_sql,
exp.Create: create_with_partitions_sql,
exp.Quantile: rename_func("PERCENTILE"),
exp.ApproxQuantile: rename_func("PERCENTILE_APPROX"),
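
With the local _parse_map/_map_sql/HiveMap trio gone, Hive maps should round-trip through the shared node; a sketch, assuming sqlglot 6.3.x:

import sqlglot
from sqlglot import exp

# MAP(...) now parses via parse_var_map into exp.VarMap and renders back
# through var_map_sql unchanged.
tree = sqlglot.parse_one("SELECT MAP('a', 1, 'b', 2)", read="hive")
assert tree.find(exp.VarMap) is not None
print(tree.sql(dialect="hive"))
# expected: SELECT MAP('a', 1, 'b', 2)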

sqlglot/dialects/oracle.py

@@ -10,6 +10,32 @@ def _limit_sql(self, expression):
class Oracle(Dialect):
# https://docs.oracle.com/database/121/SQLRF/sql_elements004.htm#SQLRF00212
# https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes
time_mapping = {
"AM": "%p", # Meridian indicator with or without periods
"A.M.": "%p", # Meridian indicator with or without periods
"PM": "%p", # Meridian indicator with or without periods
"P.M.": "%p", # Meridian indicator with or without periods
"D": "%u", # Day of week (1-7)
"DAY": "%A", # name of day
"DD": "%d", # day of month (1-31)
"DDD": "%j", # day of year (1-366)
"DY": "%a", # abbreviated name of day
"HH": "%I", # Hour of day (1-12)
"HH12": "%I", # alias for HH
"HH24": "%H", # Hour of day (0-23)
"IW": "%V", # Calendar week of year (1-52 or 1-53), as defined by the ISO 8601 standard
"MI": "%M", # Minute (0-59)
"MM": "%m", # Month (01-12; January = 01)
"MON": "%b", # Abbreviated name of month
"MONTH": "%B", # Name of month
"SS": "%S", # Second (0-59)
"WW": "%W", # Week of year (1-53)
"YY": "%y", # 15
"YYYY": "%Y", # 2015
}
class Generator(Generator):
TYPE_MAPPING = {
**Generator.TYPE_MAPPING,
@@ -30,6 +56,9 @@ class Oracle(Dialect):
**transforms.UNALIAS_GROUP,
exp.ILike: no_ilike_sql,
exp.Limit: _limit_sql,
exp.StrToTime: lambda self, e: f"TO_TIMESTAMP({self.sql(e, 'this')}, {self.format_time(e)})",
exp.TimeToStr: lambda self, e: f"TO_CHAR({self.sql(e, 'this')}, {self.format_time(e)})",
exp.UnixToTime: lambda self, e: f"TO_DATE('1970-01-01','YYYY-MM-DD') + ({self.sql(e, 'this')} / 86400)",
}
def query_modifiers(self, expression, *sqls):
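
The new time_mapping drives format_time in both directions; a hedged sketch, assuming sqlglot 6.3.x and Hive's DATE_FORMAT mapping to exp.TimeToStr:

import sqlglot

# A Hive strftime-style format should be re-spelled with Oracle's TO_CHAR
# tokens (yyyy -> %Y -> YYYY, and so on).
sql = "SELECT DATE_FORMAT(x, 'yyyy-MM-dd')"
print(sqlglot.transpile(sql, read="hive", write="oracle")[0])
# expected: SELECT TO_CHAR(x, 'YYYY-MM-DD')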

sqlglot/dialects/postgres.py

@@ -118,13 +118,22 @@ def _serial_to_generated(expression):
return expression
def _to_timestamp(args):
# TO_TIMESTAMP accepts either a single double argument or (text, text)
if len(args) == 1 and args[0].is_number:
# https://www.postgresql.org/docs/current/functions-datetime.html#FUNCTIONS-DATETIME-TABLE
return exp.UnixToTime.from_arg_list(args)
# https://www.postgresql.org/docs/current/functions-formatting.html
return format_time_lambda(exp.StrToTime, "postgres")(args)
class Postgres(Dialect):
null_ordering = "nulls_are_large"
time_format = "'YYYY-MM-DD HH24:MI:SS'"
time_mapping = {
"AM": "%p",
"PM": "%p",
"D": "%w", # 1-based day of week
"D": "%u", # 1-based day of week
"DD": "%d", # day of month
"DDD": "%j", # zero padded day of year
"FMDD": "%-d", # - is no leading zero for Python; same for FM in postgres
@@ -172,7 +181,7 @@ class Postgres(Dialect):
FUNCTIONS = {
**Parser.FUNCTIONS,
"TO_TIMESTAMP": format_time_lambda(exp.StrToTime, "postgres"),
"TO_TIMESTAMP": _to_timestamp,
"TO_CHAR": format_time_lambda(exp.TimeToStr, "postgres"),
}
@@ -211,4 +220,5 @@ class Postgres(Dialect):
exp.TableSample: no_tablesample_sql,
exp.Trim: _trim_sql,
exp.TryCast: no_trycast_sql,
exp.UnixToTime: lambda self, e: f"TO_TIMESTAMP({self.sql(e, 'this')})",
}
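
The dispatch in _to_timestamp is easy to verify; a minimal sketch, assuming sqlglot 6.3.x:

import sqlglot
from sqlglot import exp

# One numeric argument is treated as an epoch (UnixToTime); the
# (text, format) form stays a parse (StrToTime).
epoch = sqlglot.parse_one("SELECT TO_TIMESTAMP(1415792726)", read="postgres")
parsed = sqlglot.parse_one("SELECT TO_TIMESTAMP('2015-01-01', 'YYYY-MM-DD')", read="postgres")
assert epoch.find(exp.UnixToTime) is not None
assert parsed.find(exp.StrToTime) is not None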

sqlglot/dialects/snowflake.py

@@ -121,6 +121,7 @@ class Snowflake(Dialect):
FUNC_TOKENS = {
*Parser.FUNC_TOKENS,
TokenType.RLIKE,
TokenType.TABLE,
}
COLUMN_OPERATORS = {
@@ -143,7 +144,7 @@ class Snowflake(Dialect):
SINGLE_TOKENS = {
**Tokenizer.SINGLE_TOKENS,
"$": TokenType.DOLLAR, # needed to break for quotes
"$": TokenType.PARAMETER,
}
KEYWORDS = {
@@ -164,6 +165,8 @@ class Snowflake(Dialect):
exp.StrToTime: lambda self, e: f"TO_TIMESTAMP({self.sql(e, 'this')}, {self.format_time(e)})",
exp.UnixToTime: _unix_to_time,
exp.Array: inline_array_sql,
exp.StrPosition: rename_func("POSITION"),
exp.Parameter: lambda self, e: f"${self.sql(e, 'this')}",
exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'value')}",
}
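
Together, the PARAMETER token and the exp.Parameter transform should let dollar references round-trip; a sketch, assuming sqlglot 6.3.x builds exp.Parameter from the token:

import sqlglot

# "$" now tokenizes as PARAMETER instead of DOLLAR, and the generator
# prints exp.Parameter back as $<name>.
print(sqlglot.transpile("SELECT $1", read="snowflake", write="snowflake")[0])
# expected: SELECT $1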

sqlglot/dialects/spark.py

@@ -4,8 +4,9 @@ from sqlglot.dialects.dialect import (
no_ilike_sql,
rename_func,
)
from sqlglot.dialects.hive import Hive, HiveMap
from sqlglot.dialects.hive import Hive
from sqlglot.helper import list_get
from sqlglot.parser import Parser
def _create_sql(self, e):
@@ -47,8 +48,6 @@ def _unix_to_time(self, expression):
class Spark(Hive):
wrap_derived_values = False
class Parser(Hive.Parser):
FUNCTIONS = {
**Hive.Parser.FUNCTIONS,
@@ -78,8 +77,19 @@ class Spark(Hive):
"APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list,
}
class Generator(Hive.Generator):
FUNCTION_PARSERS = {
**Parser.FUNCTION_PARSERS,
"BROADCAST": lambda self: self._parse_join_hint("BROADCAST"),
"BROADCASTJOIN": lambda self: self._parse_join_hint("BROADCASTJOIN"),
"MAPJOIN": lambda self: self._parse_join_hint("MAPJOIN"),
"MERGE": lambda self: self._parse_join_hint("MERGE"),
"SHUFFLEMERGE": lambda self: self._parse_join_hint("SHUFFLEMERGE"),
"MERGEJOIN": lambda self: self._parse_join_hint("MERGEJOIN"),
"SHUFFLE_HASH": lambda self: self._parse_join_hint("SHUFFLE_HASH"),
"SHUFFLE_REPLICATE_NL": lambda self: self._parse_join_hint("SHUFFLE_REPLICATE_NL"),
}
class Generator(Hive.Generator):
TYPE_MAPPING = {
**Hive.Generator.TYPE_MAPPING,
exp.DataType.Type.TINYINT: "BYTE",
@@ -102,8 +112,9 @@ class Spark(Hive):
exp.Map: _map_sql,
exp.Reduce: rename_func("AGGREGATE"),
exp.StructKwarg: lambda self, e: f"{self.sql(e, 'this')}: {self.sql(e, 'expression')}",
HiveMap: _map_sql,
}
WRAP_DERIVED_VALUES = False
class Tokenizer(Hive.Tokenizer):
HEX_STRINGS = [("X'", "'")]
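
The new FUNCTION_PARSERS route hint-style calls through _parse_join_hint rather than treating them as anonymous functions; a hedged sketch, assuming sqlglot 6.3.x preserves hints on a round trip:

import sqlglot

# BROADCAST(foo) inside the hint comment is parsed as a join hint.
sql = "SELECT /*+ BROADCAST(foo) */ * FROM foo JOIN bar"
print(sqlglot.transpile(sql, read="spark", write="spark")[0])
# expected: SELECT /*+ BROADCAST(foo) */ * FROM foo JOIN bar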