1
0
Fork 0

Merging upstream version 9.0.1.

Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
Daniel Baumann 2025-02-13 14:48:46 +01:00
parent ebb36a5fc5
commit 4483b8ff47
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
87 changed files with 7994 additions and 421 deletions

View file

@@ -78,6 +78,16 @@ def _create_sql(self, expression):
class BigQuery(Dialect):
unnest_column_only = True
time_mapping = {
"%M": "%-M",
"%d": "%-d",
"%m": "%-m",
"%y": "%-y",
"%H": "%-H",
"%I": "%-I",
"%S": "%-S",
"%j": "%-j",
}
class Tokenizer(Tokenizer):
QUOTES = [
@@ -113,6 +123,7 @@ class BigQuery(Dialect):
"DATETIME_SUB": _date_add(exp.DatetimeSub),
"TIME_SUB": _date_add(exp.TimeSub),
"TIMESTAMP_SUB": _date_add(exp.TimestampSub),
"PARSE_TIMESTAMP": lambda args: exp.StrToTime(this=list_get(args, 1), format=list_get(args, 0)),
}
NO_PAREN_FUNCTIONS = {
@@ -137,6 +148,7 @@ class BigQuery(Dialect):
exp.DatetimeSub: _date_add_sql("DATETIME", "SUB"),
exp.DateDiff: lambda self, e: f"DATE_DIFF({self.sql(e, 'this')}, {self.sql(e, 'expression')}, {self.sql(e.args.get('unit', 'DAY'))})",
exp.ILike: no_ilike_sql,
exp.StrToTime: lambda self, e: f"PARSE_TIMESTAMP({self.format_time(e)}, {self.sql(e, 'this')})",
exp.TimeAdd: _date_add_sql("TIME", "ADD"),
exp.TimeSub: _date_add_sql("TIME", "SUB"),
exp.TimestampAdd: _date_add_sql("TIMESTAMP", "ADD"),

View file

@@ -2,7 +2,7 @@ from enum import Enum
from sqlglot import exp
from sqlglot.generator import Generator
from sqlglot.helper import list_get
from sqlglot.helper import flatten, list_get
from sqlglot.parser import Parser
from sqlglot.time import format_time
from sqlglot.tokens import Tokenizer
@@ -67,6 +67,11 @@ class _Dialect(type):
klass.generator_class.TRANSFORMS[
exp.HexString
] = lambda self, e: f"{hs_start}{int(self.sql(e, 'this')):X}{hs_end}"
if klass.tokenizer_class._BYTE_STRINGS and exp.ByteString not in klass.generator_class.TRANSFORMS:
be_start, be_end = list(klass.tokenizer_class._BYTE_STRINGS.items())[0]
klass.generator_class.TRANSFORMS[
exp.ByteString
] = lambda self, e: f"{be_start}{self.sql(e, 'this')}{be_end}"
return klass
@@ -176,11 +181,7 @@ class Dialect(metaclass=_Dialect):
def rename_func(name):
def _rename(self, expression):
args = (
expression.expressions
if isinstance(expression, exp.Func) and expression.is_var_len_args
else expression.args.values()
)
args = flatten(expression.args.values())
return f"{name}({self.format_args(*args)})"
return _rename

View file

@@ -121,6 +121,9 @@ class Hive(Dialect):
"ss": "%S",
"s": "%-S",
"S": "%f",
"a": "%p",
"DD": "%j",
"D": "%-j",
}
date_format = "'yyyy-MM-dd'"
@@ -200,6 +203,7 @@ class Hive(Dialect):
exp.AnonymousProperty: _property_sql,
exp.ApproxDistinct: approx_count_distinct_sql,
exp.ArrayAgg: rename_func("COLLECT_LIST"),
exp.ArrayConcat: rename_func("CONCAT"),
exp.ArraySize: rename_func("SIZE"),
exp.ArraySort: _array_sort,
exp.With: no_recursive_cte_sql,

View file

@@ -97,6 +97,8 @@ class MySQL(Dialect):
"%s": "%S",
"%S": "%S",
"%u": "%W",
"%k": "%-H",
"%l": "%-I",
}
class Tokenizer(Tokenizer):
@@ -145,6 +147,9 @@ class MySQL(Dialect):
"_TIS620": TokenType.INTRODUCER,
"_UCS2": TokenType.INTRODUCER,
"_UJIS": TokenType.INTRODUCER,
# https://dev.mysql.com/doc/refman/8.0/en/string-literals.html
"N": TokenType.INTRODUCER,
"n": TokenType.INTRODUCER,
"_UTF8": TokenType.INTRODUCER,
"_UTF16": TokenType.INTRODUCER,
"_UTF16LE": TokenType.INTRODUCER,

View file

@@ -80,17 +80,12 @@ class Oracle(Dialect):
sep="",
)
def alias_sql(self, expression):
if isinstance(expression.this, exp.Table):
to_sql = self.sql(expression, "alias")
# oracle does not allow "AS" between table and alias
to_sql = f" {to_sql}" if to_sql else ""
return f"{self.sql(expression, 'this')}{to_sql}"
return super().alias_sql(expression)
def offset_sql(self, expression):
return f"{super().offset_sql(expression)} ROWS"
def table_sql(self, expression):
return super().table_sql(expression, sep=" ")
class Tokenizer(Tokenizer):
KEYWORDS = {
**Tokenizer.KEYWORDS,

View file

@@ -163,6 +163,7 @@ class Postgres(Dialect):
class Tokenizer(Tokenizer):
BIT_STRINGS = [("b'", "'"), ("B'", "'")]
HEX_STRINGS = [("x'", "'"), ("X'", "'")]
BYTE_STRINGS = [("e'", "'"), ("E'", "'")]
KEYWORDS = {
**Tokenizer.KEYWORDS,
"ALWAYS": TokenType.ALWAYS,
@@ -176,6 +177,11 @@ class Postgres(Dialect):
"SMALLSERIAL": TokenType.SMALLSERIAL,
"UUID": TokenType.UUID,
}
QUOTES = ["'", "$$"]
SINGLE_TOKENS = {
**Tokenizer.SINGLE_TOKENS,
"$": TokenType.PARAMETER,
}
class Parser(Parser):
STRICT_CAST = False

View file

@@ -172,6 +172,7 @@ class Presto(Dialect):
**transforms.UNALIAS_GROUP,
exp.ApproxDistinct: _approx_distinct_sql,
exp.Array: lambda self, e: f"ARRAY[{self.expressions(e, flat=True)}]",
exp.ArrayConcat: rename_func("CONCAT"),
exp.ArrayContains: rename_func("CONTAINS"),
exp.ArraySize: rename_func("CARDINALITY"),
exp.BitwiseAnd: lambda self, e: f"BITWISE_AND({self.sql(e, 'this')}, {self.sql(e, 'expression')})",

View file

@@ -69,6 +69,35 @@ def _unix_to_time(self, expression):
raise ValueError("Improper scale for timestamp")
# https://docs.snowflake.com/en/sql-reference/functions/date_part.html
# https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts
def _parse_date_part(self):
    """Parse a Snowflake DATE_PART(<part>, <expr>) call.

    Ordinary parts become an EXTRACT expression; the EPOCH* parts are
    lowered to a TimeToUnix over a TIMESTAMP cast, multiplied by the
    appropriate sub-second scale factor when one applies.
    """
    part = self._parse_var() or self._parse_type()
    self._match(TokenType.COMMA)
    value = self._parse_bitwise()

    part_name = part.name.upper()
    if not part_name.startswith("EPOCH"):
        # Plain date part, e.g. DATE_PART(month, x) -> EXTRACT(month FROM x).
        return self.expression(exp.Extract, this=part, expression=value)

    # Sub-second EPOCH variants scale the epoch-seconds result.
    scale = None
    for prefix, factor in (
        ("EPOCH_MILLISECOND", 10**3),
        ("EPOCH_MICROSECOND", 10**6),
        ("EPOCH_NANOSECOND", 10**9),
    ):
        if part_name.startswith(prefix):
            scale = factor
            break

    timestamp = self.expression(
        exp.Cast, this=value, to=exp.DataType.build("TIMESTAMP")
    )
    unix = self.expression(exp.TimeToUnix, this=timestamp)

    if scale:
        unix = exp.Mul(this=unix, expression=exp.Literal.number(scale))

    return unix
class Snowflake(Dialect):
null_ordering = "nulls_are_large"
time_format = "'yyyy-mm-dd hh24:mi:ss'"
@@ -115,7 +144,7 @@ class Snowflake(Dialect):
FUNCTION_PARSERS = {
**Parser.FUNCTION_PARSERS,
"DATE_PART": lambda self: self._parse_extract(),
"DATE_PART": _parse_date_part,
}
FUNC_TOKENS = {
@@ -161,9 +190,11 @@ class Snowflake(Dialect):
class Generator(Generator):
TRANSFORMS = {
**Generator.TRANSFORMS,
exp.ArrayConcat: rename_func("ARRAY_CAT"),
exp.If: rename_func("IFF"),
exp.StrToTime: lambda self, e: f"TO_TIMESTAMP({self.sql(e, 'this')}, {self.format_time(e)})",
exp.UnixToTime: _unix_to_time,
exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})",
exp.Array: inline_array_sql,
exp.StrPosition: rename_func("POSITION"),
exp.Parameter: lambda self, e: f"${self.sql(e, 'this')}",

View file

@@ -1,9 +1,5 @@
from sqlglot import exp
from sqlglot.dialects.dialect import (
create_with_partitions_sql,
no_ilike_sql,
rename_func,
)
from sqlglot.dialects.dialect import create_with_partitions_sql, rename_func
from sqlglot.dialects.hive import Hive
from sqlglot.helper import list_get
from sqlglot.parser import Parser
@@ -98,13 +94,14 @@ class Spark(Hive):
}
TRANSFORMS = {
**{k: v for k, v in Hive.Generator.TRANSFORMS.items() if k not in {exp.ArraySort}},
**{k: v for k, v in Hive.Generator.TRANSFORMS.items() if k not in {exp.ArraySort, exp.ILike}},
exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
exp.FileFormatProperty: lambda self, e: f"USING {e.text('value').upper()}",
exp.ArraySum: lambda self, e: f"AGGREGATE({self.sql(e, 'this')}, 0, (acc, x) -> acc + x, acc -> acc)",
exp.BitwiseLeftShift: rename_func("SHIFTLEFT"),
exp.BitwiseRightShift: rename_func("SHIFTRIGHT"),
exp.DateTrunc: rename_func("TRUNC"),
exp.Hint: lambda self, e: f" /*+ {self.expressions(e).strip()} */",
exp.ILike: no_ilike_sql,
exp.StrToDate: _str_to_date,
exp.StrToTime: lambda self, e: f"TO_TIMESTAMP({self.sql(e, 'this')}, {self.format_time(e)})",
exp.UnixToTime: _unix_to_time,
@@ -112,6 +109,8 @@ class Spark(Hive):
exp.Map: _map_sql,
exp.Reduce: rename_func("AGGREGATE"),
exp.StructKwarg: lambda self, e: f"{self.sql(e, 'this')}: {self.sql(e, 'expression')}",
exp.TimestampTrunc: lambda self, e: f"DATE_TRUNC({self.sql(e, 'unit')}, {self.sql(e, 'this')})",
exp.VariancePop: rename_func("VAR_POP"),
}
WRAP_DERIVED_VALUES = False

View file

@@ -32,6 +32,11 @@ class TSQL(Dialect):
}
class Parser(Parser):
FUNCTIONS = {
**Parser.FUNCTIONS,
"CHARINDEX": exp.StrPosition.from_arg_list,
}
def _parse_convert(self):
to = self._parse_types()
self._match(TokenType.COMMA)