Merging upstream version 10.6.0.

Signed-off-by: Daniel Baumann <daniel@debian.org>
2025-02-13 15:08:15 +01:00 · 2025-02-13 15:08:15 +01:00 · 2153103f81
commit 2153103f81
parent fe1b1057f7
36 changed files with 1007 additions and 270 deletions
--- a/sqlglot/dialects/__init__.py
+++ b/sqlglot/dialects/__init__.py
@@ -1,17 +1,14 @@
 """
 ## Dialects

-One of the core abstractions in SQLGlot is the concept of a "dialect". The `Dialect` class essentially implements a
-"SQLGlot dialect", which aims to be as generic and ANSI-compliant as possible. It relies on the base `Tokenizer`,
-`Parser` and `Generator` classes to achieve this goal, so these need to be very lenient when it comes to consuming
-SQL code.
+While there is a SQL standard, most SQL engines support a variation of that standard. This makes it difficult
+to write portable SQL code. SQLGlot bridges all the different variations, called "dialects", with an extensible
+SQL transpilation framework. 

-However, there are cases where the syntax of different SQL dialects varies wildly, even for common tasks. One such
-example is the date/time functions, which can be hard to deal with. For this reason, it's sometimes necessary to
-override the base dialect in order to specialize its behavior. This can be easily done in SQLGlot: supporting new
-dialects is as simple as subclassing from `Dialect` and overriding its various components (e.g. the `Parser` class),
-in order to implement the target behavior.
+The base `sqlglot.dialects.dialect.Dialect` class implements a generic dialect that aims to be as universal as possible.

+Each SQL variation has its own `Dialect` subclass, extending the corresponding `Tokenizer`, `Parser` and `Generator`
+classes as needed.

 ### Implementing a custom Dialect

--- a/sqlglot/dialects/bigquery.py
+++ b/sqlglot/dialects/bigquery.py
@@ -169,6 +169,13 @@ class BigQuery(Dialect):
            TokenType.VALUES,
        }

+        PROPERTY_PARSERS = {
+            **parser.Parser.PROPERTY_PARSERS,  # type: ignore
+            "NOT DETERMINISTIC": lambda self: self.expression(
+                exp.VolatilityProperty, this=exp.Literal.string("VOLATILE")
+            ),
+        }
+
    class Generator(generator.Generator):
        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,  # type: ignore
--- a/sqlglot/dialects/duckdb.py
+++ b/sqlglot/dialects/duckdb.py
@@ -66,12 +66,11 @@ def _sort_array_reverse(args):
    return exp.SortArray(this=seq_get(args, 0), asc=exp.false())


-def _struct_pack_sql(self, expression):
+def _struct_sql(self, expression):
    args = [
-        self.binary(e, ":=") if isinstance(e, exp.EQ) else self.sql(e)
-        for e in expression.expressions
+        f"'{e.name or e.this.name}': {self.sql(e, 'expression')}" for e in expression.expressions
    ]
-    return f"STRUCT_PACK({', '.join(args)})"
+    return f"{{{', '.join(args)}}}"


 def _datatype_sql(self, expression):
@@ -153,7 +152,7 @@ class DuckDB(Dialect):
            exp.StrToDate: lambda self, e: f"CAST({_str_to_time_sql(self, e)} AS DATE)",
            exp.StrToTime: _str_to_time_sql,
            exp.StrToUnix: lambda self, e: f"EPOCH(STRPTIME({self.sql(e, 'this')}, {self.format_time(e)}))",
-            exp.Struct: _struct_pack_sql,
+            exp.Struct: _struct_sql,
            exp.TableSample: no_tablesample_sql,
            exp.TimeStrToDate: lambda self, e: f"CAST({self.sql(e, 'this')} AS DATE)",
            exp.TimeStrToTime: timestrtotime_sql,
--- a/sqlglot/dialects/hive.py
+++ b/sqlglot/dialects/hive.py
@@ -251,7 +251,7 @@ class Hive(Dialect):

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,  # type: ignore
-            TokenType.SERDE_PROPERTIES: lambda self: exp.SerdeProperties(
+            "WITH SERDEPROPERTIES": lambda self: exp.SerdeProperties(
                expressions=self._parse_wrapped_csv(self._parse_property)
            ),
        }
--- a/sqlglot/dialects/mysql.py
+++ b/sqlglot/dialects/mysql.py
@@ -202,7 +202,7 @@ class MySQL(Dialect):

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,  # type: ignore
-            TokenType.ENGINE: lambda self: self._parse_property_assignment(exp.EngineProperty),
+            "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        }

        STATEMENT_PARSERS = {
--- a/sqlglot/dialects/oracle.py
+++ b/sqlglot/dialects/oracle.py
@@ -74,13 +74,16 @@ class Oracle(Dialect):
        def query_modifiers(self, expression, *sqls):
            return csv(
                *sqls,
-                *[self.sql(sql) for sql in expression.args.get("laterals", [])],
-                *[self.sql(sql) for sql in expression.args.get("joins", [])],
+                *[self.sql(sql) for sql in expression.args.get("joins") or []],
+                self.sql(expression, "match"),
+                *[self.sql(sql) for sql in expression.args.get("laterals") or []],
                self.sql(expression, "where"),
                self.sql(expression, "group"),
                self.sql(expression, "having"),
                self.sql(expression, "qualify"),
-                self.sql(expression, "window"),
+                self.seg("WINDOW ") + self.expressions(expression, "windows", flat=True)
+                if expression.args.get("windows")
+                else "",
                self.sql(expression, "distribute"),
                self.sql(expression, "sort"),
                self.sql(expression, "cluster"),
@@ -99,6 +102,7 @@ class Oracle(Dialect):
    class Tokenizer(tokens.Tokenizer):
        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
+            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "MINUS": TokenType.EXCEPT,
            "START": TokenType.BEGIN,
            "TOP": TokenType.TOP,
--- a/sqlglot/dialects/postgres.py
+++ b/sqlglot/dialects/postgres.py
@@ -9,6 +9,7 @@ from sqlglot.dialects.dialect import (
    no_paren_current_date_sql,
    no_tablesample_sql,
    no_trycast_sql,
+    rename_func,
    str_position_sql,
    trim_sql,
 )
@@ -260,6 +261,16 @@ class Postgres(Dialect):
            "TO_CHAR": format_time_lambda(exp.TimeToStr, "postgres"),
        }

+        BITWISE = {
+            **parser.Parser.BITWISE,  # type: ignore
+            TokenType.HASH: exp.BitwiseXor,
+        }
+
+        FACTOR = {
+            **parser.Parser.FACTOR,  # type: ignore
+            TokenType.CARET: exp.Pow,
+        }
+
    class Generator(generator.Generator):
        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,  # type: ignore
@@ -273,6 +284,7 @@ class Postgres(Dialect):

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,  # type: ignore
+            exp.BitwiseXor: lambda self, e: self.binary(e, "#"),
            exp.ColumnDef: preprocess(
                [
                    _auto_increment_to_serial,
@@ -285,11 +297,13 @@ class Postgres(Dialect):
            exp.JSONBExtract: lambda self, e: self.binary(e, "#>"),
            exp.JSONBExtractScalar: lambda self, e: self.binary(e, "#>>"),
            exp.JSONBContains: lambda self, e: self.binary(e, "?"),
+            exp.Pow: lambda self, e: self.binary(e, "^"),
            exp.CurrentDate: no_paren_current_date_sql,
            exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
            exp.DateAdd: _date_add_sql("+"),
            exp.DateSub: _date_add_sql("-"),
            exp.DateDiff: _date_diff_sql,
+            exp.LogicalOr: rename_func("BOOL_OR"),
            exp.RegexpLike: lambda self, e: self.binary(e, "~"),
            exp.RegexpILike: lambda self, e: self.binary(e, "~*"),
            exp.StrPosition: str_position_sql,
--- a/sqlglot/dialects/presto.py
+++ b/sqlglot/dialects/presto.py
@@ -174,6 +174,7 @@ class Presto(Dialect):
            "DATE_FORMAT": format_time_lambda(exp.TimeToStr, "presto"),
            "DATE_PARSE": format_time_lambda(exp.StrToTime, "presto"),
            "FROM_UNIXTIME": _from_unixtime,
+            "NOW": exp.CurrentTimestamp.from_arg_list,
            "STRPOS": lambda args: exp.StrPosition(
                this=seq_get(args, 0),
                substr=seq_get(args, 1),
@@ -194,7 +195,6 @@ class Presto(Dialect):
        FUNCTION_PARSERS.pop("TRIM")

    class Generator(generator.Generator):
-
        STRUCT_DELIMITER = ("(", ")")

        ROOT_PROPERTIES = {exp.SchemaCommentProperty}
--- a/sqlglot/dialects/redshift.py
+++ b/sqlglot/dialects/redshift.py
@@ -93,7 +93,7 @@ class Redshift(Postgres):
            rows = [tuple_exp.expressions for tuple_exp in expression.expressions]
            selects = []
            for i, row in enumerate(rows):
-                if i == 0:
+                if i == 0 and expression.alias:
                    row = [
                        exp.alias_(value, column_name)
                        for value, column_name in zip(row, expression.args["alias"].args["columns"])
--- a/sqlglot/dialects/snowflake.py
+++ b/sqlglot/dialects/snowflake.py
@@ -178,11 +178,6 @@ class Snowflake(Dialect):
            ),
        }

-        PROPERTY_PARSERS = {
-            **parser.Parser.PROPERTY_PARSERS,
-            TokenType.PARTITION_BY: lambda self: self._parse_partitioned_by(),
-        }
-
    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", "$$"]
        ESCAPES = ["\\", "'"]
@@ -195,6 +190,7 @@ class Snowflake(Dialect):
        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "EXCLUDE": TokenType.EXCEPT,
+            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "RENAME": TokenType.REPLACE,
            "TIMESTAMP_LTZ": TokenType.TIMESTAMPLTZ,
            "TIMESTAMP_NTZ": TokenType.TIMESTAMP,
--- a/sqlglot/dialects/spark.py
+++ b/sqlglot/dialects/spark.py
@@ -1,7 +1,7 @@
 from __future__ import annotations

 from sqlglot import exp, parser
-from sqlglot.dialects.dialect import create_with_partitions_sql, rename_func
+from sqlglot.dialects.dialect import create_with_partitions_sql, rename_func, trim_sql
 from sqlglot.dialects.hive import Hive
 from sqlglot.helper import seq_get

@@ -122,6 +122,7 @@ class Spark(Hive):
            exp.Reduce: rename_func("AGGREGATE"),
            exp.StructKwarg: lambda self, e: f"{self.sql(e, 'this')}: {self.sql(e, 'expression')}",
            exp.TimestampTrunc: lambda self, e: f"DATE_TRUNC({self.sql(e, 'unit')}, {self.sql(e, 'this')})",
+            exp.Trim: trim_sql,
            exp.VariancePop: rename_func("VAR_POP"),
            exp.DateFromParts: rename_func("MAKE_DATE"),
            exp.LogicalOr: rename_func("BOOL_OR"),