Merging upstream version 18.2.0.

Signed-off-by: Daniel Baumann <daniel@debian.org>
Author: Daniel Baumann, 2025-02-13 20:58:22 +01:00
Parent: 985db29269
Commit: 53cf4a81a6
Signed by: daniel (GPG key ID: FBB4F0E80A80222F)
124 changed files with 60313 additions and 50346 deletions

@@ -1035,12 +1035,13 @@ class Clone(Expression):
"this": True,
"when": False,
"kind": False,
"shallow": False,
"expression": False,
}
class Describe(Expression):
arg_types = {"this": True, "kind": False}
arg_types = {"this": True, "kind": False, "expressions": False}
class Pragma(Expression):
@@ -1070,6 +1071,8 @@ class Show(Expression):
"like": False,
"where": False,
"db": False,
"scope": False,
"scope_kind": False,
"full": False,
"mutex": False,
"query": False,
@@ -1207,6 +1210,10 @@ class Comment(Expression):
arg_types = {"this": True, "kind": True, "expression": True, "exists": False}
class Comprehension(Expression):
arg_types = {"this": True, "expression": True, "iterator": True, "condition": False}
# https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
class MergeTreeTTLAction(Expression):
arg_types = {
@@ -1269,6 +1276,10 @@ class CheckColumnConstraint(ColumnConstraintKind):
pass
class ClusteredColumnConstraint(ColumnConstraintKind):
pass
class CollateColumnConstraint(ColumnConstraintKind):
pass
@@ -1316,6 +1327,14 @@ class InlineLengthColumnConstraint(ColumnConstraintKind):
pass
class NonClusteredColumnConstraint(ColumnConstraintKind):
pass
class NotForReplicationColumnConstraint(ColumnConstraintKind):
arg_types = {}
class NotNullColumnConstraint(ColumnConstraintKind):
arg_types = {"allow_null": False}
@@ -1345,6 +1364,12 @@ class PathColumnConstraint(ColumnConstraintKind):
pass
# computed column expression
# https://learn.microsoft.com/en-us/sql/t-sql/statements/create-table-transact-sql?view=sql-server-ver16
class ComputedColumnConstraint(ColumnConstraintKind):
arg_types = {"this": True, "persisted": False, "not_null": False}
class Constraint(Expression):
arg_types = {"this": True, "expressions": True}
@@ -1489,6 +1514,15 @@ class Check(Expression):
pass
# https://docs.snowflake.com/en/sql-reference/constructs/connect-by
class Connect(Expression):
arg_types = {"start": False, "connect": True}
class Prior(Expression):
pass
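A hedged sketch of how these nodes can surface in practice, assuming the snowflake dialect parses START WITH ... CONNECT BY into a Connect node (the query itself is illustrative):

import sqlglot
from sqlglot import exp

ast = sqlglot.parse_one(
    "SELECT employee_id FROM employees "
    "START WITH manager_id IS NULL "
    "CONNECT BY PRIOR employee_id = manager_id",
    read="snowflake",
)
connect = ast.find(exp.Connect)  # carries the start/connect conditions
prior = ast.find(exp.Prior)      # wraps the PRIOR-qualified side of the condition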
class Directory(Expression):
# https://spark.apache.org/docs/3.0.0-preview/sql-ref-syntax-dml-insert-overwrite-directory-hive.html
arg_types = {"this": True, "local": False, "row_format": False}
@@ -1578,6 +1612,7 @@ class Insert(DDL):
"alternative": False,
"where": False,
"ignore": False,
"by_name": False,
}
def with_(
@@ -2045,8 +2080,12 @@ class NoPrimaryIndexProperty(Property):
arg_types = {}
class OnProperty(Property):
arg_types = {"this": True}
class OnCommitProperty(Property):
arg_type = {"delete": False}
arg_types = {"delete": False}
class PartitionedByProperty(Property):
@@ -2282,6 +2321,16 @@ class Subqueryable(Unionable):
def named_selects(self) -> t.List[str]:
raise NotImplementedError("Subqueryable objects must implement `named_selects`")
def select(
self,
*expressions: t.Optional[ExpOrStr],
append: bool = True,
dialect: DialectType = None,
copy: bool = True,
**opts,
) -> Subqueryable:
raise NotImplementedError("Subqueryable objects must implement `select`")
def with_(
self,
alias: ExpOrStr,
@@ -2323,6 +2372,7 @@ QUERY_MODIFIERS = {
"match": False,
"laterals": False,
"joins": False,
"connect": False,
"pivots": False,
"where": False,
"group": False,
@@ -2363,6 +2413,7 @@ class Table(Expression):
"pivots": False,
"hints": False,
"system_time": False,
"version": False,
}
@property
@@ -2403,21 +2454,13 @@ class Table(Expression):
return parts
# See the TSQL "Querying data in a system-versioned temporal table" page
class SystemTime(Expression):
arg_types = {
"this": False,
"expression": False,
"kind": True,
}
class Union(Subqueryable):
arg_types = {
"with": False,
"this": True,
"expression": True,
"distinct": False,
"by_name": False,
**QUERY_MODIFIERS,
}
@@ -2529,6 +2572,7 @@ class Update(Expression):
"from": False,
"where": False,
"returning": False,
"order": False,
"limit": False,
}
@@ -2545,6 +2589,20 @@ class Var(Expression):
pass
class Version(Expression):
"""
Time travel, Iceberg, BigQuery, etc.
https://trino.io/docs/current/connector/iceberg.html?highlight=snapshot#using-snapshots
https://www.databricks.com/blog/2019/02/04/introducing-delta-time-travel-for-large-scale-data-lakes.html
https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#for_system_time_as_of
https://learn.microsoft.com/en-us/sql/relational-databases/tables/querying-data-in-a-system-versioned-temporal-table?view=sql-server-ver16
`this` is either "TIMESTAMP" or "VERSION"
`kind` is one of ("AS OF", "BETWEEN")
"""
arg_types = {"this": True, "kind": True, "expression": False}
class Schema(Expression):
arg_types = {"this": False, "expressions": False}
@@ -3263,6 +3321,23 @@ class Subquery(DerivedTable, Unionable):
expression = expression.this
return expression
def unwrap(self) -> Subquery:
expression = self
while expression.same_parent and expression.is_wrapper:
expression = t.cast(Subquery, expression.parent)
return expression
@property
def is_wrapper(self) -> bool:
"""
Whether this Subquery acts as a simple wrapper around another expression.
SELECT * FROM (((SELECT * FROM t)))
^
This corresponds to a "wrapper" Subquery node
"""
return all(v is None for k, v in self.args.items() if k != "this")
@property
def is_star(self) -> bool:
return self.this.is_star
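A minimal sketch of the wrapper semantics, inferred from the code above (the traversal helpers are standard sqlglot API):

from sqlglot import exp, parse_one

ast = parse_one("SELECT * FROM (((SELECT * FROM t)))")
# the innermost Subquery is the one whose `this` is the actual SELECT
inner = next(s for s in ast.find_all(exp.Subquery) if isinstance(s.this, exp.Select))
outer = inner.unwrap()  # climbs through the redundant wrapper Subquery nodes
assert outer is not inner and outer.is_wrapper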
@@ -3313,7 +3388,7 @@ class Pivot(Expression):
}
class Window(Expression):
class Window(Condition):
arg_types = {
"this": True,
"partition_by": False,
@@ -3375,7 +3450,7 @@ class Boolean(Condition):
pass
class DataTypeSize(Expression):
class DataTypeParam(Expression):
arg_types = {"this": True, "expression": False}
@@ -3386,6 +3461,7 @@ class DataType(Expression):
"nested": False,
"values": False,
"prefix": False,
"kind": False,
}
class Type(AutoName):
@@ -3432,6 +3508,7 @@ class DataType(Expression):
LOWCARDINALITY = auto()
MAP = auto()
MEDIUMBLOB = auto()
MEDIUMINT = auto()
MEDIUMTEXT = auto()
MONEY = auto()
NCHAR = auto()
@@ -3475,6 +3552,7 @@ class DataType(Expression):
VARCHAR = auto()
VARIANT = auto()
XML = auto()
YEAR = auto()
TEXT_TYPES = {
Type.CHAR,
@@ -3498,7 +3576,10 @@ class DataType(Expression):
Type.DOUBLE,
}
NUMERIC_TYPES = {*INTEGER_TYPES, *FLOAT_TYPES}
NUMERIC_TYPES = {
*INTEGER_TYPES,
*FLOAT_TYPES,
}
TEMPORAL_TYPES = {
Type.TIME,
@@ -3511,23 +3592,39 @@ class DataType(Expression):
Type.DATETIME64,
}
META_TYPES = {"UNKNOWN", "NULL"}
@classmethod
def build(
cls, dtype: str | DataType | DataType.Type, dialect: DialectType = None, **kwargs
cls,
dtype: str | DataType | DataType.Type,
dialect: DialectType = None,
udt: bool = False,
**kwargs,
) -> DataType:
"""
Constructs a DataType object.
Args:
dtype: the data type of interest.
dialect: the dialect to use for parsing `dtype`, in case it's a string.
udt: when set to True, `dtype` will be used as-is if it can't be parsed into a
DataType, thus creating a user-defined type.
kwargs: additional arguments to pass to the constructor of DataType.
Returns:
The constructed DataType object.
"""
from sqlglot import parse_one
if isinstance(dtype, str):
upper = dtype.upper()
if upper in DataType.META_TYPES:
data_type_exp: t.Optional[Expression] = DataType(this=DataType.Type[upper])
else:
data_type_exp = parse_one(dtype, read=dialect, into=DataType)
if dtype.upper() == "UNKNOWN":
return DataType(this=DataType.Type.UNKNOWN, **kwargs)
if data_type_exp is None:
raise ValueError(f"Unparsable data type value: {dtype}")
try:
data_type_exp = parse_one(dtype, read=dialect, into=DataType)
except ParseError:
if udt:
return DataType(this=DataType.Type.USERDEFINED, kind=dtype, **kwargs)
raise
elif isinstance(dtype, DataType.Type):
data_type_exp = DataType(this=dtype)
elif isinstance(dtype, DataType):
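A hedged sketch of the udt fallback (the dotted name below is assumed to fail type parsing; exact behaviour depends on the dialect):

from sqlglot import exp

exp.DataType.build("array<int>")  # known type strings parse as before

# with udt=True, an unparsable string becomes a user-defined type and
# its original text is preserved in the new `kind` arg
dt = exp.DataType.build("my_schema.my_type", udt=True)
assert dt.this == exp.DataType.Type.USERDEFINED
assert dt.args.get("kind") == "my_schema.my_type"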
@@ -3538,7 +3635,31 @@ class DataType(Expression):
return DataType(**{**data_type_exp.args, **kwargs})
def is_type(self, *dtypes: str | DataType | DataType.Type) -> bool:
return any(self.this == DataType.build(dtype).this for dtype in dtypes)
"""
Checks whether this DataType matches one of the provided data types. Nested types or precision
will be compared using "structural equivalence" semantics, so e.g. array<int> != array<float>.
Args:
dtypes: the data types to compare this DataType to.
Returns:
True, if and only if there is a type in `dtypes` which is equal to this DataType.
"""
for dtype in dtypes:
other = DataType.build(dtype, udt=True)
if (
other.expressions
or self.this == DataType.Type.USERDEFINED
or other.this == DataType.Type.USERDEFINED
):
matches = self == other
else:
matches = self.this == other.this
if matches:
return True
return False
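The structural-equivalence rule in a short sketch (behaviour read directly off the loop above):

from sqlglot import exp

arr_int = exp.DataType.build("array<int>")
assert arr_int.is_type("array<int>")        # parameterized: compared structurally
assert not arr_int.is_type("array<float>")  # nested types differ, so no match
assert arr_int.is_type("array")             # bare type: only the top-level kind is compared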
# https://www.postgresql.org/docs/15/datatype-pseudo.html
@@ -3546,6 +3667,11 @@ class PseudoType(Expression):
pass
# https://www.postgresql.org/docs/15/datatype-oid.html
class ObjectIdentifier(Expression):
pass
# WHERE x <OP> EXISTS|ALL|ANY|SOME(SELECT ...)
class SubqueryPredicate(Predicate):
pass
@@ -4005,6 +4131,7 @@ class ArrayAny(Func):
class ArrayConcat(Func):
_sql_names = ["ARRAY_CONCAT", "ARRAY_CAT"]
arg_types = {"this": True, "expressions": False}
is_var_len_args = True
@@ -4047,7 +4174,15 @@ class Avg(AggFunc):
class AnyValue(AggFunc):
arg_types = {"this": True, "having": False, "max": False}
arg_types = {"this": True, "having": False, "max": False, "ignore_nulls": False}
class First(Func):
arg_types = {"this": True, "ignore_nulls": False}
class Last(Func):
arg_types = {"this": True, "ignore_nulls": False}
class Case(Func):
@@ -4086,18 +4221,29 @@ class Cast(Func):
return self.name
def is_type(self, *dtypes: str | DataType | DataType.Type) -> bool:
"""
Checks whether this Cast's DataType matches one of the provided data types. Nested types
like arrays or structs will be compared using "structural equivalence" semantics, so e.g.
array<int> != array<float>.
Args:
dtypes: the data types to compare this Cast's DataType to.
Returns:
True, if and only if there is a type in `dtypes` which is equal to this Cast's DataType.
"""
return self.to.is_type(*dtypes)
class CastToStrType(Func):
arg_types = {"this": True, "expression": True}
class Collate(Binary):
class TryCast(Cast):
pass
class TryCast(Cast):
class CastToStrType(Func):
arg_types = {"this": True, "to": True}
class Collate(Binary):
pass
@@ -4310,7 +4456,7 @@ class Greatest(Func):
is_var_len_args = True
class GroupConcat(Func):
class GroupConcat(AggFunc):
arg_types = {"this": True, "separator": False}
@@ -4648,8 +4794,19 @@ class StrToUnix(Func):
arg_types = {"this": False, "format": False}
# https://prestodb.io/docs/current/functions/string.html
# https://spark.apache.org/docs/latest/api/sql/index.html#str_to_map
class StrToMap(Func):
arg_types = {
"this": True,
"pair_delim": False,
"key_value_delim": False,
"duplicate_resolution_callback": False,
}
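A hedged usage sketch, assuming the spark dialect routes STR_TO_MAP to this node (delimiters are illustrative):

import sqlglot
from sqlglot import exp

ast = sqlglot.parse_one("SELECT STR_TO_MAP('a:1,b:2', ',', ':')", read="spark")
fn = ast.find(exp.StrToMap)
# pair_delim ',' splits entries; key_value_delim ':' splits each entry into key/value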
class NumberToStr(Func):
arg_types = {"this": True, "format": True}
arg_types = {"this": True, "format": True, "culture": False}
class FromBase(Func):
@@ -4665,6 +4822,13 @@ class StructExtract(Func):
arg_types = {"this": True, "expression": True}
# https://learn.microsoft.com/en-us/sql/t-sql/functions/stuff-transact-sql?view=sql-server-ver16
# https://docs.snowflake.com/en/sql-reference/functions/insert
class Stuff(Func):
_sql_names = ["STUFF", "INSERT"]
arg_types = {"this": True, "start": True, "length": True, "expression": True}
class Sum(AggFunc):
pass
@@ -4686,7 +4850,7 @@ class StddevSamp(AggFunc):
class TimeToStr(Func):
arg_types = {"this": True, "format": True}
arg_types = {"this": True, "format": True, "culture": False}
class TimeToTimeStr(Func):
@@ -5724,9 +5888,9 @@ def table_(
The new Table instance.
"""
return Table(
this=to_identifier(table, quoted=quoted),
db=to_identifier(db, quoted=quoted),
catalog=to_identifier(catalog, quoted=quoted),
this=to_identifier(table, quoted=quoted) if table else None,
db=to_identifier(db, quoted=quoted) if db else None,
catalog=to_identifier(catalog, quoted=quoted) if catalog else None,
alias=TableAlias(this=to_identifier(alias)) if alias else None,
)
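The conditional guards keep omitted parts out of the AST entirely, instead of emitting empty Identifier nodes; a minimal sketch:

from sqlglot import exp

t = exp.table_("events", db="analytics")  # catalog omitted -> arg stays None
print(t.sql())  # analytics.events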
@@ -5844,8 +6008,8 @@ def convert(value: t.Any, copy: bool = False) -> Expression:
return Array(expressions=[convert(v, copy=copy) for v in value])
if isinstance(value, dict):
return Map(
keys=[convert(k, copy=copy) for k in value],
values=[convert(v, copy=copy) for v in value.values()],
keys=Array(expressions=[convert(k, copy=copy) for k in value]),
values=Array(expressions=[convert(v, copy=copy) for v in value.values()]),
)
raise ValueError(f"Cannot convert {value}")
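A sketch of the dict conversion after this change (rendered SQL varies by dialect, so only the node shape is asserted):

from sqlglot import exp

node = exp.convert({"a": 1, "b": 2})
# keys/values are now single Array nodes wrapping the converted literals,
# rather than bare Python lists
assert isinstance(node.args["keys"], exp.Array)
assert isinstance(node.args["values"], exp.Array)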