
Merging upstream version 6.1.1.

Signed-off-by: Daniel Baumann <daniel@debian.org>
Author: Daniel Baumann, 2025-02-13 08:04:41 +01:00
parent 3c6d649c90
commit 08ecea3adf
Signed by: daniel (GPG key ID: FBB4F0E80A80222F)
61 changed files with 1844 additions and 1555 deletions

sqlglot/parser.py

@@ -78,6 +78,7 @@ class Parser:
TokenType.TEXT,
TokenType.BINARY,
TokenType.JSON,
+ TokenType.INTERVAL,
TokenType.TIMESTAMP,
TokenType.TIMESTAMPTZ,
TokenType.DATETIME,
@@ -85,6 +86,12 @@ class Parser:
TokenType.DECIMAL,
TokenType.UUID,
TokenType.GEOGRAPHY,
+ TokenType.GEOMETRY,
+ TokenType.HLLSKETCH,
+ TokenType.SUPER,
+ TokenType.SERIAL,
+ TokenType.SMALLSERIAL,
+ TokenType.BIGSERIAL,
*NESTED_TYPE_TOKENS,
}
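A quick way to exercise the widened TYPE_TOKENS set is to round-trip DDL that uses the new types. A minimal sketch, assuming sqlglot 6.1.1 is installed and the default tokenizer maps these keywords:

```python
import sqlglot

# SERIAL/SUPER/HLLSKETCH/GEOMETRY should now be accepted as column types
# when parsing CREATE TABLE statements (keyword support is assumed here).
ddl = "CREATE TABLE t (id SERIAL, doc SUPER, sketch HLLSKETCH, shape GEOMETRY)"
print(sqlglot.parse_one(ddl).sql())
```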
@@ -100,13 +107,14 @@ class Parser:
ID_VAR_TOKENS = {
TokenType.VAR,
TokenType.ALTER,
+ TokenType.ALWAYS,
TokenType.BEGIN,
+ TokenType.BOTH,
TokenType.BUCKET,
TokenType.CACHE,
TokenType.COLLATE,
TokenType.COMMIT,
TokenType.CONSTRAINT,
- TokenType.CONVERT,
TokenType.DEFAULT,
TokenType.DELETE,
TokenType.ENGINE,
@@ -115,14 +123,19 @@ class Parser:
TokenType.FALSE,
TokenType.FIRST,
TokenType.FOLLOWING,
TokenType.FOR,
TokenType.FORMAT,
TokenType.FUNCTION,
TokenType.GENERATED,
TokenType.IDENTITY,
TokenType.IF,
TokenType.INDEX,
TokenType.ISNULL,
TokenType.INTERVAL,
TokenType.LAZY,
TokenType.LEADING,
TokenType.LOCATION,
TokenType.NATURAL,
TokenType.NEXT,
TokenType.ONLY,
TokenType.OPTIMIZE,
@@ -141,6 +154,7 @@ class Parser:
TokenType.TABLE_FORMAT,
TokenType.TEMPORARY,
TokenType.TOP,
+ TokenType.TRAILING,
TokenType.TRUNCATE,
TokenType.TRUE,
TokenType.UNBOUNDED,
@@ -150,18 +164,15 @@ class Parser:
*TYPE_TOKENS,
}
- CASTS = {
- TokenType.CAST,
- TokenType.TRY_CAST,
- }
+ TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.NATURAL}
+ TRIM_TYPES = {TokenType.LEADING, TokenType.TRAILING, TokenType.BOTH}
FUNC_TOKENS = {
TokenType.CONVERT,
TokenType.CURRENT_DATE,
TokenType.CURRENT_DATETIME,
TokenType.CURRENT_TIMESTAMP,
TokenType.CURRENT_TIME,
TokenType.EXTRACT,
TokenType.FILTER,
TokenType.FIRST,
TokenType.FORMAT,
@@ -178,7 +189,6 @@ class Parser:
TokenType.DATETIME,
TokenType.TIMESTAMP,
TokenType.TIMESTAMPTZ,
- *CASTS,
*NESTED_TYPE_TOKENS,
*SUBQUERY_PREDICATES,
}
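Since ALWAYS, BOTH, LEADING, TRAILING, and friends joined ID_VAR_TOKENS, they should remain usable as plain identifiers even though they are now keywords. A small sanity check, assuming sqlglot 6.1.1:

```python
import sqlglot

# "always" and "leading" tokenize as keywords now, but ID_VAR_TOKENS
# membership lets them keep working as column/table names.
print(sqlglot.parse_one("SELECT always, leading FROM formats").sql())
```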
@@ -215,6 +225,7 @@ class Parser:
FACTOR = {
TokenType.DIV: exp.IntDiv,
TokenType.LR_ARROW: exp.Distance,
TokenType.SLASH: exp.Div,
TokenType.STAR: exp.Mul,
}
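FACTOR maps operator tokens to expression nodes at multiplicative precedence, so `/` binds tighter than `+`. A minimal illustration, assuming sqlglot 6.1.1:

```python
import sqlglot
from sqlglot import exp

# b / c is reduced to an exp.Div node before the surrounding addition.
tree = sqlglot.parse_one("SELECT a + b / c * d FROM t")
print(tree.find(exp.Div).sql())  # b / c
```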
@@ -299,14 +310,13 @@ class Parser:
PRIMARY_PARSERS = {
TokenType.STRING: lambda _, token: exp.Literal.string(token.text),
TokenType.NUMBER: lambda _, token: exp.Literal.number(token.text),
- TokenType.STAR: lambda self, _: exp.Star(
- **{"except": self._parse_except(), "replace": self._parse_replace()}
- ),
+ TokenType.STAR: lambda self, _: exp.Star(**{"except": self._parse_except(), "replace": self._parse_replace()}),
TokenType.NULL: lambda *_: exp.Null(),
TokenType.TRUE: lambda *_: exp.Boolean(this=True),
TokenType.FALSE: lambda *_: exp.Boolean(this=False),
TokenType.PLACEHOLDER: lambda *_: exp.Placeholder(),
TokenType.BIT_STRING: lambda _, token: exp.BitString(this=token.text),
+ TokenType.HEX_STRING: lambda _, token: exp.HexString(this=token.text),
TokenType.INTRODUCER: lambda self, token: self.expression(
exp.Introducer,
this=token.text,
@@ -319,13 +329,16 @@ class Parser:
TokenType.IN: lambda self, this: self._parse_in(this),
TokenType.IS: lambda self, this: self._parse_is(this),
TokenType.LIKE: lambda self, this: self._parse_escape(
- self.expression(exp.Like, this=this, expression=self._parse_type())
+ self.expression(exp.Like, this=this, expression=self._parse_bitwise())
),
TokenType.ILIKE: lambda self, this: self._parse_escape(
- self.expression(exp.ILike, this=this, expression=self._parse_type())
+ self.expression(exp.ILike, this=this, expression=self._parse_bitwise())
),
TokenType.RLIKE: lambda self, this: self.expression(
- exp.RegexpLike, this=this, expression=self._parse_type()
+ exp.RegexpLike, this=this, expression=self._parse_bitwise()
),
+ TokenType.SIMILAR_TO: lambda self, this: self.expression(
+ exp.SimilarTo, this=this, expression=self._parse_bitwise()
+ ),
}
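The new SIMILAR_TO range parser means the SIMILAR TO predicate now round-trips instead of erroring. A sketch, assuming sqlglot 6.1.1:

```python
import sqlglot
from sqlglot import exp

# SIMILAR TO lands in its own exp.SimilarTo node.
tree = sqlglot.parse_one("SELECT * FROM t WHERE col SIMILAR TO '%(b|d)%'")
print(tree.find(exp.SimilarTo).sql())
```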
@@ -363,28 +376,21 @@ class Parser:
}
FUNCTION_PARSERS = {
- TokenType.CONVERT: lambda self, _: self._parse_convert(),
- TokenType.EXTRACT: lambda self, _: self._parse_extract(),
- **{
- token_type: lambda self, token_type: self._parse_cast(
- self.STRICT_CAST and token_type == TokenType.CAST
- )
- for token_type in CASTS
- },
+ "CONVERT": lambda self: self._parse_convert(),
+ "EXTRACT": lambda self: self._parse_extract(),
+ "SUBSTRING": lambda self: self._parse_substring(),
+ "TRIM": lambda self: self._parse_trim(),
+ "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
+ "TRY_CAST": lambda self: self._parse_cast(False),
}
QUERY_MODIFIER_PARSERS = {
"laterals": lambda self: self._parse_laterals(),
"joins": lambda self: self._parse_joins(),
"where": lambda self: self._parse_where(),
"group": lambda self: self._parse_group(),
"having": lambda self: self._parse_having(),
"qualify": lambda self: self._parse_qualify(),
"window": lambda self: self._match(TokenType.WINDOW)
and self._parse_window(self._parse_id_var(), alias=True),
"distribute": lambda self: self._parse_sort(
TokenType.DISTRIBUTE_BY, exp.Distribute
),
"window": lambda self: self._match(TokenType.WINDOW) and self._parse_window(self._parse_id_var(), alias=True),
"distribute": lambda self: self._parse_sort(TokenType.DISTRIBUTE_BY, exp.Distribute),
"sort": lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort),
"cluster": lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster),
"order": lambda self: self._parse_order(),
@@ -392,6 +398,8 @@ class Parser:
"offset": lambda self: self._parse_offset(),
}
+ MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)
+ CREATABLES = {TokenType.TABLE, TokenType.VIEW, TokenType.FUNCTION, TokenType.INDEX}
STRICT_CAST = True
@@ -457,9 +465,7 @@ class Parser:
Returns
the list of syntax trees (:class:`~sqlglot.expressions.Expression`).
"""
- return self._parse(
- parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
- )
+ return self._parse(parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql)
def parse_into(self, expression_types, raw_tokens, sql=None):
for expression_type in ensure_list(expression_types):
@@ -532,21 +538,13 @@ class Parser:
for k in expression.args:
if k not in expression.arg_types:
- self.raise_error(
- f"Unexpected keyword: '{k}' for {expression.__class__}"
- )
+ self.raise_error(f"Unexpected keyword: '{k}' for {expression.__class__}")
for k, mandatory in expression.arg_types.items():
v = expression.args.get(k)
if mandatory and (v is None or (isinstance(v, list) and not v)):
- self.raise_error(
- f"Required keyword: '{k}' missing for {expression.__class__}"
- )
+ self.raise_error(f"Required keyword: '{k}' missing for {expression.__class__}")
- if (
- args
- and len(args) > len(expression.arg_types)
- and not expression.is_var_len_args
- ):
+ if args and len(args) > len(expression.arg_types) and not expression.is_var_len_args:
self.raise_error(
f"The number of provided arguments ({len(args)}) is greater than "
f"the maximum number of supported arguments ({len(expression.arg_types)})"
@ -594,11 +592,7 @@ class Parser:
)
expression = self._parse_expression()
expression = (
self._parse_set_operations(expression)
if expression
else self._parse_select()
)
expression = self._parse_set_operations(expression) if expression else self._parse_select()
self._parse_query_modifiers(expression)
return expression
@@ -618,11 +612,7 @@ class Parser:
)
def _parse_exists(self, not_=False):
- return (
- self._match(TokenType.IF)
- and (not not_ or self._match(TokenType.NOT))
- and self._match(TokenType.EXISTS)
- )
+ return self._match(TokenType.IF) and (not not_ or self._match(TokenType.NOT)) and self._match(TokenType.EXISTS)
def _parse_create(self):
replace = self._match(TokenType.OR) and self._match(TokenType.REPLACE)
@@ -647,11 +637,9 @@ class Parser:
this = self._parse_index()
elif create_token.token_type in (TokenType.TABLE, TokenType.VIEW):
this = self._parse_table(schema=True)
- properties = self._parse_properties(
- this if isinstance(this, exp.Schema) else None
- )
+ properties = self._parse_properties(this if isinstance(this, exp.Schema) else None)
if self._match(TokenType.ALIAS):
- expression = self._parse_select()
+ expression = self._parse_select(nested=True)
return self.expression(
exp.Create,
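With nested=True, the query after AS may be wrapped in parentheses. A sketch, assuming sqlglot 6.1.1:

```python
import sqlglot

# The parenthesized SELECT after AS parses via _parse_select(nested=True).
print(sqlglot.parse_one("CREATE TABLE t AS (SELECT a FROM b)").sql())
```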
@@ -682,9 +670,7 @@ class Parser:
if schema and not isinstance(value, exp.Schema):
columns = {v.name.upper() for v in value.expressions}
partitions = [
- expression
- for expression in schema.expressions
- if expression.this.name.upper() in columns
+ expression for expression in schema.expressions if expression.this.name.upper() in columns
]
schema.set(
"expressions",
@@ -811,7 +797,7 @@ class Parser:
this=self._parse_table(schema=True),
exists=self._parse_exists(),
partition=self._parse_partition(),
- expression=self._parse_select(),
+ expression=self._parse_select(nested=True),
overwrite=overwrite,
)
@@ -829,8 +815,7 @@ class Parser:
exp.Update,
**{
"this": self._parse_table(schema=True),
- "expressions": self._match(TokenType.SET)
- and self._parse_csv(self._parse_equality),
+ "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality),
"from": self._parse_from(),
"where": self._parse_where(),
},
@@ -865,7 +850,7 @@ class Parser:
this=table,
lazy=lazy,
options=options,
- expression=self._parse_select(),
+ expression=self._parse_select(nested=True),
)
def _parse_partition(self):
@@ -894,9 +879,7 @@ class Parser:
self._match_r_paren()
return self.expression(exp.Tuple, expressions=expressions)
- def _parse_select(self, table=None):
- index = self._index
+ def _parse_select(self, nested=False, table=False):
if self._match(TokenType.SELECT):
hint = self._parse_hint()
all_ = self._match(TokenType.ALL)
@@ -912,9 +895,7 @@ class Parser:
self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")
limit = self._parse_limit(top=True)
- expressions = self._parse_csv(
- lambda: self._parse_annotation(self._parse_expression())
- )
+ expressions = self._parse_csv(lambda: self._parse_annotation(self._parse_expression()))
this = self.expression(
exp.Select,
@@ -960,19 +941,13 @@ class Parser:
)
else:
self.raise_error(f"{this.key} does not support CTE")
- elif self._match(TokenType.L_PAREN):
- this = self._parse_table() if table else self._parse_select()
- if this:
- self._parse_query_modifiers(this)
- self._match_r_paren()
- this = self._parse_subquery(this)
- else:
- self._retreat(index)
+ elif (table or nested) and self._match(TokenType.L_PAREN):
+ this = self._parse_table() if table else self._parse_select(nested=True)
+ self._parse_query_modifiers(this)
+ self._match_r_paren()
+ this = self._parse_subquery(this)
elif self._match(TokenType.VALUES):
- this = self.expression(
- exp.Values, expressions=self._parse_csv(self._parse_value)
- )
+ this = self.expression(exp.Values, expressions=self._parse_csv(self._parse_value))
alias = self._parse_table_alias()
if alias:
this = self.expression(exp.Subquery, this=this, alias=alias)
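The (table or nested) guard means a leading parenthesis is only treated as a nested query where one is legal, instead of speculatively parsing and retreating. A derived table in FROM still parses; a sketch, assuming sqlglot 6.1.1:

```python
import sqlglot

# The FROM clause calls _parse_select(table=True), so the parenthesized
# query becomes a subquery aliased as s.
print(sqlglot.parse_one("SELECT * FROM (SELECT 1 AS x) AS s").sql())
```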
@@ -1001,7 +976,7 @@ class Parser:
def _parse_table_alias(self):
any_token = self._match(TokenType.ALIAS)
- alias = self._parse_id_var(any_token)
+ alias = self._parse_id_var(any_token=any_token, tokens=self.TABLE_ALIAS_TOKENS)
columns = None
if self._match(TokenType.L_PAREN):
@@ -1021,9 +996,24 @@ class Parser:
return self.expression(exp.Subquery, this=this, alias=self._parse_table_alias())
def _parse_query_modifiers(self, this):
- if not isinstance(this, (exp.Subquery, exp.Subqueryable)):
+ if not isinstance(this, self.MODIFIABLES):
return
+ table = isinstance(this, exp.Table)
+ while True:
+ lateral = self._parse_lateral()
+ join = self._parse_join()
+ comma = None if table else self._match(TokenType.COMMA)
+ if lateral:
+ this.append("laterals", lateral)
+ if join:
+ this.append("joins", join)
+ if comma:
+ this.args["from"].append("expressions", self._parse_table())
+ if not (lateral or join or comma):
+ break
for key, parser in self.QUERY_MODIFIER_PARSERS.items():
expression = parser(self)
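The new while loop lets laterals, joins, and comma-separated tables interleave after the initial FROM, each appended to the right arg. A sketch, assuming sqlglot 6.1.1 (note the comma'd table is folded into the FROM expression list):

```python
import sqlglot

# The JOIN is appended to "joins" and the trailing ", c" to the FROM expressions.
print(sqlglot.parse_one("SELECT * FROM a JOIN b ON a.x = b.x, c").sql())
```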
@@ -1032,9 +1022,7 @@ class Parser:
def _parse_annotation(self, expression):
if self._match(TokenType.ANNOTATION):
- return self.expression(
- exp.Annotation, this=self._prev.text, expression=expression
- )
+ return self.expression(exp.Annotation, this=self._prev.text, expression=expression)
return expression
@@ -1052,16 +1040,16 @@ class Parser:
return self.expression(exp.From, expressions=self._parse_csv(self._parse_table))
def _parse_laterals(self):
return self._parse_all(self._parse_lateral)
def _parse_lateral(self):
if not self._match(TokenType.LATERAL):
return None
- if not self._match(TokenType.VIEW):
- self.raise_error("Expected VIEW after LATERAL")
+ subquery = self._parse_select(table=True)
+ if subquery:
+ return self.expression(exp.Lateral, this=subquery)
+ self._match(TokenType.VIEW)
outer = self._match(TokenType.OUTER)
return self.expression(
@@ -1071,31 +1059,27 @@ class Parser:
alias=self.expression(
exp.TableAlias,
this=self._parse_id_var(any_token=False),
- columns=(
- self._parse_csv(self._parse_id_var)
- if self._match(TokenType.ALIAS)
- else None
- ),
+ columns=(self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else None),
),
)
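_parse_lateral now tries a lateral subquery first and only falls back to the (now optional) VIEW keyword, so Hive-style LATERAL VIEW keeps working. A sketch, assuming sqlglot 6.1.1:

```python
import sqlglot

# Hive's LATERAL VIEW EXPLODE form flows through the VIEW branch.
sql = "SELECT a, x FROM t LATERAL VIEW EXPLODE(xs) tbl AS x"
print(sqlglot.parse_one(sql, read="hive").sql(dialect="hive"))
```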
def _parse_joins(self):
return self._parse_all(self._parse_join)
def _parse_join_side_and_kind(self):
return (
+ self._match(TokenType.NATURAL) and self._prev,
self._match_set(self.JOIN_SIDES) and self._prev,
self._match_set(self.JOIN_KINDS) and self._prev,
)
def _parse_join(self):
- side, kind = self._parse_join_side_and_kind()
+ natural, side, kind = self._parse_join_side_and_kind()
if not self._match(TokenType.JOIN):
return None
kwargs = {"this": self._parse_table()}
+ if natural:
+ kwargs["natural"] = True
if side:
kwargs["side"] = side.text
if kind:
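NATURAL is consumed ahead of the side/kind tokens and recorded on the Join node. A quick check, assuming sqlglot 6.1.1:

```python
import sqlglot
from sqlglot import exp

# natural=True is set in the join's args by _parse_join.
join = sqlglot.parse_one("SELECT * FROM a NATURAL LEFT JOIN b").find(exp.Join)
print(join.args.get("natural"), join.sql())
```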
@@ -1120,6 +1104,11 @@ class Parser:
)
def _parse_table(self, schema=False):
+ lateral = self._parse_lateral()
+ if lateral:
+ return lateral
unnest = self._parse_unnest()
if unnest:
@@ -1172,9 +1161,7 @@ class Parser:
expressions = self._parse_csv(self._parse_column)
self._match_r_paren()
- ordinality = bool(
- self._match(TokenType.WITH) and self._match(TokenType.ORDINALITY)
- )
+ ordinality = bool(self._match(TokenType.WITH) and self._match(TokenType.ORDINALITY))
alias = self._parse_table_alias()
@@ -1280,17 +1267,13 @@ class Parser:
if not self._match(TokenType.ORDER_BY):
return this
- return self.expression(
- exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
- )
+ return self.expression(exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered))
def _parse_sort(self, token_type, exp_class):
if not self._match(token_type):
return None
- return self.expression(
- exp_class, expressions=self._parse_csv(self._parse_ordered)
- )
+ return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))
def _parse_ordered(self):
this = self._parse_conjunction()
@@ -1305,22 +1288,17 @@ class Parser:
if (
not explicitly_null_ordered
and (
- (asc and self.null_ordering == "nulls_are_small")
- or (desc and self.null_ordering != "nulls_are_small")
+ (asc and self.null_ordering == "nulls_are_small") or (desc and self.null_ordering != "nulls_are_small")
)
and self.null_ordering != "nulls_are_last"
):
nulls_first = True
- return self.expression(
- exp.Ordered, this=this, desc=desc, nulls_first=nulls_first
- )
+ return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)
def _parse_limit(self, this=None, top=False):
if self._match(TokenType.TOP if top else TokenType.LIMIT):
- return self.expression(
- exp.Limit, this=this, expression=self._parse_number()
- )
+ return self.expression(exp.Limit, this=this, expression=self._parse_number())
if self._match(TokenType.FETCH):
direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
direction = self._prev.text if direction else "FIRST"
@@ -1354,7 +1332,7 @@ class Parser:
expression,
this=this,
distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
- expression=self._parse_select(),
+ expression=self._parse_select(nested=True),
)
def _parse_expression(self):
@@ -1396,9 +1374,7 @@ class Parser:
this = self.expression(exp.In, this=this, unnest=unnest)
else:
self._match_l_paren()
- expressions = self._parse_csv(
- lambda: self._parse_select() or self._parse_expression()
- )
+ expressions = self._parse_csv(lambda: self._parse_select() or self._parse_expression())
if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
this = self.expression(exp.In, this=this, query=expressions[0])
@@ -1430,13 +1406,9 @@ class Parser:
expression=self._parse_term(),
)
elif self._match_pair(TokenType.LT, TokenType.LT):
- this = self.expression(
- exp.BitwiseLeftShift, this=this, expression=self._parse_term()
- )
+ this = self.expression(exp.BitwiseLeftShift, this=this, expression=self._parse_term())
elif self._match_pair(TokenType.GT, TokenType.GT):
- this = self.expression(
- exp.BitwiseRightShift, this=this, expression=self._parse_term()
- )
+ this = self.expression(exp.BitwiseRightShift, this=this, expression=self._parse_term())
else:
break
@@ -1524,7 +1496,7 @@ class Parser:
self.raise_error("Expecting >")
if type_token in self.TIMESTAMPS:
- tz = self._match(TokenType.WITH_TIME_ZONE)
+ tz = self._match(TokenType.WITH_TIME_ZONE) or type_token == TokenType.TIMESTAMPTZ
self._match(TokenType.WITHOUT_TIME_ZONE)
if tz:
return exp.DataType(
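TIMESTAMPTZ now implies a time zone even when WITH TIME ZONE is not spelled out. A sketch, assuming sqlglot 6.1.1:

```python
import sqlglot
from sqlglot import exp

# The cast target should be the zoned TIMESTAMPTZ data type.
cast = sqlglot.parse_one("SELECT CAST(x AS TIMESTAMPTZ)").find(exp.Cast)
print(repr(cast.args["to"]))
```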
@@ -1594,16 +1566,14 @@ class Parser:
if query:
expressions = [query]
else:
- expressions = self._parse_csv(
- lambda: self._parse_alias(self._parse_conjunction(), explicit=True)
- )
+ expressions = self._parse_csv(lambda: self._parse_alias(self._parse_conjunction(), explicit=True))
this = list_get(expressions, 0)
self._parse_query_modifiers(this)
self._match_r_paren()
if isinstance(this, exp.Subqueryable):
- return self._parse_subquery(this)
+ return self._parse_set_operations(self._parse_subquery(this))
if len(expressions) > 1:
return self.expression(exp.Tuple, expressions=expressions)
return self.expression(exp.Paren, this=this)
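Wrapping the subquery in _parse_set_operations lets a parenthesized select chain set operators. A quick check, assuming sqlglot 6.1.1:

```python
import sqlglot

# Previously the UNION after the closing paren was not picked up here.
print(sqlglot.parse_one("(SELECT 1) UNION (SELECT 2)").sql())
```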
@@ -1611,11 +1581,7 @@ class Parser:
return None
def _parse_field(self, any_token=False):
- return (
- self._parse_primary()
- or self._parse_function()
- or self._parse_id_var(any_token)
- )
+ return self._parse_primary() or self._parse_function() or self._parse_id_var(any_token)
def _parse_function(self):
if not self._curr:
@@ -1628,21 +1594,22 @@ class Parser:
if not self._next or self._next.token_type != TokenType.L_PAREN:
if token_type in self.NO_PAREN_FUNCTIONS:
- return self.expression(
- self._advance() or self.NO_PAREN_FUNCTIONS[token_type]
- )
+ return self.expression(self._advance() or self.NO_PAREN_FUNCTIONS[token_type])
return None
if token_type not in self.FUNC_TOKENS:
return None
- if self._match_set(self.FUNCTION_PARSERS):
- self._advance()
- this = self.FUNCTION_PARSERS[token_type](self, token_type)
+ this = self._curr.text
+ upper = this.upper()
+ self._advance(2)
+ parser = self.FUNCTION_PARSERS.get(upper)
+ if parser:
+ this = parser(self)
else:
subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)
- this = self._curr.text
- self._advance(2)
if subquery_predicate and self._curr.token_type in (
TokenType.SELECT,
@@ -1652,7 +1619,7 @@ class Parser:
self._match_r_paren()
return this
- function = self.FUNCTIONS.get(this.upper())
+ function = self.FUNCTIONS.get(upper)
args = self._parse_csv(self._parse_lambda)
if function:
@@ -1700,10 +1667,7 @@ class Parser:
self._retreat(index)
return this
- args = self._parse_csv(
- lambda: self._parse_constraint()
- or self._parse_column_def(self._parse_field())
- )
+ args = self._parse_csv(lambda: self._parse_constraint() or self._parse_column_def(self._parse_field(True)))
self._match_r_paren()
return self.expression(exp.Schema, this=this, expressions=args)
@@ -1720,12 +1684,9 @@ class Parser:
break
constraints.append(constraint)
- return self.expression(
- exp.ColumnDef, this=this, kind=kind, constraints=constraints
- )
+ return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)
def _parse_column_constraint(self):
- kind = None
this = None
if self._match(TokenType.CONSTRAINT):
@@ -1735,28 +1696,28 @@ class Parser:
kind = exp.AutoIncrementColumnConstraint()
elif self._match(TokenType.CHECK):
self._match_l_paren()
- kind = self.expression(
- exp.CheckColumnConstraint, this=self._parse_conjunction()
- )
+ kind = self.expression(exp.CheckColumnConstraint, this=self._parse_conjunction())
self._match_r_paren()
elif self._match(TokenType.COLLATE):
kind = self.expression(exp.CollateColumnConstraint, this=self._parse_var())
elif self._match(TokenType.DEFAULT):
- kind = self.expression(
- exp.DefaultColumnConstraint, this=self._parse_field()
- )
- elif self._match(TokenType.NOT) and self._match(TokenType.NULL):
+ kind = self.expression(exp.DefaultColumnConstraint, this=self._parse_field())
+ elif self._match_pair(TokenType.NOT, TokenType.NULL):
kind = exp.NotNullColumnConstraint()
elif self._match(TokenType.SCHEMA_COMMENT):
- kind = self.expression(
- exp.CommentColumnConstraint, this=self._parse_string()
- )
+ kind = self.expression(exp.CommentColumnConstraint, this=self._parse_string())
elif self._match(TokenType.PRIMARY_KEY):
kind = exp.PrimaryKeyColumnConstraint()
elif self._match(TokenType.UNIQUE):
kind = exp.UniqueColumnConstraint()
- if kind is None:
+ elif self._match(TokenType.GENERATED):
+ if self._match(TokenType.BY_DEFAULT):
+ kind = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=False)
+ else:
+ self._match(TokenType.ALWAYS)
+ kind = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)
+ self._match_pair(TokenType.ALIAS, TokenType.IDENTITY)
+ else:
return None
return self.expression(exp.ColumnConstraint, this=this, kind=kind)
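The new GENERATED branch covers both identity flavors, with this=True marking ALWAYS. A sketch, assuming sqlglot 6.1.1:

```python
import sqlglot

# GENERATED {ALWAYS | BY DEFAULT} AS IDENTITY should now round-trip.
for ddl in [
    "CREATE TABLE t (id INT GENERATED ALWAYS AS IDENTITY)",
    "CREATE TABLE t (id INT GENERATED BY DEFAULT AS IDENTITY)",
]:
    print(sqlglot.parse_one(ddl).sql())
```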
@@ -1864,9 +1825,7 @@ class Parser:
if not self._match(TokenType.END):
self.raise_error("Expected END after CASE", self._prev)
- return self._parse_window(
- self.expression(exp.Case, this=expression, ifs=ifs, default=default)
- )
+ return self._parse_window(self.expression(exp.Case, this=expression, ifs=ifs, default=default))
def _parse_if(self):
if self._match(TokenType.L_PAREN):
@@ -1889,7 +1848,7 @@ class Parser:
if not self._match(TokenType.FROM):
self.raise_error("Expected FROM after EXTRACT", self._prev)
- return self.expression(exp.Extract, this=this, expression=self._parse_type())
+ return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())
def _parse_cast(self, strict):
this = self._parse_conjunction()
@@ -1917,12 +1876,54 @@ class Parser:
to = None
return self.expression(exp.Cast, this=this, to=to)
def _parse_substring(self):
# Postgres supports the form: substring(string [from int] [for int])
# https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6
args = self._parse_csv(self._parse_bitwise)
if self._match(TokenType.FROM):
args.append(self._parse_bitwise())
if self._match(TokenType.FOR):
args.append(self._parse_bitwise())
this = exp.Substring.from_arg_list(args)
self.validate_expression(this, args)
return this
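Both call shapes should collapse into the same exp.Substring node. A quick check, assuming sqlglot 6.1.1:

```python
import sqlglot

# The comma form and the Postgres FROM/FOR form build identical arguments.
for sql in ["SELECT SUBSTRING('hello', 2, 3)", "SELECT SUBSTRING('hello' FROM 2 FOR 3)"]:
    print(sqlglot.transpile(sql)[0])
```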
def _parse_trim(self):
# https://www.w3resource.com/sql/character-functions/trim.php
# https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html
position = None
collation = None
if self._match_set(self.TRIM_TYPES):
position = self._prev.text.upper()
expression = self._parse_term()
if self._match(TokenType.FROM):
this = self._parse_term()
else:
this = expression
expression = None
if self._match(TokenType.COLLATE):
collation = self._parse_term()
return self.expression(
exp.Trim,
this=this,
position=position,
expression=expression,
collation=collation,
)
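A sketch of the TRIM forms this parser accepts, assuming sqlglot 6.1.1:

```python
import sqlglot

# Plain TRIM and the positional TRIM ... FROM form both parse into exp.Trim.
for sql in ["SELECT TRIM(col)", "SELECT TRIM(LEADING 'x' FROM col)"]:
    print(sqlglot.transpile(sql)[0])
```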
def _parse_window(self, this, alias=False):
if self._match(TokenType.FILTER):
self._match_l_paren()
- this = self.expression(
- exp.Filter, this=this, expression=self._parse_where()
- )
+ this = self.expression(exp.Filter, this=this, expression=self._parse_where())
self._match_r_paren()
if self._match(TokenType.WITHIN_GROUP):
@@ -1935,6 +1936,25 @@ class Parser:
self._match_r_paren()
return this
+ # The SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] before OVER;
+ # some dialects implement it and some do not.
+ # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html
+ # The code above in _parse_lambda handles
+ # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...
+ # while the lines below handle
+ # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...
+ # Oracle allows both formats
+ # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
+ # and Snowflake chose to do the same for familiarity:
+ # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
+ if self._match(TokenType.IGNORE_NULLS):
+ this = self.expression(exp.IgnoreNulls, this=this)
+ elif self._match(TokenType.RESPECT_NULLS):
+ this = self.expression(exp.RespectNulls, this=this)
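Both placements should now produce the same IgnoreNulls wrapper. A quick check, assuming sqlglot 6.1.1:

```python
import sqlglot

# Inside-the-parens (handled in _parse_lambda) and outside-the-parens
# (handled here) spellings of IGNORE NULLS.
for sql in [
    "SELECT FIRST_VALUE(x IGNORE NULLS) OVER (ORDER BY y) FROM t",
    "SELECT FIRST_VALUE(x) IGNORE NULLS OVER (ORDER BY y) FROM t",
]:
    print(sqlglot.parse_one(sql).sql())
```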
# bigquery select from window x AS (partition by ...)
if alias:
self._match(TokenType.ALIAS)
@@ -1992,13 +2012,9 @@ class Parser:
self._match(TokenType.BETWEEN)
return {
- "value": (
- self._match_set((TokenType.UNBOUNDED, TokenType.CURRENT_ROW))
- and self._prev.text
- )
+ "value": (self._match_set((TokenType.UNBOUNDED, TokenType.CURRENT_ROW)) and self._prev.text)
or self._parse_bitwise(),
- "side": self._match_set((TokenType.PRECEDING, TokenType.FOLLOWING))
- and self._prev.text,
+ "side": self._match_set((TokenType.PRECEDING, TokenType.FOLLOWING)) and self._prev.text,
}
def _parse_alias(self, this, explicit=False):
@@ -2023,22 +2039,16 @@ class Parser:
return this
- def _parse_id_var(self, any_token=True):
+ def _parse_id_var(self, any_token=True, tokens=None):
identifier = self._parse_identifier()
if identifier:
return identifier
- if (
- any_token
- and self._curr
- and self._curr.token_type not in self.RESERVED_KEYWORDS
- ):
+ if any_token and self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
return self._advance() or exp.Identifier(this=self._prev.text, quoted=False)
- return self._match_set(self.ID_VAR_TOKENS) and exp.Identifier(
- this=self._prev.text, quoted=False
- )
+ return self._match_set(tokens or self.ID_VAR_TOKENS) and exp.Identifier(this=self._prev.text, quoted=False)
def _parse_string(self):
if self._match(TokenType.STRING):
@@ -2077,9 +2087,7 @@ class Parser:
def _parse_star(self):
if self._match(TokenType.STAR):
- return exp.Star(
- **{"except": self._parse_except(), "replace": self._parse_replace()}
- )
+ return exp.Star(**{"except": self._parse_except(), "replace": self._parse_replace()})
return None
def _parse_placeholder(self):
@@ -2117,15 +2125,10 @@ class Parser:
this = parse()
while self._match_set(expressions):
- this = self.expression(
- expressions[self._prev.token_type], this=this, expression=parse()
- )
+ this = self.expression(expressions[self._prev.token_type], this=this, expression=parse())
return this
def _parse_all(self, parse):
return list(iter(parse, None))
def _parse_wrapped_id_vars(self):
self._match_l_paren()
expressions = self._parse_csv(self._parse_id_var)
@@ -2156,10 +2159,7 @@ class Parser:
if not self._curr or not self._next:
return None
- if (
- self._curr.token_type == token_type_a
- and self._next.token_type == token_type_b
- ):
+ if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
if advance:
self._advance(2)
return True