sqlglot/sqlglot/parser.py

import logging

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_errors
from sqlglot.helper import apply_index_offset, ensure_list, list_get
from sqlglot.tokens import Token, Tokenizer, TokenType

logger = logging.getLogger("sqlglot")


class Parser:
    """
    Parser consumes a list of tokens produced by the :class:`~sqlglot.tokens.Tokenizer`
    and produces a parsed syntax tree.

    Args
        error_level (ErrorLevel): the desired error level. Default: ErrorLevel.RAISE.
        error_message_context (int): determines the amount of context to capture from
            a query string when displaying the error message (in number of characters).
            Default: 100.
        index_offset (int): Index offset for arrays, e.g. ARRAY[0] vs ARRAY[1] as the head of a list.
            Default: 0
        unnest_column_only (bool): Stored on the parser as given. NOTE(review): its effect
            is not visible in this part of the file - confirm against the UNNEST parsing code.
            Default: False
        alias_post_tablesample (bool): If the table alias comes after tablesample.
            Default: False
        max_errors (int): Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
        null_ordering (str): Indicates the default null ordering method to use if not explicitly set.
            Options are "nulls_are_small", "nulls_are_large", "nulls_are_last".
            The constructor default is None. NOTE(review): presumably None is treated as
            "nulls_are_small" by the consumer of this setting - confirm.
    """

    # Maps a SQL function name to a builder that receives the parsed argument
    # list. Every name returned by sql_names() of each class in
    # exp.ALL_FUNCTIONS is bound to that class's from_arg_list; the explicit
    # entries below build composite expressions instead.
    FUNCTIONS = {
        **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()},
        # DATE_TO_DATE_STR(x) and TIME_TO_TIME_STR(x) both become CAST(x AS TEXT).
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=list_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=list_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        # TS_OR_DS_TO_DATE_STR(x) becomes a Substring over CAST(x AS TEXT) with
        # start=1 and length=10.
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=list_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
    }

    # Token types mapped to expression classes.
    # NOTE(review): presumably these are functions that may be written without
    # parentheses - confirm against the consumer of this table.
    # NOTE(review): CURRENT_DATETIME maps to exp.CurrentDate, the same class as
    # CURRENT_DATE - confirm this is intentional.
    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
    }

    # Type tokens kept in their own set so they can be reused by TYPE_TOKENS
    # and FUNC_TOKENS.
    # NOTE(review): presumably these are the types that can wrap other types
    # (e.g. ARRAY<INT>) - confirm against the type parsing code.
    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.MAP,
        TokenType.STRUCT,
        TokenType.NULLABLE,
    }

    # All data type tokens: the scalar types listed here plus every member of
    # NESTED_TYPE_TOKENS.
    TYPE_TOKENS = {
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.SMALLINT,
        TokenType.INT,
        TokenType.BIGINT,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.TEXT,
        TokenType.BINARY,
        TokenType.JSON,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.DATETIME,
        TokenType.DATE,
        TokenType.DECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        *NESTED_TYPE_TOKENS,
    }

    # Predicate tokens mapped to their expression class; SOME shares exp.Any
    # with ANY.
    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    # Every token type produced for the tokenizer's single-character tokens,
    # plus SELECT.
    RESERVED_KEYWORDS = {*Tokenizer.SINGLE_TOKENS.values(), TokenType.SELECT}

    # VAR plus a list of keyword tokens, every key of SUBQUERY_PREDICATES
    # (unpacking a dict yields its keys) and every member of TYPE_TOKENS.
    # NOTE(review): presumably the token types accepted where an identifier or
    # variable name is expected - confirm against _parse_id_var.
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ALTER,
        TokenType.BEGIN,
        TokenType.BUCKET,
        TokenType.CACHE,
        TokenType.COLLATE,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.CONVERT,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.ENGINE,
        TokenType.ESCAPE,
        TokenType.EXPLAIN,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FOLLOWING,
        TokenType.FORMAT,
        TokenType.FUNCTION,
        TokenType.IF,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.LAZY,
        TokenType.LOCATION,
        TokenType.NEXT,
        TokenType.ONLY,
        TokenType.OPTIMIZE,
        TokenType.OPTIONS,
        TokenType.ORDINALITY,
        TokenType.PERCENT,
        TokenType.PRECEDING,
        TokenType.RANGE,
        TokenType.REFERENCES,
        TokenType.ROWS,
        TokenType.SCHEMA_COMMENT,
        TokenType.SET,
        TokenType.SHOW,
        TokenType.STORED,
        TokenType.TABLE,
        TokenType.TABLE_FORMAT,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUNCATE,
        TokenType.TRUE,
        TokenType.UNBOUNDED,
        TokenType.UNIQUE,
        TokenType.PROPERTIES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
    }

    # Cast tokens; each one is given an entry in FUNCTION_PARSERS and is also
    # included in FUNC_TOKENS.
    CASTS = {
        TokenType.CAST,
        TokenType.TRY_CAST,
    }

    # The tokens listed here plus CASTS, NESTED_TYPE_TOKENS and the keys of
    # SUBQUERY_PREDICATES.
    # NOTE(review): presumably the token types that may start a function call -
    # confirm against the function parsing code.
    FUNC_TOKENS = {
        TokenType.CONVERT,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.EXTRACT,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.ISNULL,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.REPLACE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        *CASTS,
        *NESTED_TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    # The operator tables below map an operator token to the expression class
    # that represents it.
    # NOTE(review): presumably one table per precedence level of the expression
    # parser - confirm against the methods that consume them.
    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    # DPIPE (exp.DPipe) is grouped with the bitwise operators.
    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
        TokenType.DPIPE: exp.DPipe,
    }

    # MOD is grouped with addition and subtraction here, not with FACTOR.
    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    # Plain token-type sets (no expression class attached).
    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
    }

    # Operator tokens mapped to a builder called as (parser, this, path).
    # Each builder creates the matching JSON/JSONB extract expression through
    # Parser.expression, so the result is validated. DOT has no builder (None).
    # NOTE(review): presumably DOT is handled separately by the column parser -
    # confirm against the consumer of this table.
    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            path=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            path=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            path=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            path=path,
        ),
    }

    # Looked up by parse_into: maps a target expression type (or the string
    # key "JOIN_TYPE") to a callable taking the parser and running the
    # matching _parse_* method.
    EXPRESSION_PARSERS = {
        exp.DataType: lambda self: self._parse_types(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Join: lambda self: self._parse_join(),
        exp.Order: lambda self: self._parse_order(),
        exp.Cluster: lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster),
        exp.Sort: lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Table: lambda self: self._parse_table(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.Expression: lambda self: self._parse_statement(),
        exp.Properties: lambda self: self._parse_properties(),
        "JOIN_TYPE": lambda self: self._parse_join_side_and_kind(),
    }

    # Used by _parse_statement: when the statement's leading token is one of
    # these keys, the token is consumed and the mapped callable parses the rest.
    STATEMENT_PARSERS = {
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
    }

    # Token types mapped to a builder called as (parser, token) that returns
    # the expression for that token. Entries that need neither argument accept
    # and ignore them via *_.
    PRIMARY_PARSERS = {
        TokenType.STRING: lambda _, token: exp.Literal.string(token.text),
        TokenType.NUMBER: lambda _, token: exp.Literal.number(token.text),
        # "except" is a Python keyword, so the keyword arguments are passed
        # through a dict.
        TokenType.STAR: lambda self, _: exp.Star(
            **{"except": self._parse_except(), "replace": self._parse_replace()}
        ),
        TokenType.NULL: lambda *_: exp.Null(),
        TokenType.TRUE: lambda *_: exp.Boolean(this=True),
        TokenType.FALSE: lambda *_: exp.Boolean(this=False),
        TokenType.PLACEHOLDER: lambda *_: exp.Placeholder(),
        TokenType.BIT_STRING: lambda _, token: exp.BitString(this=token.text),
        TokenType.INTRODUCER: lambda self, token: self.expression(
            exp.Introducer,
            this=token.text,
            expression=self._parse_var_or_string(),
        ),
    }

    # Token types mapped to a builder called as (parser, this), where `this`
    # is the already-parsed left-hand operand. LIKE and ILIKE additionally
    # pass their result through _parse_escape; RLIKE does not.
    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: lambda self, this: self._parse_escape(
            self.expression(exp.Like, this=this, expression=self._parse_type())
        ),
        TokenType.ILIKE: lambda self, this: self._parse_escape(
            self.expression(exp.ILike, this=this, expression=self._parse_type())
        ),
        TokenType.RLIKE: lambda self, this: self.expression(
            exp.RegexpLike, this=this, expression=self._parse_type()
        ),
    }

    # Used by _parse_property: when the current token is one of these keys it
    # is consumed and the mapped callable (taking the parser) builds the
    # property expression. USING shares its parser with TABLE_FORMAT.
    PROPERTY_PARSERS = {
        TokenType.AUTO_INCREMENT: lambda self: self._parse_auto_increment(),
        TokenType.CHARACTER_SET: lambda self: self._parse_character_set(),
        TokenType.COLLATE: lambda self: self._parse_collate(),
        TokenType.ENGINE: lambda self: self._parse_engine(),
        TokenType.FORMAT: lambda self: self._parse_format(),
        TokenType.LOCATION: lambda self: self.expression(
            exp.LocationProperty,
            this=exp.Literal.string("LOCATION"),
            value=self._parse_string(),
        ),
        TokenType.PARTITIONED_BY: lambda self: self.expression(
            exp.PartitionedByProperty,
            this=exp.Literal.string("PARTITIONED_BY"),
            value=self._parse_schema(),
        ),
        TokenType.SCHEMA_COMMENT: lambda self: self._parse_schema_comment(),
        TokenType.STORED: lambda self: self._parse_stored(),
        TokenType.TABLE_FORMAT: lambda self: self._parse_table_format(),
        TokenType.USING: lambda self: self._parse_table_format(),
    }

    # Constraint keyword tokens mapped to a callable taking the parser.
    CONSTRAINT_PARSERS = {
        TokenType.CHECK: lambda self: self._parse_check(),
        TokenType.FOREIGN_KEY: lambda self: self._parse_foreign_key(),
        TokenType.UNIQUE: lambda self: self._parse_unique(),
    }

    # CASE and IF mapped to a callable taking the parser.
    NO_PAREN_FUNCTION_PARSERS = {
        TokenType.CASE: lambda self: self._parse_case(),
        TokenType.IF: lambda self: self._parse_if(),
    }

    # Token types mapped to a callable taking (parser, token_type). Every
    # member of CASTS gets the same lambda, which calls _parse_cast with a
    # flag that is truthy only when STRICT_CAST is set and the token is CAST
    # (so it is falsy for TRY_CAST).
    FUNCTION_PARSERS = {
        TokenType.CONVERT: lambda self, _: self._parse_convert(),
        TokenType.EXTRACT: lambda self, _: self._parse_extract(),
        **{
            token_type: lambda self, token_type: self._parse_cast(
                self.STRICT_CAST and token_type == TokenType.CAST
            )
            for token_type in CASTS
        },
    }

    # Used by _parse_query_modifiers, which iterates this dict in order: each
    # key is the argument name set on the query node and each value is a
    # callable (taking the parser) that returns the parsed clause or a falsy
    # value when the clause is absent.
    QUERY_MODIFIER_PARSERS = {
        "laterals": lambda self: self._parse_laterals(),
        "joins": lambda self: self._parse_joins(),
        "where": lambda self: self._parse_where(),
        "group": lambda self: self._parse_group(),
        "having": lambda self: self._parse_having(),
        "qualify": lambda self: self._parse_qualify(),
        # Only parses a window when the WINDOW keyword is present.
        "window": lambda self: self._match(TokenType.WINDOW)
        and self._parse_window(self._parse_id_var(), alias=True),
        "distribute": lambda self: self._parse_sort(
            TokenType.DISTRIBUTE_BY, exp.Distribute
        ),
        "sort": lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort),
        "cluster": lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster),
        "order": lambda self: self._parse_order(),
        "limit": lambda self: self._parse_limit(),
        "offset": lambda self: self._parse_offset(),
    }

    # Object kinds accepted by _parse_create after the CREATE keyword.
    CREATABLES = {TokenType.TABLE, TokenType.VIEW, TokenType.FUNCTION, TokenType.INDEX}

    # Read by the cast entries of FUNCTION_PARSERS.
    STRICT_CAST = True

    # Instance attributes: the configuration passed to __init__ followed by
    # the per-parse state that reset() initialises.
    __slots__ = (
        "error_level",
        "error_message_context",
        "sql",
        "errors",
        "index_offset",
        "unnest_column_only",
        "alias_post_tablesample",
        "max_errors",
        "null_ordering",
        "_tokens",
        "_chunks",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_greedy_subqueries",
    )

    def __init__(
        self,
        error_level=None,
        error_message_context=100,
        index_offset=0,
        unnest_column_only=False,
        alias_post_tablesample=False,
        max_errors=3,
        null_ordering=None,
    ):
        """
        Stores the parser configuration and initialises the per-parse state.

        A falsy `error_level` is replaced by ErrorLevel.RAISE; every other
        argument is stored unchanged.
        """
        # Settings that are stored exactly as given.
        self.null_ordering = null_ordering
        self.max_errors = max_errors
        self.alias_post_tablesample = alias_post_tablesample
        self.unnest_column_only = unnest_column_only
        self.index_offset = index_offset
        self.error_message_context = error_message_context

        self.error_level = error_level if error_level else ErrorLevel.RAISE

        # Token cursor, statement chunks and collected errors.
        self.reset()

    def reset(self):
        """Clears all per-parse state: SQL text, errors, tokens, chunks and cursor."""
        # Cursor over the token list.
        self._index = 0
        self._curr = self._next = self._prev = None

        # Input and the state accumulated while parsing.
        self._tokens = []
        self._chunks = [[]]
        self.errors = []
        self.sql = ""
        self._greedy_subqueries = False

    def parse(self, raw_tokens, sql=None):
        """
        Parses a token list into syntax trees, one per SQL statement.

        Args
            raw_tokens (list): the tokens (:class:`~sqlglot.tokens.Token`) to parse.
            sql (str): the original SQL string, used when building error messages.

        Returns
            the list of syntax trees (:class:`~sqlglot.expressions.Expression`).
        """
        # The unbound method is passed so that _parse can call it with the parser.
        statement_parser = type(self)._parse_statement
        return self._parse(parse_method=statement_parser, raw_tokens=raw_tokens, sql=sql)

    def parse_into(self, expression_types, raw_tokens, sql=None):
        """
        Parses the tokens with the parser registered for the given expression type(s).

        The types are tried in order and the result of the first one that
        parses without a ParseError is returned.

        Args
            expression_types: one expression type or a collection of them; each
                must be a key of EXPRESSION_PARSERS.
            raw_tokens (list): the tokens to parse.
            sql (str): the original SQL string, used when building error messages.

        Returns
            the list of syntax trees produced by the first successful parser.

        Raises
            TypeError: if a type has no entry in EXPRESSION_PARSERS.
            ParseError: if no type could be parsed; chained to the last failure
                when there was one.
        """
        # Stays None when there are no types to try, so the final raise does not
        # fail on an unbound name.
        error = None

        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")
            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                # Remember the failure and fall through to the next candidate.
                error = e

        raise ParseError(f"Failed to parse into {expression_types}") from error

    def _parse(self, parse_method, raw_tokens, sql=None):
        """
        Splits the tokens into statements on semicolons and parses each one.

        Args
            parse_method: callable invoked as parse_method(self) once per statement.
            raw_tokens (list): the tokens to parse.
            sql (str): the original SQL string; stored as "" when falsy.

        Returns
            a list with the result of parse_method for every statement.
        """
        self.reset()
        self.sql = sql or ""
        last_position = len(raw_tokens) - 1

        for position, token in enumerate(raw_tokens):
            if token.token_type != TokenType.SEMICOLON:
                self._chunks[-1].append(token)
            elif position < last_position:
                # A semicolon that is the very last token does not open a new chunk.
                self._chunks.append([])

        parsed = []

        for chunk in self._chunks:
            # _advance() moves the cursor from -1 onto the first token.
            self._tokens = chunk
            self._index = -1
            self._advance()
            parsed.append(parse_method(self))

            # Anything left unconsumed means the statement was not fully parsed.
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return parsed

    def check_errors(self):
        """
        Reports the collected errors according to the error level.

        WARN logs every error; RAISE raises a single ParseError built from the
        collected errors (limited by max_errors) when there are any. Other
        levels do nothing here.
        """
        level = self.error_level

        if level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
            return

        if level == ErrorLevel.RAISE and self.errors:
            raise ParseError(concat_errors(self.errors, self.max_errors))

    def raise_error(self, message, token=None):
        """
        Builds a ParseError for `message` and raises or records it.

        The error text includes the token's line and column and an excerpt of
        the SQL around the token, with the token itself underlined using ANSI
        escape codes. The error is raised when the error level is IMMEDIATE and
        appended to self.errors otherwise.

        Args
            message (str): description of the problem.
            token: the offending token; defaults to the current token, then the
                previous one, then an empty string token.
        """
        token = token or self._curr or self._prev or Token.string("")
        context_size = self.error_message_context

        start = self._find_token(token, self.sql)
        end = start + len(token.text)

        # Up to context_size characters on each side of the token.
        start_context = self.sql[max(start - context_size, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + context_size]

        description = (
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}"
        )
        error = ParseError(description)

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(self, exp_class, **kwargs):
        """Instantiates exp_class with the given keyword arguments, validates it and returns it."""
        node = exp_class(**kwargs)
        self.validate_expression(node)
        return node

    def validate_expression(self, expression, args=None):
        """
        Checks an expression's arguments against its declared arg_types.

        Reports, through raise_error, every argument that is not declared, every
        mandatory argument that is None or an empty list, and - when `args` is
        given - an argument count above the number of declared arguments unless
        the expression accepts variable-length arguments. Nothing is checked
        when the error level is IGNORE.

        Args
            expression: the expression to validate.
            args (list): the raw arguments the expression was built from, if any.
        """
        if self.error_level == ErrorLevel.IGNORE:
            return

        # Arguments that were supplied but are not declared.
        for name in expression.args:
            if name not in expression.arg_types:
                self.raise_error(
                    f"Unexpected keyword: '{name}' for {expression.__class__}"
                )

        # Declared mandatory arguments that are missing or empty.
        for name, mandatory in expression.arg_types.items():
            if not mandatory:
                continue
            value = expression.args.get(name)
            if value is None or (isinstance(value, list) and not value):
                self.raise_error(
                    f"Required keyword: '{name}' missing for {expression.__class__}"
                )

        too_many = bool(args) and len(args) > len(expression.arg_types)
        if too_many and not expression.is_var_len_args:
            self.raise_error(
                f"The number of provided arguments ({len(args)}) is greater than "
                f"the maximum number of supported arguments ({len(expression.arg_types)})"
            )

    def _find_token(self, token, sql):
        """
        Returns the character index in `sql` of the token's line and column.

        The SQL is walked from the start, counting a new line (and resetting
        the column to 1) for every character the tokenizer classifies as a
        break, until the token's position is reached.

        Args
            token: the token to locate; its `line` and `col` are read.
            sql (str): the SQL text the token came from.

        Returns
            the index of the token's position, or len(sql) when the text ends
            before that position is reached (for example when no SQL string was
            supplied to the parser).
        """
        line = 1
        col = 1
        index = 0
        size = len(sql)

        # Bounded by the text length so that error reporting never fails with an
        # IndexError when `sql` is empty or shorter than the token position.
        while index < size and (line < token.line or col < token.col):
            if Tokenizer.WHITE_SPACE.get(sql[index]) == TokenType.BREAK:
                line += 1
                col = 1
            else:
                col += 1
            index += 1

        return index

    def _get_token(self, index):
        """Looks up the token at `index` in the current statement via list_get."""
        tokens = self._tokens
        return list_get(tokens, index)

    def _advance(self, times=1):
        """
        Moves the cursor by `times` tokens and refreshes the cached tokens.

        Updates _curr, _next and _prev for the new position; _prev is None when
        the cursor is at index 0 or below.
        """
        position = self._index + times
        self._index = position
        self._curr = self._get_token(position)
        self._next = self._get_token(position + 1)
        self._prev = None if position <= 0 else self._get_token(position - 1)

    def _retreat(self, index):
        """Moves the cursor to the absolute position `index`."""
        offset = index - self._index
        self._advance(offset)

    def _parse_statement(self):
        """
        Parses one statement starting at the current token.

        Tries, in order: a registered statement parser, a tokenizer command
        (kept as exp.Command with an optional string), and finally a general
        expression or SELECT, to which query modifiers are applied.

        Returns
            the parsed expression, or None when there is no current token.
        """
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            # The matched keyword is now the previous token.
            handler = self.STATEMENT_PARSERS[self._prev.token_type]
            return handler(self)

        if self._match_set(Tokenizer.COMMANDS):
            command = self._prev.text
            return self.expression(
                exp.Command,
                this=command,
                expression=self._parse_string(),
            )

        expression = self._parse_expression()
        if expression:
            expression = self._parse_set_operations(expression)
        else:
            expression = self._parse_select()

        self._parse_query_modifiers(expression)
        return expression

    def _parse_drop(self):
        """
        Parses the remainder of a DROP statement (the DROP keyword is already consumed).

        Expects TABLE or VIEW, an optional IF EXISTS and the object name.

        Returns
            an exp.Drop expression. When neither TABLE nor VIEW follows, an
            error is reported and, if the error level does not raise
            immediately, the expression is built with kind=None.
        """
        # Bound up front so that a recorded (non-raising) error below cannot
        # lead to an UnboundLocalError when the expression is built.
        kind = None

        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        else:
            self.raise_error("Expected TABLE or View")

        return self.expression(
            exp.Drop,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
        )

    def _parse_exists(self, not_=False):
        """
        Matches IF EXISTS, or IF NOT EXISTS when `not_` is true.

        Returns
            the result of the last match attempted: truthy only when the whole
            clause was consumed.
        """
        matched_if = self._match(TokenType.IF)
        return (
            matched_if
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self):
        """
        Parses the remainder of a CREATE statement (the CREATE keyword is already consumed).

        Handles the optional OR REPLACE, TEMPORARY and UNIQUE modifiers, the
        object kind (one of CREATABLES), an optional IF NOT EXISTS and then the
        kind-specific body:
        - FUNCTION: a name and, after the alias keyword, a string.
        - INDEX: whatever _parse_index returns.
        - TABLE / VIEW: a table with schema, optional properties and, after the
          alias keyword, a SELECT.

        Returns
            an exp.Create expression, or None when no creatable kind follows
            and the error level only records the error.
        """
        replace = self._match(TokenType.OR) and self._match(TokenType.REPLACE)
        temporary = self._match(TokenType.TEMPORARY)
        unique = self._match(TokenType.UNIQUE)

        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            self.raise_error("Expected TABLE, VIEW, INDEX, or FUNCTION")
            # raise_error only records the error unless the level is IMMEDIATE;
            # stop here instead of dereferencing the missing token below.
            return None

        exists = self._parse_exists(not_=True)
        this = None
        expression = None
        properties = None

        if create_token.token_type == TokenType.FUNCTION:
            this = self._parse_var()
            if self._match(TokenType.ALIAS):
                expression = self._parse_string()
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index()
        elif create_token.token_type in (TokenType.TABLE, TokenType.VIEW):
            this = self._parse_table(schema=True)
            # The schema is handed over so that partition columns can be moved
            # out of it (see _parse_property).
            properties = self._parse_properties(
                this if isinstance(this, exp.Schema) else None
            )
            if self._match(TokenType.ALIAS):
                expression = self._parse_select()

        return self.expression(
            exp.Create,
            this=this,
            kind=create_token.text,
            expression=expression,
            exists=exists,
            properties=properties,
            temporary=temporary,
            replace=replace,
            unique=unique,
        )

    def _parse_property(self, schema):
        """
        Parses a single property, or returns None when none is found.

        Tried in order: a keyword registered in PROPERTY_PARSERS, the pair
        DEFAULT CHARACTER_SET, and finally a generic `name = value` pair. For a
        generic pair named PARTITIONED_BY whose value is not a schema, the
        columns named in the value are moved out of `schema` into a new schema
        that becomes the property value.

        Args
            schema: the table schema parsed so far, or None.
        """
        if self._match_set(self.PROPERTY_PARSERS):
            handler = self.PROPERTY_PARSERS[self._prev.token_type]
            return handler(self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(True)

        # Look ahead only; the name and "=" are consumed just below.
        if not self._match_pair(TokenType.VAR, TokenType.EQ, advance=False):
            return None

        key = self._parse_var().this
        self._match(TokenType.EQ)

        if key.upper() != "PARTITIONED_BY":
            value = self._parse_column()
            return self.expression(
                exp.AnonymousProperty,
                this=exp.Literal.string(key),
                value=value,
            )

        value = self._parse_schema() or self._parse_bracket(self._parse_field())

        if schema and not isinstance(value, exp.Schema):
            wanted = {item.name.upper() for item in value.expressions}
            moved = [
                column
                for column in schema.expressions
                if column.this.name.upper() in wanted
            ]
            # Partition columns are removed from the table schema ...
            schema.set(
                "expressions",
                [column for column in schema.expressions if column not in moved],
            )
            # ... and re-declared as the partition schema.
            value = self.expression(exp.Schema, expressions=moved)

        return self.expression(
            exp.PartitionedByProperty,
            this=exp.Literal.string(key),
            value=value,
        )

    def _parse_stored(self):
        """Parses the value of a STORED clause into a FORMAT file-format property."""
        # Both the alias keyword and "=" are optional.
        self._match(TokenType.ALIAS)
        self._match(TokenType.EQ)

        label = exp.Literal.string("FORMAT")
        file_format = exp.Literal.string(self._parse_var().name)
        return self.expression(exp.FileFormatProperty, this=label, value=file_format)

    def _parse_format(self):
        """Parses the value of a FORMAT property: a string if present, otherwise a variable."""
        self._match(TokenType.EQ)  # optional "="

        label = exp.Literal.string("FORMAT")
        file_format = self._parse_string() or self._parse_var()
        return self.expression(exp.FileFormatProperty, this=label, value=file_format)

    def _parse_engine(self):
        """Parses the value of an ENGINE property."""
        self._match(TokenType.EQ)  # optional "="

        label = exp.Literal.string("ENGINE")
        engine = self._parse_var_or_string()
        return self.expression(exp.EngineProperty, this=label, value=engine)

    def _parse_auto_increment(self):
        """Parses the value of an AUTO_INCREMENT property: a variable if present, otherwise a number."""
        self._match(TokenType.EQ)  # optional "="

        label = exp.Literal.string("AUTO_INCREMENT")
        start = self._parse_var() or self._parse_number()
        return self.expression(exp.AutoIncrementProperty, this=label, value=start)

    def _parse_collate(self):
        """Parses the value of a COLLATE property."""
        self._match(TokenType.EQ)  # optional "="

        label = exp.Literal.string("COLLATE")
        collation = self._parse_var_or_string()
        return self.expression(exp.CollateProperty, this=label, value=collation)

    def _parse_schema_comment(self):
        """Parses the string value of a schema comment into a COMMENT property."""
        self._match(TokenType.EQ)  # optional "="

        label = exp.Literal.string("COMMENT")
        comment = self._parse_string()
        return self.expression(exp.SchemaCommentProperty, this=label, value=comment)

    def _parse_character_set(self, default=False):
        """
        Parses the value of a CHARACTER_SET property.

        Args
            default (bool): stored on the property; _parse_property passes True
                for the DEFAULT CHARACTER_SET form.
        """
        self._match(TokenType.EQ)  # optional "="

        label = exp.Literal.string("CHARACTER_SET")
        charset = self._parse_var_or_string()
        return self.expression(
            exp.CharacterSetProperty,
            this=label,
            value=charset,
            default=default,
        )

    def _parse_table_format(self):
        """Parses the value of a TABLE_FORMAT property (also registered for USING)."""
        self._match(TokenType.EQ)  # optional "="

        label = exp.Literal.string("TABLE_FORMAT")
        table_format = self._parse_var_or_string()
        return self.expression(exp.TableFormatProperty, this=label, value=table_format)

    def _parse_properties(self, schema=None):
        """
        Parses every property that follows and groups them into exp.Properties.

        Three forms are accepted repeatedly, in any order: a WITH (...) list of
        properties, a PROPERTIES (...) list of string pairs, and bare
        properties. Parsing stops at the first token that starts none of them.

        The schema is passed along because a later partition-by property may
        name columns of the table schema; those columns are then defined in the
        partition section instead of with the rest of the columns.

        Returns
            an exp.Properties expression, or None when no property was found.
        """

        def parse_string_pair():
            # 'key' = 'value'; the value is falsy when "=" is missing.
            return self.expression(
                exp.AnonymousProperty,
                this=self._parse_string(),
                value=self._match(TokenType.EQ) and self._parse_string(),
            )

        found = []

        while True:
            if self._match(TokenType.WITH):
                self._match_l_paren()
                found.extend(self._parse_csv(lambda: self._parse_property(schema)))
                self._match_r_paren()
                continue

            if self._match(TokenType.PROPERTIES):
                self._match_l_paren()
                found.extend(self._parse_csv(parse_string_pair))
                self._match_r_paren()
                continue

            single = self._parse_property(schema)
            if not single:
                break
            found.append(single)

        return self.expression(exp.Properties, expressions=found) if found else None

    def _parse_insert(self):
        """
        Parses the remainder of an INSERT statement (the INSERT keyword is already consumed).

        Accepts optional OVERWRITE, INTO and TABLE keywords, then the target
        table, an optional IF EXISTS, an optional partition and the source query.
        """
        overwrite = self._match(TokenType.OVERWRITE)
        self._match(TokenType.INTO)
        self._match(TokenType.TABLE)

        # Parsed in the order the clauses appear in the statement.
        target = self._parse_table(schema=True)
        exists = self._parse_exists()
        partition = self._parse_partition()
        source = self._parse_select()

        return self.expression(
            exp.Insert,
            this=target,
            exists=exists,
            partition=partition,
            expression=source,
            overwrite=overwrite,
        )

    def _parse_delete(self):
        """Parses the remainder of a DELETE statement: optional FROM, the table and a WHERE clause."""
        self._match(TokenType.FROM)

        target = self._parse_table(schema=True)
        condition = self._parse_where()
        return self.expression(exp.Delete, this=target, where=condition)

    def _parse_update(self):
        """
        Parses the remainder of an UPDATE statement (the UPDATE keyword is already consumed).

        Reads the table, the SET assignments (parsed as equality expressions;
        falsy when SET is missing), and the FROM and WHERE clauses.
        """
        target = self._parse_table(schema=True)
        assignments = self._match(TokenType.SET) and self._parse_csv(
            self._parse_equality
        )
        source = self._parse_from()
        condition = self._parse_where()

        # "from" is a Python keyword, so the arguments are passed as a dict.
        arguments = {
            "this": target,
            "expressions": assignments,
            "from": source,
            "where": condition,
        }
        return self.expression(exp.Update, **arguments)

    def _parse_uncache(self):
        """Parses the remainder of an UNCACHE statement: TABLE, optional IF EXISTS and the table."""
        matched_table = self._match(TokenType.TABLE)
        if not matched_table:
            self.raise_error("Expecting TABLE after UNCACHE")

        exists = self._parse_exists()
        table = self._parse_table(schema=True)
        return self.expression(exp.Uncache, exists=exists, this=table)

    def _parse_cache(self):
        """
        Parses the remainder of a CACHE statement (the CACHE keyword is already consumed).

        Accepts optional LAZY and TABLE keywords, the table, an optional
        OPTIONS ('key' = 'value') clause holding a single pair, an optional
        alias keyword and the query to cache.
        """
        lazy = self._match(TokenType.LAZY)
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match(TokenType.OPTIONS):
            self._match_l_paren()
            key = self._parse_string()
            self._match(TokenType.EQ)
            value = self._parse_string()
            self._match_r_paren()
            # Stored as a flat [key, value] list.
            options = [key, value]

        self._match(TokenType.ALIAS)
        query = self._parse_select()

        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=query,
        )

    def _parse_partition(self):
        """
        Parses an optional PARTITION (name [= 'value'], ...) clause.

        Returns
            an exp.Partition whose `this` is a list of (name, value) tuples,
            with value None when no "=" follows the name; None when the
            PARTITION keyword is absent.
        """
        if not self._match(TokenType.PARTITION):
            return None

        def parse_pair():
            name = self._parse_var()
            value = self._parse_string() if self._match(TokenType.EQ) else None
            return (name, value)

        self._match_l_paren()
        pairs = self._parse_csv(parse_pair)
        self._match_r_paren()

        return self.expression(exp.Partition, this=pairs)

    def _parse_value(self):
        """Parses a parenthesised, comma-separated list of conditions into an exp.Tuple."""
        self._match_l_paren()
        items = self._parse_csv(self._parse_conjunction)
        self._match_r_paren()

        return self.expression(exp.Tuple, expressions=items)

    def _parse_select(self, table=None):
        """
        Parses a query: SELECT, WITH (CTEs), a parenthesised query or VALUES.

        Args
            table: when truthy, a parenthesised group is parsed as a table
                instead of as a nested query.

        Returns
            the parsed query with any trailing set operations applied, or None
            when the current token starts none of the supported forms. When a
            WITH clause is not followed by a statement, the error is reported
            and the exp.With itself is returned.
        """
        # Remembered so that an unproductive "(" can be undone.
        index = self._index

        if self._match(TokenType.SELECT):
            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match(TokenType.DISTINCT)

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            # A limit written before the projections (parsed with top=True).
            limit = self._parse_limit(top=True)
            expressions = self._parse_csv(
                lambda: self._parse_annotation(self._parse_expression())
            )

            this = self.expression(
                exp.Select,
                hint=hint,
                distinct=distinct,
                expressions=expressions,
                limit=limit,
            )
            from_ = self._parse_from()
            if from_:
                this.set("from", from_)
            self._parse_query_modifiers(this)
        elif self._match(TokenType.WITH):
            recursive = self._match(TokenType.RECURSIVE)

            expressions = []

            while True:
                expressions.append(self._parse_cte())

                if not self._match(TokenType.COMMA):
                    break

            cte = self.expression(
                exp.With,
                expressions=expressions,
                recursive=recursive,
            )
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                # Attach the node built above instead of constructing (and
                # validating) an identical second one, which would report any
                # validation error twice.
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
        elif self._match(TokenType.L_PAREN):
            this = self._parse_table() if table else self._parse_select()

            if this:
                self._parse_query_modifiers(this)
                self._match_r_paren()
                this = self._parse_subquery(this)
            else:
                # Nothing parsable inside the parenthesis: un-consume it.
                self._retreat(index)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values, expressions=self._parse_csv(self._parse_value)
            )
            alias = self._parse_table_alias()
            if alias:
                this = self.expression(exp.Subquery, this=this, alias=alias)
        else:
            this = None

        return self._parse_set_operations(this) if this else None

    def _parse_cte(self):
        """
        Parse one common table expression of the form `<alias> AS (<statement>)`.

        Errors are reported through `raise_error` when the alias name or the
        AS keyword is missing.

        Returns:
            An exp.CTE node holding the parsed statement and its alias.
        """
        cte_alias = self._parse_table_alias()
        if not (cte_alias and cte_alias.this):
            self.raise_error("Expected CTE to have alias")

        saw_as = self._match(TokenType.ALIAS)
        if not saw_as:
            self.raise_error("Expected AS in CTE")

        self._match_l_paren()
        body = self._parse_statement()
        self._match_r_paren()

        return self.expression(exp.CTE, this=body, alias=cte_alias)

    def _parse_table_alias(self):
        """
        Parse an optional table alias with an optional parenthesized column list.

        Returns:
            An exp.TableAlias node, or None when neither an alias name nor a
            column list was found.
        """
        # The result of matching the ALIAS token is forwarded as the
        # `any_token` argument of _parse_id_var.
        explicit = self._match(TokenType.ALIAS)
        name = self._parse_id_var(explicit)

        column_names = None
        if self._match(TokenType.L_PAREN):
            column_names = self._parse_csv(lambda: self._parse_id_var(explicit))
            self._match_r_paren()

        if name or column_names:
            return self.expression(exp.TableAlias, this=name, columns=column_names)
        return None

    def _parse_subquery(self, this):
        """Wrap `this` in an exp.Subquery, attaching a table alias if one follows."""
        subquery_alias = self._parse_table_alias()
        return self.expression(exp.Subquery, this=this, alias=subquery_alias)

    def _parse_query_modifiers(self, this):
        """
        Run every parser in QUERY_MODIFIER_PARSERS and attach each truthy result
        to `this` under the parser's key.

        Nothing happens unless `this` is an exp.Subquery or exp.Subqueryable.
        """
        if isinstance(this, (exp.Subquery, exp.Subqueryable)):
            for arg_name, parse_modifier in self.QUERY_MODIFIER_PARSERS.items():
                modifier = parse_modifier(self)
                if modifier:
                    this.set(arg_name, modifier)

    def _parse_annotation(self, expression):
        """
        Wrap `expression` in an exp.Annotation when an ANNOTATION token follows.

        The annotation text is taken from the matched token; without such a
        token `expression` is returned unchanged.
        """
        if not self._match(TokenType.ANNOTATION):
            return expression

        annotation_text = self._prev.text
        return self.expression(
            exp.Annotation, this=annotation_text, expression=expression
        )

    def _parse_hint(self):
        """
        Parse a hint block: a comma-separated list of functions enclosed by two
        HINT tokens.

        Returns:
            An exp.Hint node, or None when no opening HINT token is present.
        """
        if not self._match(TokenType.HINT):
            return None

        hint_functions = self._parse_csv(self._parse_function)
        closed = self._match(TokenType.HINT)
        if not closed:
            self.raise_error("Expected */ after HINT")

        return self.expression(exp.Hint, expressions=hint_functions)

    def _parse_from(self):
        """Parse `FROM <table>[, <table> ...]` into an exp.From, or return None."""
        if self._match(TokenType.FROM):
            tables = self._parse_csv(self._parse_table)
            return self.expression(exp.From, expressions=tables)
        return None

    def _parse_laterals(self):
        """Collect consecutive LATERAL clauses by handing _parse_lateral to _parse_all."""
        parse_one = self._parse_lateral
        return self._parse_all(parse_one)

    def _parse_lateral(self):
        """
        Parse `LATERAL VIEW [OUTER] <function> <table alias> [AS <columns>]`.

        Returns:
            An exp.Lateral node, or None when the next token is not LATERAL.
        """
        if not self._match(TokenType.LATERAL):
            return None

        saw_view = self._match(TokenType.VIEW)
        if not saw_view:
            self.raise_error("Expected VIEW after LATERAL")

        outer = self._match(TokenType.OUTER)

        # Parsing order matters: function, then table name, then the optional
        # column list introduced by the ALIAS token.
        generator = self._parse_function()
        table_name = self._parse_id_var(any_token=False)
        if self._match(TokenType.ALIAS):
            column_names = self._parse_csv(self._parse_id_var)
        else:
            column_names = None

        table_alias = self.expression(
            exp.TableAlias, this=table_name, columns=column_names
        )
        return self.expression(
            exp.Lateral, this=generator, outer=outer, alias=table_alias
        )

    def _parse_joins(self):
        """Collect consecutive JOIN clauses by handing _parse_join to _parse_all."""
        parse_one = self._parse_join
        return self._parse_all(parse_one)

    def _parse_join_side_and_kind(self):
        """
        Consume an optional join side followed by an optional join kind.

        Returns:
            A `(side, kind)` tuple. Each element is the matched token, or the
            falsy result of `_match_set` when nothing from JOIN_SIDES /
            JOIN_KINDS matched.
        """
        side = self._match_set(self.JOIN_SIDES) and self._prev
        kind = self._match_set(self.JOIN_KINDS) and self._prev
        return side, kind

    def _parse_join(self):
        """
        Parse one join clause: `[side] [kind] JOIN <table> [ON <cond> | USING (<ids>)]`.

        Returns:
            An exp.Join node, or None when no JOIN token follows the optional
            side/kind tokens. In the None case the token position is restored,
            so side/kind tokens consumed speculatively are not dropped.
        """
        index = self._index
        side, kind = self._parse_join_side_and_kind()

        if not self._match(TokenType.JOIN):
            # A side or kind token may already have been consumed; rewind so
            # the caller sees the token stream exactly as it was on entry.
            self._retreat(index)
            return None

        kwargs = {"this": self._parse_table()}

        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()

        return self.expression(exp.Join, **kwargs)

    def _parse_index(self):
        """
        Parse an index reference: `<index> [ON] [TABLE] <table> <columns expression>`.

        Returns:
            An exp.Index node with the index name, an exp.Table for the target
            table, and the parsed columns expression.
        """
        index_name = self._parse_id_var()
        self._match(TokenType.ON)
        self._match(TokenType.TABLE)  # hive

        target = self.expression(exp.Table, this=self._parse_id_var())
        columns = self._parse_expression()

        return self.expression(
            exp.Index, this=index_name, table=target, columns=columns
        )

    def _parse_table(self, schema=False):
        """
        Parse a table-like source: an UNNEST, a subquery, or a (qualified) table
        name with optional alias and table sample.

        Args:
            schema: when truthy, function parsing is skipped for the table name
                and the result is handed to `_parse_schema`.

        Returns:
            The parsed expression (unnest, subquery, schema result, table,
            aliased table, or a table sample wrapping one of the latter two).
        """
        source = self._parse_unnest() or self._parse_select(table=True)
        if source:
            return source

        catalog = None
        db = None
        table = None if schema else self._parse_function()
        if not table:
            table = self._parse_id_var(False)

        # Each dot shifts the parts left: table -> db -> catalog.
        while self._match(TokenType.DOT):
            catalog, db, table = db, table, self._parse_id_var()

        if not table:
            self.raise_error("Expected table name")

        this = self.expression(exp.Table, this=table, db=db, catalog=catalog)

        if schema:
            return self._parse_schema(this=this)

        # The sample clause is parsed either before or after the alias,
        # depending on the alias_post_tablesample setting.
        sample_before_alias = self.alias_post_tablesample
        table_sample = self._parse_table_sample() if sample_before_alias else None

        alias = self._parse_table_alias()
        if alias:
            this = self.expression(exp.Alias, this=this, alias=alias)

        if not sample_before_alias:
            table_sample = self._parse_table_sample()

        if not table_sample:
            return this

        table_sample.set("this", this)
        return table_sample

    def _parse_unnest(self):
        """
        Parse `UNNEST(<columns>) [WITH ORDINALITY] [<alias>]`.

        When `unnest_column_only` is set, the alias name is moved into the
        alias' column list; an alias that already carries columns is reported
        as an error in that mode.

        Returns:
            An exp.Unnest node, or None when the next token is not UNNEST.
        """
        if not self._match(TokenType.UNNEST):
            return None

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        # Match WITH and ORDINALITY as a pair so that a lone WITH (followed by
        # something else) is not consumed and lost.
        ordinality = bool(self._match_pair(TokenType.WITH, TokenType.ORDINALITY))

        alias = self._parse_table_alias()

        if alias and self.unnest_column_only:
            if alias.args.get("columns"):
                self.raise_error("Unexpected extra column alias in unnest.")
            alias.set("columns", [alias.this])
            alias.set("this", None)

        return self.expression(
            exp.Unnest,
            expressions=expressions,
            ordinality=ordinality,
            alias=alias,
        )

    def _parse_table_sample(self):
        """
        Parse a table sample clause introduced by a TABLE_SAMPLE token.

        Two parenthesized forms are handled:
          * `BUCKET <n> [OUT OF] <m> [ON <field>]`
          * `<number> [PERCENT | ROWS]` (a bare number is stored as `size`)

        Returns:
            An exp.TableSample node, or None when no TABLE_SAMPLE token is
            present. Arguments that do not apply are left as None.
        """
        if not self._match(TokenType.TABLE_SAMPLE):
            return None

        method = self._parse_var()
        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        rows = None
        size = None

        self._match_l_paren()

        if self._match(TokenType.BUCKET):
            bucket_numerator = self._parse_number()
            self._match(TokenType.OUT_OF)
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        else:
            num = self._parse_number()

            # The token after the number decides how the number is stored.
            if self._match(TokenType.PERCENT):
                percent = num
            elif self._match(TokenType.ROWS):
                rows = num
            else:
                size = num

        self._match_r_paren()

        return self.expression(
            exp.TableSample,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            rows=rows,
            size=size,
        )

    def _parse_where(self):
        """Parse `WHERE <condition>` into an exp.Where, or return None."""
        if self._match(TokenType.WHERE):
            condition = self._parse_conjunction()
            return self.expression(exp.Where, this=condition)
        return None

    def _parse_group(self):
        """
        Parse a GROUP BY clause, including optional grouping sets, CUBE and ROLLUP.

        Returns:
            An exp.Group node, or None when no GROUP_BY token is present.
        """
        if not self._match(TokenType.GROUP_BY):
            return None

        # The pieces are parsed strictly in this order.
        group_keys = self._parse_csv(self._parse_conjunction)
        grouping_sets = self._parse_grouping_sets()
        cube = self._match(TokenType.CUBE) and self._parse_wrapped_id_vars()
        rollup = self._match(TokenType.ROLLUP) and self._parse_wrapped_id_vars()

        return self.expression(
            exp.Group,
            expressions=group_keys,
            grouping_sets=grouping_sets,
            cube=cube,
            rollup=rollup,
        )

    def _parse_grouping_sets(self):
        """
        Parse `GROUPING SETS (<set>, ...)`.

        Returns:
            The list of parsed grouping sets, or None when no GROUPING_SETS
            token is present.
        """
        if self._match(TokenType.GROUPING_SETS):
            self._match_l_paren()
            sets = self._parse_csv(self._parse_grouping_set)
            self._match_r_paren()
            return sets
        return None

    def _parse_grouping_set(self):
        """
        Parse one grouping set: either a parenthesized identifier list (returned
        as an exp.Tuple) or a single identifier.
        """
        if not self._match(TokenType.L_PAREN):
            return self._parse_id_var()

        members = self._parse_csv(self._parse_id_var)
        self._match_r_paren()
        return self.expression(exp.Tuple, expressions=members)

    def _parse_having(self):
        """Parse `HAVING <condition>` into an exp.Having, or return None."""
        if self._match(TokenType.HAVING):
            condition = self._parse_conjunction()
            return self.expression(exp.Having, this=condition)
        return None

    def _parse_qualify(self):
        """Parse `QUALIFY <condition>` into an exp.Qualify, or return None."""
        if self._match(TokenType.QUALIFY):
            condition = self._parse_conjunction()
            return self.expression(exp.Qualify, this=condition)
        return None

    def _parse_order(self, this=None):
        """
        Parse an ORDER BY clause.

        Args:
            this: expression the ordering applies to; returned unchanged when
                no ORDER_BY token is present.

        Returns:
            An exp.Order node wrapping `this`, or `this` itself.
        """
        if self._match(TokenType.ORDER_BY):
            sort_keys = self._parse_csv(self._parse_ordered)
            return self.expression(exp.Order, this=this, expressions=sort_keys)
        return this

    def _parse_sort(self, token_type, exp_class):
        """
        Parse a sort-style clause introduced by `token_type`.

        Args:
            token_type: token that opens the clause.
            exp_class: expression class to build from the ordered items.

        Returns:
            An `exp_class` node, or None when `token_type` is not matched.
        """
        if self._match(token_type):
            sort_keys = self._parse_csv(self._parse_ordered)
            return self.expression(exp_class, expressions=sort_keys)
        return None

    def _parse_ordered(self):
        """
        Parse one ordering term: `<expr> [ASC | DESC] [NULLS FIRST | NULLS LAST]`.

        When the null placement is not given explicitly, `nulls_first` is
        derived from `self.null_ordering` and the sort direction.

        Returns:
            An exp.Ordered node with `desc` and `nulls_first` set.
        """
        this = self._parse_conjunction()
        self._match(TokenType.ASC)
        desc = self._match(TokenType.DESC) or False

        matched_first = self._match(TokenType.NULLS_FIRST)
        matched_last = self._match(TokenType.NULLS_LAST)
        nulls_first = matched_first or False

        if not (matched_first or matched_last):
            ordering = self.null_ordering
            nulls_are_small = ordering == "nulls_are_small"
            # Ascending sorts put nulls first when nulls are small; descending
            # sorts put them first otherwise. "nulls_are_last" never does.
            leads = (not nulls_are_small) if desc else nulls_are_small
            if leads and ordering != "nulls_are_last":
                nulls_first = True

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first
        )

    def _parse_limit(self, this=None, top=False):
        """
        Parse a row-limiting clause.

        Args:
            this: expression the limit applies to; returned unchanged when no
                limiting clause is found.
            top: when truthy, look for a TOP token instead of LIMIT.

        Returns:
            An exp.Limit node, an exp.Fetch node (for FETCH clauses), or `this`.
        """
        limit_token = TokenType.TOP if top else TokenType.LIMIT
        if self._match(limit_token):
            count = self._parse_number()
            return self.expression(exp.Limit, this=this, expression=count)

        if not self._match(TokenType.FETCH):
            return this

        # Direction defaults to FIRST when neither FIRST nor NEXT is given.
        if self._match_set((TokenType.FIRST, TokenType.NEXT)):
            direction = self._prev.text
        else:
            direction = "FIRST"

        count = self._parse_number()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        self._match(TokenType.ONLY)
        return self.expression(exp.Fetch, direction=direction, count=count)

    def _parse_offset(self, this=None):
        """
        Parse `OFFSET <number> [ROW | ROWS]`.

        Returns:
            An exp.Offset node wrapping `this`, or `this` unchanged when no
            OFFSET token is present.
        """
        if self._match(TokenType.OFFSET):
            skip_count = self._parse_number()
            self._match_set((TokenType.ROW, TokenType.ROWS))
            return self.expression(exp.Offset, this=this, expression=skip_count)
        return this

    def _parse_set_operations(self, this):
        """
        Parse a set operation following `this`.

        UNION and EXCEPT map to exp.Union and exp.Except; any other token from
        SET_OPERATIONS maps to exp.Intersect. The operation is distinct when
        DISTINCT is given or when ALL is absent.

        Returns:
            The set operation node, or `this` when no set operator follows.
        """
        if not self._match_set(self.SET_OPERATIONS):
            return this

        operator = self._prev.token_type
        node_classes = {
            TokenType.UNION: exp.Union,
            TokenType.EXCEPT: exp.Except,
        }
        node_class = node_classes.get(operator, exp.Intersect)

        distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL)
        right = self._parse_select()

        return self.expression(
            node_class, this=this, distinct=distinct, expression=right
        )

    def _parse_expression(self):
        """Parse a conjunction and pass it to `_parse_alias`."""
        conjunction = self._parse_conjunction()
        return self._parse_alias(conjunction)

    def _parse_conjunction(self):
        """Parse equality operands joined by the operators in CONJUNCTION."""
        operand_parser = self._parse_equality
        return self._parse_tokens(operand_parser, self.CONJUNCTION)

    def _parse_equality(self):
        """Parse comparison operands joined by the operators in EQUALITY."""
        operand_parser = self._parse_comparison
        return self._parse_tokens(operand_parser, self.EQUALITY)

    def _parse_comparison(self):
        """Parse range operands joined by the operators in COMPARISON."""
        operand_parser = self._parse_range
        return self._parse_tokens(operand_parser, self.COMPARISON)

    def _parse_range(self):
        """
        Parse a bitwise expression optionally followed by `[NOT] <range operator>`.

        The range operator is dispatched through RANGE_PARSERS; a preceding NOT
        wraps the result in exp.Not.
        """
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            range_parser = self.RANGE_PARSERS[self._prev.token_type]
            this = range_parser(self, this)

        return self.expression(exp.Not, this=this) if negate else this

    def _parse_is(self, this):
        """
        Parse the right-hand side of `IS [NOT] <null | boolean>`.

        Returns:
            An exp.Is node, wrapped in exp.Not when NOT was present.
        """
        negate = self._match(TokenType.NOT)
        operand = self._parse_null() or self._parse_boolean()
        is_node = self.expression(exp.Is, this=this, expression=operand)

        if negate:
            return self.expression(exp.Not, this=is_node)
        return is_node

    def _parse_in(self, this):
        """
        Parse the right-hand side of an IN predicate.

        Three shapes are produced: `unnest=` for `IN UNNEST(...)`, `query=` for
        a single subqueryable item, and `expressions=` for everything else.
        """
        unnest = self._parse_unnest()
        if unnest:
            return self.expression(exp.In, this=this, unnest=unnest)

        self._match_l_paren()
        candidates = self._parse_csv(
            lambda: self._parse_select() or self._parse_expression()
        )

        is_single_query = len(candidates) == 1 and isinstance(
            candidates[0], exp.Subqueryable
        )
        if is_single_query:
            in_node = self.expression(exp.In, this=this, query=candidates[0])
        else:
            in_node = self.expression(exp.In, this=this, expressions=candidates)

        self._match_r_paren()
        return in_node

    def _parse_between(self, this):
        """Parse `<low> AND <high>` after BETWEEN and build an exp.Between."""
        lower = self._parse_bitwise()
        self._match(TokenType.AND)
        upper = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=lower, high=upper)

    def _parse_escape(self, this):
        """Wrap `this` in an exp.Escape when `ESCAPE <string>` follows."""
        if self._match(TokenType.ESCAPE):
            escape_string = self._parse_string()
            return self.expression(exp.Escape, this=this, expression=escape_string)
        return this

    def _parse_bitwise(self):
        """
        Parse a left-associative chain of bitwise operations over terms.

        Operators come from BITWISE, plus `<<` and `>>`, which are recognized
        as pairs of LT / GT tokens.
        """
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                node_class = self.BITWISE[self._prev.token_type]
            elif self._match_pair(TokenType.LT, TokenType.LT):
                node_class = exp.BitwiseLeftShift
            elif self._match_pair(TokenType.GT, TokenType.GT):
                node_class = exp.BitwiseRightShift
            else:
                return this

            this = self.expression(
                node_class, this=this, expression=self._parse_term()
            )

    def _parse_term(self):
        """Parse factor operands joined by the operators in TERM."""
        operand_parser = self._parse_factor
        return self._parse_tokens(operand_parser, self.TERM)

    def _parse_factor(self):
        """Parse unary operands joined by the operators in FACTOR."""
        operand_parser = self._parse_unary
        return self._parse_tokens(operand_parser, self.FACTOR)

    def _parse_unary(self):
        """
        Parse a unary expression.

        NOT applies to a following equality expression; TILDA and DASH apply to
        a following unary expression. Anything else is parsed as a type
        expression with an optional AT TIME ZONE suffix.
        """
        if self._match(TokenType.NOT):
            operand = self._parse_equality()
            return self.expression(exp.Not, this=operand)

        prefix_operators = (
            (TokenType.TILDA, exp.BitwiseNot),
            (TokenType.DASH, exp.Neg),
        )
        for token_type, node_class in prefix_operators:
            if self._match(token_type):
                return self.expression(node_class, this=self._parse_unary())

        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self):
        """
        Parse an INTERVAL literal, a type-prefixed cast, or a column with
        optional `::type` casts.

        A leading type followed by a column becomes an exp.Cast. A leading type
        with no column is returned as-is only when it carries expressions;
        otherwise the parser rewinds and re-parses the tokens as a column.
        """
        if self._match(TokenType.INTERVAL):
            value = self._parse_term()
            unit = self._parse_var()
            return self.expression(exp.Interval, this=value, unit=unit)

        start = self._index
        data_type = self._parse_types()
        this = self._parse_column()

        if not data_type:
            # No leading type: apply any chain of `::type` casts to the column.
            while self._match(TokenType.DCOLON):
                target = self._parse_types()
                if not target:
                    self.raise_error("Expected type")
                this = self.expression(exp.Cast, this=this, to=target)
            return this

        if this:
            return self.expression(exp.Cast, this=this, to=data_type)
        if data_type.args.get("expressions"):
            return data_type

        self._retreat(start)
        return self._parse_column()

    def _parse_types(self):
        """
        Parse a data type, including optional parenthesized or angle-bracketed
        arguments.

        Returns:
            An exp.DataType node, or None when the current token is not in
            TYPE_TOKENS, when the type token is directly followed by `[`, or
            when a parenthesized argument list turns out to be empty. In the
            latter two cases the token position is restored first.
        """
        # Remembered so the parser can rewind if this is not a type after all.
        index = self._index

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type
        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None

        # A type token immediately followed by `[` is not treated as a type.
        if self._match(TokenType.L_BRACKET):
            self._retreat(index)
            return None

        if self._match(TokenType.L_PAREN):
            # Argument kind depends on the type: struct fields, nested types,
            # or plain numbers.
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_kwargs)
            elif nested:
                expressions = self._parse_csv(self._parse_types)
            else:
                expressions = self._parse_csv(self._parse_number)

            if not expressions:
                self._retreat(index)
                return None

            self._match_r_paren()

        # Nested types may alternatively take their arguments inside < ... >.
        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_kwargs)
            else:
                expressions = self._parse_csv(self._parse_types)

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

        if type_token in self.TIMESTAMPS:
            # Both time zone qualifiers are consumed if present; only
            # WITH_TIME_ZONE changes the resulting type.
            tz = self._match(TokenType.WITH_TIME_ZONE)
            self._match(TokenType.WITHOUT_TIME_ZONE)
            if tz:
                return exp.DataType(
                    this=exp.DataType.Type.TIMESTAMPTZ,
                    expressions=expressions,
                )
            return exp.DataType(
                this=exp.DataType.Type.TIMESTAMP,
                expressions=expressions,
            )

        # The DataType.Type member is looked up by the upper-cased token value.
        return exp.DataType(
            this=exp.DataType.Type[type_token.value.upper()],
            expressions=expressions,
            nested=nested,
        )

    def _parse_struct_kwargs(self):
        """
        Parse one struct field of the form `<name> [:] <type>`.

        Returns:
            An exp.StructKwarg node, or None when no type could be parsed.
        """
        field_name = self._parse_id_var()
        self._match(TokenType.COLON)
        field_type = self._parse_types()

        if field_type:
            return self.expression(
                exp.StructKwarg, this=field_name, expression=field_type
            )
        return None

    def _parse_at_time_zone(self, this):
        """Wrap `this` in an exp.AtTimeZone when `AT TIME ZONE <zone>` follows."""
        if self._match(TokenType.AT_TIME_ZONE):
            zone = self._parse_unary()
            return self.expression(exp.AtTimeZone, this=this, zone=zone)
        return this

    def _parse_column(self):
        """
        Parse a column reference together with any bracket accesses and
        trailing column operators (tokens in COLUMN_OPERATORS).

        Returns:
            The parsed expression; when no field could be parsed, whatever
            `_parse_bracket` returns for the empty value.
        """
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            # A bare identifier is promoted to a column.
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            # The mapped value may be falsy, in which case the operator is
            # handled by the column/dot branches below.
            op = self.COLUMN_OPERATORS.get(self._prev.token_type)
            # NOTE(review): `field` may presumably be None if nothing parses
            # after the operator, which would make `field.name` below fail;
            # confirm against the helpers.
            field = self._parse_star() or self._parse_function() or self._parse_id_var()

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, exp.Literal.string(field.name))
            elif isinstance(this, exp.Column) and not this.table:
                # An unqualified column followed by a field: the previous name
                # becomes the table qualifier.
                this = self.expression(exp.Column, this=field, table=this.this)
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)

        return this

    def _parse_primary(self):
        """
        Parse a primary expression: a token handled by PRIMARY_PARSERS, or a
        parenthesized subquery, tuple, or single expression.

        Returns:
            The parsed expression, or None when neither form applies.
        """
        if self._match_set(self.PRIMARY_PARSERS):
            token = self._prev
            return self.PRIMARY_PARSERS[token.token_type](self, token)

        if not self._match(TokenType.L_PAREN):
            return None

        query = self._parse_select()
        if query:
            items = [query]
        else:
            items = self._parse_csv(
                lambda: self._parse_alias(self._parse_conjunction(), explicit=True)
            )

        first = list_get(items, 0)
        self._parse_query_modifiers(first)
        self._match_r_paren()

        if isinstance(first, exp.Subqueryable):
            return self._parse_subquery(first)
        if len(items) > 1:
            return self.expression(exp.Tuple, expressions=items)
        return self.expression(exp.Paren, this=first)

    def _parse_field(self, any_token=False):
        """
        Parse a field by trying, in order, a primary expression, a function,
        and an identifier; the first truthy result wins.

        Args:
            any_token: forwarded to `_parse_id_var`.
        """
        field = self._parse_primary()
        if not field:
            field = self._parse_function()
        if not field:
            field = self._parse_id_var(any_token)
        return field

    def _parse_function(self):
        """
        Parse a function call starting at the current token.

        Handled forms, in order:
          * tokens in NO_PAREN_FUNCTION_PARSERS (dedicated parser, no parens),
          * tokens in NO_PAREN_FUNCTIONS when no `(` follows,
          * tokens in FUNCTION_PARSERS (dedicated parser after the `(`),
          * subquery predicates from SUBQUERY_PREDICATES wrapping a SELECT/WITH,
          * known functions from FUNCTIONS, and anonymous functions otherwise.

        Returns:
            The parsed expression (passed through `_parse_window` for the
            parenthesized forms), or None when the current token does not start
            a function.
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type

        if self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if token_type in self.NO_PAREN_FUNCTIONS:
                # `_advance() or X` consumes the token and then evaluates to X
                # (this relies on `_advance` returning a falsy value).
                return self.expression(
                    self._advance() or self.NO_PAREN_FUNCTIONS[token_type]
                )
            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        if self._match_set(self.FUNCTION_PARSERS):
            # Skip the opening parenthesis before delegating.
            self._advance()
            this = self.FUNCTION_PARSERS[token_type](self, token_type)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)
            this = self._curr.text
            # Skip the function name and the opening parenthesis.
            self._advance(2)

            # `_curr` is None when the input ends right after the parenthesis;
            # guard the attribute access so that case falls through to normal
            # argument parsing instead of raising AttributeError.
            if (
                subquery_predicate
                and self._curr
                and self._curr.token_type in (TokenType.SELECT, TokenType.WITH)
            ):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            function = self.FUNCTIONS.get(this.upper())
            args = self._parse_csv(self._parse_lambda)

            if function:
                this = function(args)
                self.validate_expression(this, args)
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)
        self._match_r_paren()
        return self._parse_window(this)

    def _parse_lambda(self):
        """
        Parse a function argument, which is either a lambda (`x -> ...` or
        `(x, y) -> ...`) or a regular expression.

        The parameter list is parsed speculatively; when no ARROW token follows,
        the parser rewinds and parses an ordinary argument with optional
        DISTINCT, IGNORE/RESPECT NULLS, ORDER BY, LIMIT and alias.
        """
        start = self._index

        if self._match(TokenType.L_PAREN):
            params = self._parse_csv(self._parse_id_var)
            self._match(TokenType.R_PAREN)
        else:
            params = [self._parse_id_var()]

        if self._match(TokenType.ARROW):
            return self.expression(
                exp.Lambda,
                this=self._parse_conjunction(),
                expressions=params,
            )

        # Not a lambda: undo the speculative parameter parse.
        self._retreat(start)

        distinct = self._match(TokenType.DISTINCT)
        argument = self._parse_conjunction()

        if distinct:
            argument = self.expression(exp.Distinct, this=argument)

        if self._match(TokenType.IGNORE_NULLS):
            argument = self.expression(exp.IgnoreNulls, this=argument)
        else:
            self._match(TokenType.RESPECT_NULLS)

        ordered = self._parse_order(argument)
        limited = self._parse_limit(ordered)
        return self._parse_alias(limited)

    def _parse_schema(self, this=None):
        """
        Parse a parenthesized list of constraints and column definitions.

        When no `(` follows, or the parenthesis opens a SELECT, the parser
        rewinds and `this` is returned unchanged.

        Returns:
            An exp.Schema node wrapping `this`, or `this` itself.
        """
        start = self._index
        opens_definition_list = self._match(TokenType.L_PAREN) and not self._match(
            TokenType.SELECT
        )
        if not opens_definition_list:
            self._retreat(start)
            return this

        definitions = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field())
        )
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=definitions)

    def _parse_column_def(self, this):
        """
        Parse the type and constraints that follow a column name.

        Args:
            this: the already-parsed column name/field.

        Returns:
            An exp.ColumnDef node, or `this` unchanged when no type follows.
        """
        column_type = self._parse_types()
        if not column_type:
            return this

        constraints = []
        constraint = self._parse_column_constraint()
        while constraint:
            constraints.append(constraint)
            constraint = self._parse_column_constraint()

        return self.expression(
            exp.ColumnDef, this=this, kind=column_type, constraints=constraints
        )

    def _parse_column_constraint(self):
        """
        Parse one column constraint, optionally named with `CONSTRAINT <name>`.

        Recognized kinds: AUTO_INCREMENT, CHECK (...), COLLATE, DEFAULT,
        NOT NULL, comment, PRIMARY KEY and UNIQUE.

        Returns:
            An exp.ColumnConstraint node, or None when no constraint kind
            matches.
        """
        kind = None
        this = None

        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()

        if self._match(TokenType.AUTO_INCREMENT):
            kind = exp.AutoIncrementColumnConstraint()
        elif self._match(TokenType.CHECK):
            self._match_l_paren()
            kind = self.expression(
                exp.CheckColumnConstraint, this=self._parse_conjunction()
            )
            self._match_r_paren()
        elif self._match(TokenType.COLLATE):
            kind = self.expression(exp.CollateColumnConstraint, this=self._parse_var())
        elif self._match(TokenType.DEFAULT):
            kind = self.expression(
                exp.DefaultColumnConstraint, this=self._parse_field()
            )
        # NOT and NULL are matched as a pair so that a NOT which is not
        # followed by NULL is left in the token stream instead of being
        # consumed before the remaining branches are tried.
        elif self._match_pair(TokenType.NOT, TokenType.NULL):
            kind = exp.NotNullColumnConstraint()
        elif self._match(TokenType.SCHEMA_COMMENT):
            kind = self.expression(
                exp.CommentColumnConstraint, this=self._parse_string()
            )
        elif self._match(TokenType.PRIMARY_KEY):
            kind = exp.PrimaryKeyColumnConstraint()
        elif self._match(TokenType.UNIQUE):
            kind = exp.UniqueColumnConstraint()

        if kind is None:
            return None

        return self.expression(exp.ColumnConstraint, this=this, kind=kind)

    def _parse_constraint(self):
        """
        Parse a table-level constraint.

        With a leading CONSTRAINT token, a name is read followed by any number
        of unnamed constraints or functions, and an exp.Constraint is returned.
        Without it, the result of `_parse_unnamed_constraint` is returned.
        """
        if self._match(TokenType.CONSTRAINT):
            name = self._parse_id_var()

            parts = []
            part = self._parse_unnamed_constraint() or self._parse_function()
            while part:
                parts.append(part)
                part = self._parse_unnamed_constraint() or self._parse_function()

            return self.expression(exp.Constraint, this=name, expressions=parts)

        return self._parse_unnamed_constraint()

    def _parse_unnamed_constraint(self):
        """
        Dispatch to the parser registered in CONSTRAINT_PARSERS for the matched
        token; return None when the current token is not registered.
        """
        if self._match_set(self.CONSTRAINT_PARSERS):
            constraint_parser = self.CONSTRAINT_PARSERS[self._prev.token_type]
            return constraint_parser(self)
        return None

    def _parse_check(self):
        """Parse `[CHECK] (<condition>)` into an exp.Check node."""
        self._match(TokenType.CHECK)

        self._match_l_paren()
        condition = self._parse_conjunction()
        self._match_r_paren()

        return self.expression(exp.Check, this=condition)

    def _parse_unique(self):
        """Parse `[UNIQUE] (<ids>)` into an exp.Unique node."""
        self._match(TokenType.UNIQUE)
        unique_columns = self._parse_wrapped_id_vars()
        return self.expression(exp.Unique, expressions=unique_columns)

    def _parse_foreign_key(self):
        """
        Parse a foreign key constraint:
        `[FOREIGN KEY] (<ids>) [REFERENCES <table> (<ids>)] [ON DELETE|UPDATE <action>]...`

        Each ON clause is stored as a keyword argument named after the
        lower-cased event text, with the upper-cased action text as its value.

        Returns:
            An exp.ForeignKey node.
        """
        self._match(TokenType.FOREIGN_KEY)
        key_columns = self._parse_wrapped_id_vars()

        reference = self._match(TokenType.REFERENCES)
        if reference:
            reference = self.expression(
                exp.Reference,
                this=self._parse_id_var(),
                expressions=self._parse_wrapped_id_vars(),
            )

        actions = {}
        while self._match(TokenType.ON):
            matched_event = self._match_set((TokenType.DELETE, TokenType.UPDATE))
            if not matched_event:
                self.raise_error("Expected DELETE or UPDATE")
            event = self._prev.text.lower()

            if self._match(TokenType.NO_ACTION):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Any other single token is taken verbatim as the action.
                self._advance()
                action = self._prev.text.upper()

            actions[event] = action

        return self.expression(
            exp.ForeignKey,
            expressions=key_columns,
            reference=reference,
            **actions,
        )

    def _parse_bracket(self, this):
        """
        Parse `[ ... ]` following `this`, repeatedly.

        Brackets with no preceding expression, or following something named
        ARRAY (case-insensitive), build an exp.Array; otherwise an exp.Bracket
        is built with the index offset applied to its expressions.

        Returns:
            The resulting expression, or `this` when no `[` follows.
        """
        if not self._match(TokenType.L_BRACKET):
            return this

        items = self._parse_csv(self._parse_conjunction)

        is_array_literal = not this or this.name.upper() == "ARRAY"
        if is_array_literal:
            result = self.expression(exp.Array, expressions=items)
        else:
            shifted = apply_index_offset(items, -self.index_offset)
            result = self.expression(exp.Bracket, this=this, expressions=shifted)

        closed = self._match(TokenType.R_BRACKET)
        if not closed:
            self.raise_error("Expected ]")

        # Recurse to pick up chained brackets such as x[0][1].
        return self._parse_bracket(result)

    def _parse_case(self):
        """
        Parse the remainder of a CASE expression:
        `[<operand>] WHEN <cond> THEN <result> ... [ELSE <default>] END`.

        Returns:
            An exp.Case node, passed through `_parse_window`.
        """
        operand = self._parse_conjunction()

        branches = []
        while self._match(TokenType.WHEN):
            condition = self._parse_conjunction()
            self._match(TokenType.THEN)
            result = self._parse_conjunction()
            branches.append(self.expression(exp.If, this=condition, true=result))

        default = self._parse_conjunction() if self._match(TokenType.ELSE) else None

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        case = self.expression(exp.Case, this=operand, ifs=branches, default=default)
        return self._parse_window(case)

    def _parse_if(self):
        """
        Parse an IF expression in either function form `IF(<args>)` or
        statement form `IF <cond> [THEN] <true> [ELSE <false>] [END]`.

        Returns:
            An exp.If node, passed through `_parse_window`.
        """
        if not self._match(TokenType.L_PAREN):
            condition = self._parse_conjunction()
            self._match(TokenType.THEN)
            when_true = self._parse_conjunction()
            if self._match(TokenType.ELSE):
                when_false = self._parse_conjunction()
            else:
                when_false = None
            self._match(TokenType.END)
            if_node = self.expression(
                exp.If, this=condition, true=when_true, false=when_false
            )
            return self._parse_window(if_node)

        args = self._parse_csv(self._parse_conjunction)
        if_node = exp.If.from_arg_list(args)
        self.validate_expression(if_node, args)
        self._match_r_paren()
        return self._parse_window(if_node)

    def _parse_extract(self):
        """Parse `<part> FROM <expression>` into an exp.Extract node."""
        part = self._parse_var() or self._parse_type()

        saw_from = self._match(TokenType.FROM)
        if not saw_from:
            self.raise_error("Expected FROM after EXTRACT", self._prev)

        source = self._parse_type()
        return self.expression(exp.Extract, this=part, expression=source)

    def _parse_cast(self, strict):
        """
        Parse `<expression> AS <type>` inside a cast.

        A CHAR target followed by a CHARACTER_SET token is replaced by an
        exp.CharacterSet.

        Args:
            strict: when truthy build exp.Cast, otherwise exp.TryCast.
        """
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            self.raise_error("Expected AS after CAST")

        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif to.this == exp.DataType.Type.CHAR and self._match(
            TokenType.CHARACTER_SET
        ):
            to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        cast_class = exp.Cast if strict else exp.TryCast
        return self.expression(cast_class, this=this, to=to)

    def _parse_convert(self):
        """
        Parse `<field> USING <charset>` or `<field>, <type>` into an exp.Cast.

        The cast target is None when neither USING nor a comma follows.
        """
        this = self._parse_field()

        target = None
        if self._match(TokenType.USING):
            target = self.expression(exp.CharacterSet, this=self._parse_var())
        elif self._match(TokenType.COMMA):
            target = self._parse_types()

        return self.expression(exp.Cast, this=this, to=target)

    def _parse_window(self, this, alias=False):
        """
        Parse the optional clauses that may follow a function call:
        FILTER (...), WITHIN GROUP (...), and a window specification.

        Args:
            this: the expression the clauses apply to.
            alias: when truthy, an optional ALIAS token is consumed and a window
                specification is parsed without requiring OVER (named window
                definitions). Note that this name is reused below as a local
                holding the parsed window name.

        Returns:
            `this` (possibly wrapped in exp.Filter) when no window follows, an
            exp.WithinGroup node, or an exp.Window node.
        """
        if self._match(TokenType.FILTER):
            self._match_l_paren()
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where()
            )
            self._match_r_paren()

        if self._match(TokenType.WITHIN_GROUP):
            self._match_l_paren()
            this = self.expression(
                exp.WithinGroup,
                this=this,
                expression=self._parse_order(),
            )
            self._match_r_paren()
            return this

        # bigquery select from window x AS (partition by ...)
        if alias:
            self._match(TokenType.ALIAS)
        elif not self._match(TokenType.OVER):
            return this

        # No parenthesis: the window is referenced by name only.
        if not self._match(TokenType.L_PAREN):
            alias = self._parse_id_var(False)

            return self.expression(
                exp.Window,
                this=this,
                alias=alias,
            )

        partition = None

        # An optional identifier at the start of the parenthesized spec is
        # stored as the window's alias.
        alias = self._parse_id_var(False)

        if self._match(TokenType.PARTITION_BY):
            partition = self._parse_csv(self._parse_conjunction)

        order = self._parse_order()

        spec = None
        # `kind` is the text of the ROWS/RANGE token, or falsy if absent.
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )

        self._match_r_paren()

        return self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=alias,
        )

    def _parse_window_spec(self):
        """
        Parse one boundary of a window frame.

        An optional BETWEEN token is skipped first.

        Returns
            A dict with two keys:
            "value": `self._prev.text` when an UNBOUNDED or CURRENT_ROW token is
                matched and that text is truthy, otherwise the result of
                `_parse_bitwise`.
            "side": `self._prev.text` when a PRECEDING or FOLLOWING token is
                matched, otherwise the falsy result of `_match_set`.
        """
        self._match(TokenType.BETWEEN)

        # The value is resolved before the side, as the tokens appear in that order.
        value = None
        if self._match_set((TokenType.UNBOUNDED, TokenType.CURRENT_ROW)):
            value = self._prev.text
        if not value:
            value = self._parse_bitwise()

        side = self._match_set((TokenType.PRECEDING, TokenType.FOLLOWING))
        if side:
            side = self._prev.text

        return {"value": value, "side": side}

    def _parse_alias(self, this, explicit=False):
        """
        Parse an optional alias for `this`.

        Args
            this: the expression being aliased.
            explicit (bool): when truthy, an alias is only parsed if an ALIAS
                token is present; otherwise `this` is returned untouched.
                Default: False

        Returns
            An `exp.Aliases` when a parenthesized list of names follows, an
            `exp.Alias` when a single name is parsed, otherwise `this`.
        """
        has_alias_token = self._match(TokenType.ALIAS)

        if explicit and not has_alias_token:
            return this

        if not self._match(TokenType.L_PAREN):
            # Single alias; the match result is forwarded as `any_token`.
            name = self._parse_id_var(has_alias_token)
            if name:
                return self.expression(exp.Alias, this=this, alias=name)
            return this

        names = self._parse_csv(lambda: self._parse_id_var(has_alias_token))
        node = self.expression(exp.Aliases, this=this, expressions=names)
        self._match_r_paren()
        return node

    def _parse_id_var(self, any_token=True):
        """
        Parse an identifier-like token.

        Args
            any_token (bool): when truthy, any current token whose type is not in
                `RESERVED_KEYWORDS` is accepted as an unquoted identifier.
                Default: True

        Returns
            The result of `_parse_identifier` when it is truthy; otherwise an
            unquoted `exp.Identifier` built from `self._prev.text` when a token
            is accepted, or the falsy result of `_match_set` when none is.
        """
        parsed = self._parse_identifier()
        if parsed:
            return parsed

        token = self._curr
        if any_token and token and token.token_type not in self.RESERVED_KEYWORDS:
            # NOTE(review): this relies on `_advance()` returning a falsy value so
            # that the identifier is what gets returned - confirm against `_advance`.
            return self._advance() or exp.Identifier(this=self._prev.text, quoted=False)

        matched = self._match_set(self.ID_VAR_TOKENS)
        if not matched:
            return matched
        return exp.Identifier(this=self._prev.text, quoted=False)

    def _parse_string(self):
        """
        Return a string literal built from `self._prev.text` when a STRING token
        is matched, otherwise the result of `_parse_placeholder`.
        """
        if not self._match(TokenType.STRING):
            return self._parse_placeholder()
        return exp.Literal.string(self._prev.text)

    def _parse_number(self):
        """
        Return a number literal built from `self._prev.text` when a NUMBER token
        is matched, otherwise the result of `_parse_placeholder`.
        """
        if not self._match(TokenType.NUMBER):
            return self._parse_placeholder()
        return exp.Literal.number(self._prev.text)

    def _parse_identifier(self):
        """
        Return a quoted `exp.Identifier` built from `self._prev.text` when an
        IDENTIFIER token is matched, otherwise the result of `_parse_placeholder`.
        """
        if not self._match(TokenType.IDENTIFIER):
            return self._parse_placeholder()
        return exp.Identifier(this=self._prev.text, quoted=True)

    def _parse_var(self):
        """
        Return an `exp.Var` built from `self._prev.text` when a VAR token is
        matched, otherwise the result of `_parse_placeholder`.
        """
        if not self._match(TokenType.VAR):
            return self._parse_placeholder()
        return exp.Var(this=self._prev.text)

    def _parse_var_or_string(self):
        """
        Return the result of `_parse_var` when it is truthy, otherwise the result
        of `_parse_string`.
        """
        var = self._parse_var()
        return var if var else self._parse_string()

    def _parse_null(self):
        """Return an `exp.Null` when a NULL token is matched, otherwise None."""
        return exp.Null() if self._match(TokenType.NULL) else None

    def _parse_boolean(self):
        """
        Return an `exp.Boolean` holding True or False when a TRUE or FALSE token
        is matched (TRUE is tried first), otherwise None.
        """
        candidates = ((TokenType.TRUE, True), (TokenType.FALSE, False))
        for token_type, value in candidates:
            if self._match(token_type):
                return exp.Boolean(this=value)
        return None

    def _parse_star(self):
        """
        Return an `exp.Star` when a STAR token is matched, otherwise None.

        The star's "except" and "replace" args come from `_parse_except` and
        `_parse_replace`, called in that order.
        """
        if not self._match(TokenType.STAR):
            return None

        excluded = self._parse_except()
        replaced = self._parse_replace()
        # "except" is a Python keyword, so the args go through a dict.
        return exp.Star(**{"except": excluded, "replace": replaced})

    def _parse_placeholder(self):
        """Return an `exp.Placeholder` when a PLACEHOLDER token is matched, otherwise None."""
        return exp.Placeholder() if self._match(TokenType.PLACEHOLDER) else None

    def _parse_except(self):
        """
        Return the result of `_parse_wrapped_id_vars` when an EXCEPT token is
        matched, otherwise None.
        """
        if self._match(TokenType.EXCEPT):
            return self._parse_wrapped_id_vars()
        return None

    def _parse_replace(self):
        """
        Parse `REPLACE ( <expression> [alias], ... )`.

        Returns
            None when no REPLACE token is matched, otherwise the list of
            expressions parsed between the parentheses, each passed through
            `_parse_alias`.
        """
        if not self._match(TokenType.REPLACE):
            return None

        def parse_column():
            return self._parse_alias(self._parse_expression())

        self._match_l_paren()
        replacements = self._parse_csv(parse_column)
        self._match_r_paren()
        return replacements

    def _parse_csv(self, parse):
        """
        Collect the results of `parse` for a COMMA separated sequence.

        Args
            parse (callable): zero-argument callable invoked once up front and
                once more after every matched COMMA token.

        Returns
            A list of the results that are not None, in parse order.
        """
        items = []

        while True:
            item = parse()
            if item is not None:
                items.append(item)
            # Keep going only while another comma separates the next element.
            if not self._match(TokenType.COMMA):
                return items

    def _parse_tokens(self, parse, expressions):
        this = parse()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type], this=this, expression=parse()
            )

        return this

    def _parse_all(self, parse):
        return list(iter(parse, None))

    def _parse_wrapped_id_vars(self):
        """
        Parse `( <id_var>, ... )` and return the list produced by `_parse_csv`
        over `_parse_id_var`. Both parentheses are checked with the
        `_match_l_paren` / `_match_r_paren` helpers.
        """
        self._match_l_paren()
        id_vars = self._parse_csv(self._parse_id_var)
        self._match_r_paren()
        return id_vars

    def _match(self, token_type):
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            self._advance()
            return True

        return None

    def _match_set(self, types):
        if not self._curr:
            return None

        if self._curr.token_type in types:
            self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        if not self._curr or not self._next:
            return None

        if (
            self._curr.token_type == token_type_a
            and self._next.token_type == token_type_b
        ):
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self):
        """Consume an L_PAREN token, reporting "Expecting (" through `raise_error` if absent."""
        if self._match(TokenType.L_PAREN):
            return
        self.raise_error("Expecting (")

    def _match_r_paren(self):
        """Consume an R_PAREN token, reporting "Expecting )" through `raise_error` if absent."""
        if self._match(TokenType.R_PAREN):
            return
        self.raise_error("Expecting )")

    def _replace_columns_with_dots(self, this):
        """
        Rewrite column and identifier nodes into dot and var nodes.

        - `exp.Dot`: passed to `exp.replace_children` with this method as the
          callback, then returned as is.
        - `exp.Column`: passed to `exp.replace_children` the same way, then
          replaced by an `exp.Dot` of its "table" arg and `this.this` when it has
          a table, otherwise by an `exp.Var` named after the column.
        - `exp.Identifier`: replaced by an `exp.Var` with the same name.
        - Anything else is returned unchanged.
        """
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
            return this

        if isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            if table:
                return self.expression(exp.Dot, this=table, expression=this.this)
            return self.expression(exp.Var, this=this.name)

        if isinstance(this, exp.Identifier):
            return self.expression(exp.Var, this=this.name)

        return this