2191 lines
70 KiB
Python
2191 lines
70 KiB
Python
|
import logging
|
||
|
|
||
|
from sqlglot import exp
|
||
|
from sqlglot.errors import ErrorLevel, ParseError, concat_errors
|
||
|
from sqlglot.helper import apply_index_offset, ensure_list, list_get
|
||
|
from sqlglot.tokens import Token, Tokenizer, TokenType
|
||
|
|
||
|
logger = logging.getLogger("sqlglot")
|
||
|
|
||
|
|
||
|
class Parser:
|
||
|
"""
|
||
|
Parser consumes a list of tokens produced by the :class:`~sqlglot.tokens.Tokenizer`
|
||
|
and produces a parsed syntax tree.
|
||
|
|
||
|
Args
|
||
|
error_level (ErrorLevel): the desired error level. Default: ErrorLevel.RAISE.
|
||
|
error_message_context (int): determines the amount of context to capture from
|
||
|
a query string when displaying the error message (in number of characters).
|
||
|
Default: 100.
|
||
|
index_offset (int): Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list
|
||
|
Default: 0
|
||
|
alias_post_tablesample (bool): If the table alias comes after tablesample
|
||
|
Default: False
|
||
|
max_errors (int): Maximum number of error messages to include in a raised ParseError.
|
||
|
This is only relevant if error_level is ErrorLevel.RAISE.
|
||
|
Default: 3
|
||
|
null_ordering (str): Indicates the default null ordering method to use if not explicitly set.
|
||
|
Options are "nulls_are_small", "nulls_are_large", "nulls_are_last".
|
||
|
Default: "nulls_are_small"
|
||
|
"""
|
||
|
|
||
|
# Function-name -> builder used when parsing SQL function calls. Most entries
# come from each expression class's registered SQL names; the explicit lambdas
# below expand internal helper "functions" into equivalent expression trees.
FUNCTIONS = {
    **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()},
    "DATE_TO_DATE_STR": lambda args: exp.Cast(
        this=list_get(args, 0),
        to=exp.DataType(this=exp.DataType.Type.TEXT),
    ),
    "TIME_TO_TIME_STR": lambda args: exp.Cast(
        this=list_get(args, 0),
        to=exp.DataType(this=exp.DataType.Type.TEXT),
    ),
    # Cast to TEXT then keep the first 10 chars (the "YYYY-MM-DD" prefix).
    "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
        this=exp.Cast(
            this=list_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        start=exp.Literal.number(1),
        length=exp.Literal.number(10),
    ),
}
|
||
|
|
||
|
# Functions that may appear without parentheses, mapped to their expression.
NO_PAREN_FUNCTIONS = {
    TokenType.CURRENT_DATE: exp.CurrentDate,
    TokenType.CURRENT_DATETIME: exp.CurrentDate,
    TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
}

# Type tokens that can carry nested type parameters, e.g. ARRAY<INT>.
NESTED_TYPE_TOKENS = {
    TokenType.ARRAY,
    TokenType.MAP,
    TokenType.STRUCT,
    TokenType.NULLABLE,
}

# Every token that can start a data type.
TYPE_TOKENS = {
    TokenType.BOOLEAN,
    TokenType.TINYINT,
    TokenType.SMALLINT,
    TokenType.INT,
    TokenType.BIGINT,
    TokenType.FLOAT,
    TokenType.DOUBLE,
    TokenType.CHAR,
    TokenType.NCHAR,
    TokenType.VARCHAR,
    TokenType.NVARCHAR,
    TokenType.TEXT,
    TokenType.BINARY,
    TokenType.JSON,
    TokenType.TIMESTAMP,
    TokenType.TIMESTAMPTZ,
    TokenType.DATETIME,
    TokenType.DATE,
    TokenType.DECIMAL,
    TokenType.UUID,
    TokenType.GEOGRAPHY,
    *NESTED_TYPE_TOKENS,
}

# Tokens that introduce a subquery predicate, mapped to their expression class.
SUBQUERY_PREDICATES = {
    TokenType.ANY: exp.Any,
    TokenType.ALL: exp.All,
    TokenType.EXISTS: exp.Exists,
    TokenType.SOME: exp.Any,
}

# Tokens that can never be used as identifiers.
RESERVED_KEYWORDS = {*Tokenizer.SINGLE_TOKENS.values(), TokenType.SELECT}
|
||
|
|
||
|
# Keyword tokens that are still allowed to be (re)used as identifiers.
ID_VAR_TOKENS = {
    TokenType.VAR,
    TokenType.ALTER,
    TokenType.BEGIN,
    TokenType.BUCKET,
    TokenType.CACHE,
    TokenType.COLLATE,
    TokenType.COMMIT,
    TokenType.CONSTRAINT,
    TokenType.CONVERT,
    TokenType.DEFAULT,
    TokenType.DELETE,
    TokenType.ENGINE,
    TokenType.ESCAPE,
    TokenType.EXPLAIN,
    TokenType.FALSE,
    TokenType.FIRST,
    TokenType.FOLLOWING,
    TokenType.FORMAT,
    TokenType.FUNCTION,
    TokenType.IF,
    TokenType.INDEX,
    TokenType.ISNULL,
    TokenType.INTERVAL,
    TokenType.LAZY,
    TokenType.LOCATION,
    TokenType.NEXT,
    TokenType.ONLY,
    TokenType.OPTIMIZE,
    TokenType.OPTIONS,
    TokenType.ORDINALITY,
    TokenType.PERCENT,
    TokenType.PRECEDING,
    TokenType.RANGE,
    TokenType.REFERENCES,
    TokenType.ROWS,
    TokenType.SCHEMA_COMMENT,
    TokenType.SET,
    TokenType.SHOW,
    TokenType.STORED,
    TokenType.TABLE,
    TokenType.TABLE_FORMAT,
    TokenType.TEMPORARY,
    TokenType.TOP,
    TokenType.TRUNCATE,
    TokenType.TRUE,
    TokenType.UNBOUNDED,
    TokenType.UNIQUE,
    TokenType.PROPERTIES,
    *SUBQUERY_PREDICATES,
    *TYPE_TOKENS,
}

# The two cast syntaxes; TRY_CAST is the non-strict variant.
CASTS = {
    TokenType.CAST,
    TokenType.TRY_CAST,
}
|
||
|
|
||
|
# Keyword tokens that may also appear as function names (e.g. LEFT(x, 1)).
FUNC_TOKENS = {
    TokenType.CONVERT,
    TokenType.CURRENT_DATE,
    TokenType.CURRENT_DATETIME,
    TokenType.CURRENT_TIMESTAMP,
    TokenType.CURRENT_TIME,
    TokenType.EXTRACT,
    TokenType.FILTER,
    TokenType.FIRST,
    TokenType.FORMAT,
    TokenType.ISNULL,
    TokenType.OFFSET,
    TokenType.PRIMARY_KEY,
    TokenType.REPLACE,
    TokenType.ROW,
    TokenType.UNNEST,
    TokenType.VAR,
    TokenType.LEFT,
    TokenType.RIGHT,
    TokenType.DATE,
    TokenType.DATETIME,
    TokenType.TIMESTAMP,
    TokenType.TIMESTAMPTZ,
    *CASTS,
    *NESTED_TYPE_TOKENS,
    *SUBQUERY_PREDICATES,
}
|
||
|
|
||
|
# Binary-operator token -> expression class tables, one per precedence level.
CONJUNCTION = {
    TokenType.AND: exp.And,
    TokenType.OR: exp.Or,
}

EQUALITY = {
    TokenType.EQ: exp.EQ,
    TokenType.NEQ: exp.NEQ,
}

COMPARISON = {
    TokenType.GT: exp.GT,
    TokenType.GTE: exp.GTE,
    TokenType.LT: exp.LT,
    TokenType.LTE: exp.LTE,
}

BITWISE = {
    TokenType.AMP: exp.BitwiseAnd,
    TokenType.CARET: exp.BitwiseXor,
    TokenType.PIPE: exp.BitwiseOr,
    TokenType.DPIPE: exp.DPipe,
}

# Additive-level operators.
TERM = {
    TokenType.DASH: exp.Sub,
    TokenType.PLUS: exp.Add,
    TokenType.MOD: exp.Mod,
}

# Multiplicative-level operators.
FACTOR = {
    TokenType.DIV: exp.IntDiv,
    TokenType.SLASH: exp.Div,
    TokenType.STAR: exp.Mul,
}

TIMESTAMPS = {
    TokenType.TIMESTAMP,
    TokenType.TIMESTAMPTZ,
}

SET_OPERATIONS = {
    TokenType.UNION,
    TokenType.INTERSECT,
    TokenType.EXCEPT,
}

JOIN_SIDES = {
    TokenType.LEFT,
    TokenType.RIGHT,
    TokenType.FULL,
}

JOIN_KINDS = {
    TokenType.INNER,
    TokenType.OUTER,
    TokenType.CROSS,
}
|
||
|
|
||
|
# Postfix operators that can follow a column reference. DOT maps to None
# because member access is handled inline by the caller; the arrow operators
# build the corresponding JSON(B) extraction expressions.
COLUMN_OPERATORS = {
    TokenType.DOT: None,
    TokenType.ARROW: lambda self, this, path: self.expression(
        exp.JSONExtract,
        this=this,
        path=path,
    ),
    TokenType.DARROW: lambda self, this, path: self.expression(
        exp.JSONExtractScalar,
        this=this,
        path=path,
    ),
    TokenType.HASH_ARROW: lambda self, this, path: self.expression(
        exp.JSONBExtract,
        this=this,
        path=path,
    ),
    TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
        exp.JSONBExtractScalar,
        this=this,
        path=path,
    ),
}
|
||
|
|
||
|
# Expression type -> parser entry point, used by parse_into().
EXPRESSION_PARSERS = {
    exp.DataType: lambda self: self._parse_types(),
    exp.From: lambda self: self._parse_from(),
    exp.Group: lambda self: self._parse_group(),
    exp.Lateral: lambda self: self._parse_lateral(),
    exp.Join: lambda self: self._parse_join(),
    exp.Order: lambda self: self._parse_order(),
    exp.Cluster: lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster),
    exp.Sort: lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort),
    exp.Lambda: lambda self: self._parse_lambda(),
    exp.Limit: lambda self: self._parse_limit(),
    exp.Offset: lambda self: self._parse_offset(),
    exp.TableAlias: lambda self: self._parse_table_alias(),
    exp.Table: lambda self: self._parse_table(),
    exp.Condition: lambda self: self._parse_conjunction(),
    exp.Expression: lambda self: self._parse_statement(),
    exp.Properties: lambda self: self._parse_properties(),
    "JOIN_TYPE": lambda self: self._parse_join_side_and_kind(),
}

# Leading statement token -> statement parser.
STATEMENT_PARSERS = {
    TokenType.CREATE: lambda self: self._parse_create(),
    TokenType.DROP: lambda self: self._parse_drop(),
    TokenType.INSERT: lambda self: self._parse_insert(),
    TokenType.UPDATE: lambda self: self._parse_update(),
    TokenType.DELETE: lambda self: self._parse_delete(),
    TokenType.CACHE: lambda self: self._parse_cache(),
    TokenType.UNCACHE: lambda self: self._parse_uncache(),
}
|
||
|
|
||
|
# Token -> builder for primary (leaf) expressions. Each callable receives
# (self, token); entries that ignore one of the two use `_`.
PRIMARY_PARSERS = {
    TokenType.STRING: lambda _, token: exp.Literal.string(token.text),
    TokenType.NUMBER: lambda _, token: exp.Literal.number(token.text),
    TokenType.STAR: lambda self, _: exp.Star(
        **{"except": self._parse_except(), "replace": self._parse_replace()}
    ),
    TokenType.NULL: lambda *_: exp.Null(),
    TokenType.TRUE: lambda *_: exp.Boolean(this=True),
    TokenType.FALSE: lambda *_: exp.Boolean(this=False),
    TokenType.PLACEHOLDER: lambda *_: exp.Placeholder(),
    TokenType.BIT_STRING: lambda _, token: exp.BitString(this=token.text),
    TokenType.INTRODUCER: lambda self, token: self.expression(
        exp.Introducer,
        this=token.text,
        expression=self._parse_var_or_string(),
    ),
}

# Token -> parser for range/predicate operators applied to `this`.
RANGE_PARSERS = {
    TokenType.BETWEEN: lambda self, this: self._parse_between(this),
    TokenType.IN: lambda self, this: self._parse_in(this),
    TokenType.IS: lambda self, this: self._parse_is(this),
    TokenType.LIKE: lambda self, this: self._parse_escape(
        self.expression(exp.Like, this=this, expression=self._parse_type())
    ),
    TokenType.ILIKE: lambda self, this: self._parse_escape(
        self.expression(exp.ILike, this=this, expression=self._parse_type())
    ),
    TokenType.RLIKE: lambda self, this: self.expression(
        exp.RegexpLike, this=this, expression=self._parse_type()
    ),
}
|
||
|
|
||
|
# Token -> parser for CREATE TABLE / schema properties.
PROPERTY_PARSERS = {
    TokenType.AUTO_INCREMENT: lambda self: self._parse_auto_increment(),
    TokenType.CHARACTER_SET: lambda self: self._parse_character_set(),
    TokenType.COLLATE: lambda self: self._parse_collate(),
    TokenType.ENGINE: lambda self: self._parse_engine(),
    TokenType.FORMAT: lambda self: self._parse_format(),
    TokenType.LOCATION: lambda self: self.expression(
        exp.LocationProperty,
        this=exp.Literal.string("LOCATION"),
        value=self._parse_string(),
    ),
    TokenType.PARTITIONED_BY: lambda self: self.expression(
        exp.PartitionedByProperty,
        this=exp.Literal.string("PARTITIONED_BY"),
        value=self._parse_schema(),
    ),
    TokenType.SCHEMA_COMMENT: lambda self: self._parse_schema_comment(),
    TokenType.STORED: lambda self: self._parse_stored(),
    TokenType.TABLE_FORMAT: lambda self: self._parse_table_format(),
    TokenType.USING: lambda self: self._parse_table_format(),
}

# Token -> parser for column/table constraints.
CONSTRAINT_PARSERS = {
    TokenType.CHECK: lambda self: self._parse_check(),
    TokenType.FOREIGN_KEY: lambda self: self._parse_foreign_key(),
    TokenType.UNIQUE: lambda self: self._parse_unique(),
}

# Function-like constructs that take no parenthesized argument list.
NO_PAREN_FUNCTION_PARSERS = {
    TokenType.CASE: lambda self: self._parse_case(),
    TokenType.IF: lambda self: self._parse_if(),
}

# Token -> parser for functions with special argument syntax. The lambda
# parameter `token_type` shadows the comprehension variable on purpose, so
# each entry binds its own token (no late-binding issue).
FUNCTION_PARSERS = {
    TokenType.CONVERT: lambda self, _: self._parse_convert(),
    TokenType.EXTRACT: lambda self, _: self._parse_extract(),
    **{
        token_type: lambda self, token_type: self._parse_cast(
            self.STRICT_CAST and token_type == TokenType.CAST
        )
        for token_type in CASTS
    },
}
|
||
|
|
||
|
# Clause name -> parser, applied in this order after a query's SELECT list.
# Dict insertion order determines the order in which clauses are attempted.
QUERY_MODIFIER_PARSERS = {
    "laterals": lambda self: self._parse_laterals(),
    "joins": lambda self: self._parse_joins(),
    "where": lambda self: self._parse_where(),
    "group": lambda self: self._parse_group(),
    "having": lambda self: self._parse_having(),
    "qualify": lambda self: self._parse_qualify(),
    "window": lambda self: self._match(TokenType.WINDOW)
    and self._parse_window(self._parse_id_var(), alias=True),
    "distribute": lambda self: self._parse_sort(
        TokenType.DISTRIBUTE_BY, exp.Distribute
    ),
    "sort": lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort),
    "cluster": lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster),
    "order": lambda self: self._parse_order(),
    "limit": lambda self: self._parse_limit(),
    "offset": lambda self: self._parse_offset(),
}

# Object kinds accepted after CREATE.
CREATABLES = {TokenType.TABLE, TokenType.VIEW, TokenType.FUNCTION, TokenType.INDEX}

# Whether bare CAST should be treated as strict; dialects may override.
STRICT_CAST = True

# Fixed attribute set: avoids a per-instance __dict__ for parser instances.
__slots__ = (
    "error_level",
    "error_message_context",
    "sql",
    "errors",
    "index_offset",
    "unnest_column_only",
    "alias_post_tablesample",
    "max_errors",
    "null_ordering",
    "_tokens",
    "_chunks",
    "_index",
    "_curr",
    "_next",
    "_prev",
    "_greedy_subqueries",
)
|
||
|
|
||
|
def __init__(
    self,
    error_level=None,
    error_message_context=100,
    index_offset=0,
    unnest_column_only=False,
    alias_post_tablesample=False,
    max_errors=3,
    null_ordering=None,
):
    # See the class docstring for the meaning of each option.
    self.error_level = error_level or ErrorLevel.RAISE  # default: collect then raise
    self.error_message_context = error_message_context
    self.index_offset = index_offset
    self.unnest_column_only = unnest_column_only
    self.alias_post_tablesample = alias_post_tablesample
    self.max_errors = max_errors
    self.null_ordering = null_ordering
    self.reset()  # initialize all mutable per-parse state
|
||
|
|
||
|
def reset(self):
    """Clear all per-parse state so the instance can be reused."""
    self.sql = ""
    self.errors = []
    self._tokens = []
    self._chunks = [[]]  # token chunks, one per semicolon-separated statement
    self._index = 0
    self._curr = None  # token at the cursor
    self._next = None  # one-token lookahead
    self._prev = None  # most recently consumed token
    self._greedy_subqueries = False
|
||
|
|
||
|
def parse(self, raw_tokens, sql=None):
    """
    Parses the given list of tokens and returns a list of syntax trees, one tree
    per parsed SQL statement.

    Args
        raw_tokens (list): the list of tokens (:class:`~sqlglot.tokens.Token`).
        sql (str): the original SQL string. Used to produce helpful debug messages.

    Returns
        the list of syntax trees (:class:`~sqlglot.expressions.Expression`).
    """
    # self.__class__._parse_statement resolves on the instance's actual class,
    # so subclass overrides of _parse_statement are honored.
    return self._parse(
        parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
    )
|
||
|
|
||
|
def parse_into(self, expression_types, raw_tokens, sql=None):
    """
    Parses *raw_tokens* into the first of *expression_types* that succeeds.

    Args
        expression_types: one expression type, or a list of types to try in order.
        raw_tokens (list): the list of tokens (:class:`~sqlglot.tokens.Token`).
        sql (str): the original SQL string. Used to produce helpful debug messages.

    Returns
        the list of syntax trees produced by the first successful parser.

    Raises
        TypeError: if no parser is registered for a requested expression type.
        ParseError: if none of the requested expression types could be parsed.
    """
    # Initialize so the final raise cannot hit an UnboundLocalError when
    # ensure_list(expression_types) is empty.
    error = None
    for expression_type in ensure_list(expression_types):
        parser = self.EXPRESSION_PARSERS.get(expression_type)
        if not parser:
            raise TypeError(f"No parser registered for {expression_type}")
        try:
            return self._parse(parser, raw_tokens, sql)
        except ParseError as e:
            error = e  # remember the last failure, try the next type
    raise ParseError(f"Failed to parse into {expression_types}") from error
|
||
|
|
||
|
def _parse(self, parse_method, raw_tokens, sql=None):
    """Chunk *raw_tokens* on semicolons, run *parse_method* on each chunk."""
    self.reset()
    self.sql = sql or ""
    last = len(raw_tokens) - 1

    # Split the token stream into one chunk per statement. Semicolon tokens
    # themselves are dropped, and a trailing semicolon opens no empty chunk.
    for position, token in enumerate(raw_tokens):
        if token.token_type != TokenType.SEMICOLON:
            self._chunks[-1].append(token)
        elif position < last:
            self._chunks.append([])

    expressions = []

    for chunk in self._chunks:
        self._index = -1
        self._tokens = chunk
        self._advance()
        expressions.append(parse_method(self))

        # Any leftover tokens mean the statement was not fully consumed.
        if self._index < len(self._tokens):
            self.raise_error("Invalid expression / Unexpected token")

    self.check_errors()

    return expressions
|
||
|
|
||
|
def check_errors(self):
    """Log or raise accumulated parse errors per the configured error level."""
    if self.error_level == ErrorLevel.WARN:
        for err in self.errors:
            logger.error(str(err))
        return
    if self.error_level == ErrorLevel.RAISE and self.errors:
        raise ParseError(concat_errors(self.errors, self.max_errors))
|
||
|
|
||
|
def raise_error(self, message, token=None):
    """Record a ParseError anchored at *token*; raise it only on IMMEDIATE."""
    token = token or self._curr or self._prev or Token.string("")
    start = self._find_token(token, self.sql)
    end = start + len(token.text)
    # Surround the offending token with a window of the original SQL and
    # underline it via ANSI escapes.
    context_before = self.sql[max(start - self.error_message_context, 0) : start]
    underlined = self.sql[start:end]
    context_after = self.sql[end : end + self.error_message_context]
    error = ParseError(
        f"{message}. Line {token.line}, Col: {token.col}.\n"
        f"  {context_before}\033[4m{underlined}\033[0m{context_after}"
    )
    if self.error_level == ErrorLevel.IMMEDIATE:
        raise error
    self.errors.append(error)
|
||
|
|
||
|
def expression(self, exp_class, **kwargs):
    """Instantiate *exp_class* with *kwargs* and validate its arguments."""
    node = exp_class(**kwargs)
    self.validate_expression(node)
    return node
|
||
|
|
||
|
def validate_expression(self, expression, args=None):
    """Report unexpected/missing keywords and positional-arg overflow."""
    if self.error_level == ErrorLevel.IGNORE:
        return

    # Reject keywords the expression type does not declare.
    for key in expression.args:
        if key not in expression.arg_types:
            self.raise_error(f"Unexpected keyword: '{key}' for {expression.__class__}")

    # Every mandatory keyword must be present and non-empty.
    for key, required in expression.arg_types.items():
        value = expression.args.get(key)
        if required and (value is None or (isinstance(value, list) and not value)):
            self.raise_error(
                f"Required keyword: '{key}' missing for {expression.__class__}"
            )

    # When positional args were supplied, they must fit the declared arity
    # unless the expression accepts variable-length arguments.
    if args and len(args) > len(expression.arg_types) and not expression.is_var_len_args:
        self.raise_error(
            f"The number of provided arguments ({len(args)}) is greater than "
            f"the maximum number of supported arguments ({len(expression.arg_types)})"
        )
|
||
|
|
||
|
def _find_token(self, token, sql):
    """Return the character offset of *token* within *sql*.

    Walks the raw SQL tracking (line, col) until they reach the token's
    recorded position; line breaks reset the column counter.
    """
    line = 1
    col = 1
    index = 0

    while line < token.line or col < token.col:
        if Tokenizer.WHITE_SPACE.get(sql[index]) == TokenType.BREAK:
            line += 1
            col = 1
        else:
            col += 1
        index += 1

    return index
|
||
|
|
||
|
def _get_token(self, index):
    # Bounds-safe token lookup via the list_get helper (presumably returns
    # None when index is out of range — verify against sqlglot.helper).
    return list_get(self._tokens, index)
|
||
|
|
||
|
def _advance(self, times=1):
    """Move the cursor *times* tokens forward and refresh the token window."""
    self._index += times
    self._curr = self._get_token(self._index)
    self._next = self._get_token(self._index + 1)
    # _prev only exists once at least one token has been consumed.
    self._prev = self._get_token(self._index - 1) if self._index > 0 else None
|
||
|
|
||
|
def _retreat(self, index):
    # Jump (usually backwards) to an absolute token index.
    self._advance(index - self._index)
|
||
|
|
||
|
def _parse_statement(self):
    """Parse one top-level statement, falling back to a bare expression/select."""
    if self._curr is None:
        return None

    if self._match_set(self.STATEMENT_PARSERS):
        return self.STATEMENT_PARSERS[self._prev.token_type](self)

    if self._match_set(Tokenizer.COMMANDS):
        # Opaque commands keep the remainder of the statement as a raw string.
        return self.expression(
            exp.Command,
            this=self._prev.text,
            expression=self._parse_string(),
        )

    expression = self._parse_expression()
    if expression:
        expression = self._parse_set_operations(expression)
    else:
        expression = self._parse_select()
    self._parse_query_modifiers(expression)
    return expression
|
||
|
|
||
|
def _parse_drop(self):
    """Parse a DROP TABLE / DROP VIEW statement."""
    if self._match(TokenType.TABLE):
        kind = "TABLE"
    elif self._match(TokenType.VIEW):
        kind = "VIEW"
    else:
        # Keep kind bound so error levels that merely record the error
        # (WARN/RAISE) don't crash below with UnboundLocalError.
        kind = None
        self.raise_error("Expected TABLE or VIEW")

    return self.expression(
        exp.Drop,
        exists=self._parse_exists(),
        this=self._parse_table(schema=True),
        kind=kind,
    )
|
||
|
|
||
|
def _parse_exists(self, not_=False):
    """Consume an optional IF [NOT] EXISTS clause.

    The short-circuit chain deliberately returns whatever falsy value a
    failed match produced, so callers store exactly that in the tree.
    """
    return (
        self._match(TokenType.IF)
        and (not not_ or self._match(TokenType.NOT))
        and self._match(TokenType.EXISTS)
    )
|
||
|
|
||
|
def _parse_create(self):
    """Parse CREATE [OR REPLACE] [TEMPORARY] [UNIQUE] TABLE|VIEW|INDEX|FUNCTION."""
    # OR only counts when immediately followed by REPLACE.
    replace = self._match(TokenType.OR) and self._match(TokenType.REPLACE)
    temporary = self._match(TokenType.TEMPORARY)
    unique = self._match(TokenType.UNIQUE)

    create_token = self._match_set(self.CREATABLES) and self._prev

    if not create_token:
        # NOTE(review): raise_error only raises on ErrorLevel.IMMEDIATE, so
        # for other levels create_token stays falsy and the attribute access
        # below will fail — confirm intended behavior.
        self.raise_error("Expected TABLE, VIEW, INDEX, or FUNCTION")

    exists = self._parse_exists(not_=True)
    this = None
    expression = None
    properties = None

    if create_token.token_type == TokenType.FUNCTION:
        this = self._parse_var()
        if self._match(TokenType.ALIAS):
            expression = self._parse_string()  # function body as a raw string
    elif create_token.token_type == TokenType.INDEX:
        this = self._parse_index()
    elif create_token.token_type in (TokenType.TABLE, TokenType.VIEW):
        this = self._parse_table(schema=True)
        # Pass the column schema so PARTITIONED BY columns can be moved out
        # of the column list (see _parse_property).
        properties = self._parse_properties(
            this if isinstance(this, exp.Schema) else None
        )
        if self._match(TokenType.ALIAS):
            expression = self._parse_select()  # CREATE ... AS SELECT

    return self.expression(
        exp.Create,
        this=this,
        kind=create_token.text,
        expression=expression,
        exists=exists,
        properties=properties,
        temporary=temporary,
        replace=replace,
        unique=unique,
    )
|
||
|
|
||
|
def _parse_property(self, schema):
    """Parse one table property; *schema* is the table's column schema, if any."""
    if self._match_set(self.PROPERTY_PARSERS):
        return self.PROPERTY_PARSERS[self._prev.token_type](self)
    if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
        return self._parse_character_set(True)

    # Generic `key = value` property.
    if self._match_pair(TokenType.VAR, TokenType.EQ, advance=False):
        key = self._parse_var().this
        self._match(TokenType.EQ)

        if key.upper() == "PARTITIONED_BY":
            expression = exp.PartitionedByProperty
            value = self._parse_schema() or self._parse_bracket(self._parse_field())

            if schema and not isinstance(value, exp.Schema):
                # Partition columns were listed by name only: pull their full
                # definitions out of the table schema and remove them there.
                columns = {v.name.upper() for v in value.expressions}
                partitions = [
                    expression
                    for expression in schema.expressions
                    if expression.this.name.upper() in columns
                ]
                schema.set(
                    "expressions",
                    [e for e in schema.expressions if e not in partitions],
                )
                value = self.expression(exp.Schema, expressions=partitions)
        else:
            value = self._parse_column()
            expression = exp.AnonymousProperty

        return self.expression(
            expression,
            this=exp.Literal.string(key),
            value=value,
        )
    return None
|
||
|
|
||
|
def _parse_stored(self):
    """Parse STORED AS <format> into a FileFormatProperty."""
    self._match(TokenType.ALIAS)
    self._match(TokenType.EQ)
    file_format = exp.Literal.string(self._parse_var().name)
    return self.expression(
        exp.FileFormatProperty,
        this=exp.Literal.string("FORMAT"),
        value=file_format,
    )
|
||
|
|
||
|
def _parse_format(self):
    """Parse FORMAT [=] <value> into a FileFormatProperty."""
    self._match(TokenType.EQ)
    # Prefer a string literal; fall back to a bare identifier.
    value = self._parse_string() or self._parse_var()
    return self.expression(
        exp.FileFormatProperty,
        this=exp.Literal.string("FORMAT"),
        value=value,
    )
|
||
|
|
||
|
def _parse_engine(self):
    """Parse ENGINE [=] <name> into an EngineProperty."""
    self._match(TokenType.EQ)
    engine = self._parse_var_or_string()
    return self.expression(
        exp.EngineProperty,
        this=exp.Literal.string("ENGINE"),
        value=engine,
    )
|
||
|
|
||
|
def _parse_auto_increment(self):
    """Parse AUTO_INCREMENT [=] <value>."""
    self._match(TokenType.EQ)
    value = self._parse_var() or self._parse_number()
    return self.expression(
        exp.AutoIncrementProperty,
        this=exp.Literal.string("AUTO_INCREMENT"),
        value=value,
    )
|
||
|
|
||
|
def _parse_collate(self):
    """Parse COLLATE [=] <value>."""
    self._match(TokenType.EQ)
    collation = self._parse_var_or_string()
    return self.expression(
        exp.CollateProperty,
        this=exp.Literal.string("COLLATE"),
        value=collation,
    )
|
||
|
|
||
|
def _parse_schema_comment(self):
    """Parse COMMENT [=] '<text>'."""
    self._match(TokenType.EQ)
    comment = self._parse_string()
    return self.expression(
        exp.SchemaCommentProperty,
        this=exp.Literal.string("COMMENT"),
        value=comment,
    )
|
||
|
|
||
|
def _parse_character_set(self, default=False):
    """Parse [DEFAULT] CHARACTER SET [=] <value>; *default* marks the DEFAULT form."""
    self._match(TokenType.EQ)
    charset = self._parse_var_or_string()
    return self.expression(
        exp.CharacterSetProperty,
        this=exp.Literal.string("CHARACTER_SET"),
        value=charset,
        default=default,
    )
|
||
|
|
||
|
def _parse_table_format(self):
    """Parse TABLE_FORMAT / USING [=] <value>."""
    self._match(TokenType.EQ)
    table_format = self._parse_var_or_string()
    return self.expression(
        exp.TableFormatProperty,
        this=exp.Literal.string("TABLE_FORMAT"),
        value=table_format,
    )
|
||
|
|
||
|
def _parse_properties(self, schema=None):
    """
    Parse any combination of WITH (...), PROPERTIES (...), and bare properties.

    Schema is included since if the table schema is defined and we later get a partition by expression
    then we will define those columns in the partition by section and not in with the rest of the
    columns
    """
    properties = []

    while True:
        if self._match(TokenType.WITH):
            self._match_l_paren()
            properties.extend(self._parse_csv(lambda: self._parse_property(schema)))
            self._match_r_paren()
        elif self._match(TokenType.PROPERTIES):
            # TBLPROPERTIES-style list: string keys with optional `= value`.
            self._match_l_paren()
            properties.extend(
                self._parse_csv(
                    lambda: self.expression(
                        exp.AnonymousProperty,
                        this=self._parse_string(),
                        value=self._match(TokenType.EQ) and self._parse_string(),
                    )
                )
            )
            self._match_r_paren()
        else:
            identified_property = self._parse_property(schema)
            if not identified_property:
                break  # no more properties to consume
            properties.append(identified_property)

    if properties:
        return self.expression(exp.Properties, expressions=properties)
    return None
|
||
|
|
||
|
def _parse_insert(self):
    """Parse INSERT [OVERWRITE] [INTO] [TABLE] <table> ... [SELECT ...]."""
    overwrite = self._match(TokenType.OVERWRITE)
    self._match(TokenType.INTO)
    self._match(TokenType.TABLE)

    # Parse order matters: table, then IF EXISTS, PARTITION, source query.
    target = self._parse_table(schema=True)
    exists = self._parse_exists()
    partition = self._parse_partition()
    source = self._parse_select()

    return self.expression(
        exp.Insert,
        this=target,
        exists=exists,
        partition=partition,
        expression=source,
        overwrite=overwrite,
    )
|
||
|
|
||
|
def _parse_delete(self):
    """Parse DELETE [FROM] <table> [WHERE ...]."""
    self._match(TokenType.FROM)
    target = self._parse_table(schema=True)
    return self.expression(
        exp.Delete,
        this=target,
        where=self._parse_where(),
    )
|
||
|
|
||
|
def _parse_update(self):
    """Parse UPDATE <table> SET <assignments> [FROM ...] [WHERE ...]."""
    # Built stepwise so the parse order (table, SET list, FROM, WHERE) is explicit.
    kwargs = {}
    kwargs["this"] = self._parse_table(schema=True)
    kwargs["expressions"] = self._match(TokenType.SET) and self._parse_csv(
        self._parse_equality
    )
    kwargs["from"] = self._parse_from()
    kwargs["where"] = self._parse_where()
    return self.expression(exp.Update, **kwargs)
|
||
|
|
||
|
def _parse_uncache(self):
    """Parse UNCACHE TABLE [IF EXISTS] <table>."""
    if not self._match(TokenType.TABLE):
        self.raise_error("Expecting TABLE after UNCACHE")
    exists = self._parse_exists()
    return self.expression(
        exp.Uncache,
        exists=exists,
        this=self._parse_table(schema=True),
    )
|
||
|
|
||
|
def _parse_cache(self):
    """Parse CACHE [LAZY] TABLE <table> [OPTIONS('k' = 'v')] [AS <select>]."""
    lazy = self._match(TokenType.LAZY)
    self._match(TokenType.TABLE)
    table = self._parse_table(schema=True)

    options = []
    if self._match(TokenType.OPTIONS):
        # Only a single key = value pair is supported here.
        self._match_l_paren()
        key = self._parse_string()
        self._match(TokenType.EQ)
        value = self._parse_string()
        options = [key, value]
        self._match_r_paren()

    self._match(TokenType.ALIAS)

    return self.expression(
        exp.Cache,
        this=table,
        lazy=lazy,
        options=options,
        expression=self._parse_select(),
    )
|
||
|
|
||
|
def _parse_partition(self):
    """Parse PARTITION (k [= v], ...) into an exp.Partition, or None."""
    if not self._match(TokenType.PARTITION):
        return None

    def parse_values():
        # Each entry is (key, value) where value is None for bare keys.
        key = self._parse_var()
        value = self._parse_string() if self._match(TokenType.EQ) else None
        return (key, value)

    self._match_l_paren()
    pairs = self._parse_csv(parse_values)
    self._match_r_paren()

    return self.expression(exp.Partition, this=pairs)
|
||
|
|
||
|
def _parse_value(self):
    """Parse a parenthesized expression list into an exp.Tuple."""
    self._match_l_paren()
    items = self._parse_csv(self._parse_conjunction)
    self._match_r_paren()
    return self.expression(exp.Tuple, expressions=items)
|
||
|
|
||
|
def _parse_select(self, table=None):
    """Parse a SELECT / WITH / parenthesized query / VALUES expression.

    When *table* is truthy, a parenthesized body is parsed as a table
    reference instead of a nested select.
    """
    index = self._index  # remembered so we can backtrack on an empty ( ... )

    if self._match(TokenType.SELECT):
        hint = self._parse_hint()
        all_ = self._match(TokenType.ALL)
        distinct = self._match(TokenType.DISTINCT)

        if distinct:
            distinct = self.expression(
                exp.Distinct,
                on=self._parse_value() if self._match(TokenType.ON) else None,
            )

        if all_ and distinct:
            self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

        limit = self._parse_limit(top=True)  # e.g. SELECT TOP n
        expressions = self._parse_csv(
            lambda: self._parse_annotation(self._parse_expression())
        )

        this = self.expression(
            exp.Select,
            hint=hint,
            distinct=distinct,
            expressions=expressions,
            limit=limit,
        )
        from_ = self._parse_from()
        if from_:
            this.set("from", from_)
            self._parse_query_modifiers(this)
    elif self._match(TokenType.WITH):
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []

        # Comma-separated list of CTE definitions.
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA):
                break

        cte = self.expression(
            exp.With,
            expressions=expressions,
            recursive=recursive,
        )
        this = self._parse_statement()

        if not this:
            self.raise_error("Failed to parse any statement following CTE")
            return cte  # best effort: surface the bare WITH clause

        if "with" in this.arg_types:
            this.set(
                "with",
                self.expression(
                    exp.With,
                    expressions=expressions,
                    recursive=recursive,
                ),
            )
        else:
            self.raise_error(f"{this.key} does not support CTE")
    elif self._match(TokenType.L_PAREN):
        this = self._parse_table() if table else self._parse_select()

        if this:
            self._parse_query_modifiers(this)
            self._match_r_paren()
            this = self._parse_subquery(this)
        else:
            self._retreat(index)  # not a subquery after all; rewind
    elif self._match(TokenType.VALUES):
        this = self.expression(
            exp.Values, expressions=self._parse_csv(self._parse_value)
        )
        alias = self._parse_table_alias()
        if alias:
            this = self.expression(exp.Subquery, this=this, alias=alias)
    else:
        this = None

    return self._parse_set_operations(this) if this else None
|
||
|
|
||
|
def _parse_cte(self):
    """Parse one `alias AS ( statement )` entry of a WITH clause."""
    alias = self._parse_table_alias()
    if not alias or not alias.this:
        self.raise_error("Expected CTE to have alias")

    if not self._match(TokenType.ALIAS):
        self.raise_error("Expected AS in CTE")

    self._match_l_paren()
    body = self._parse_statement()
    self._match_r_paren()

    return self.expression(exp.CTE, this=body, alias=alias)
|
||
|
|
||
|
def _parse_table_alias(self):
    """Parse `[AS] alias [(col, ...)]`; returns None when nothing is present."""
    any_token = self._match(TokenType.ALIAS)
    alias = self._parse_id_var(any_token)

    columns = None
    if self._match(TokenType.L_PAREN):
        columns = self._parse_csv(lambda: self._parse_id_var(any_token))
        self._match_r_paren()

    if alias or columns:
        return self.expression(exp.TableAlias, this=alias, columns=columns)
    return None
|
||
|
|
||
|
def _parse_subquery(self, this):
    """Wrap *this* in an exp.Subquery, attaching any trailing table alias."""
    alias = self._parse_table_alias()
    return self.expression(exp.Subquery, this=this, alias=alias)
|
||
|
|
||
|
def _parse_query_modifiers(self, this):
    """Attach any trailing query modifiers to *this* in place (returns None)."""
    if isinstance(this, (exp.Subquery, exp.Subqueryable)):
        for key, parser in self.QUERY_MODIFIER_PARSERS.items():
            modifier = parser(self)
            if modifier:
                this.set(key, modifier)
|
||
|
|
||
|
def _parse_annotation(self, expression):
    """Wrap *expression* in an exp.Annotation when an annotation token follows."""
    if not self._match(TokenType.ANNOTATION):
        return expression
    return self.expression(
        exp.Annotation, this=self._prev.text, expression=expression
    )
|
||
|
|
||
|
def _parse_hint(self):
    """Parse an optimizer hint block; raise if the closing marker is missing."""
    if not self._match(TokenType.HINT):
        return None
    hints = self._parse_csv(self._parse_function)
    if not self._match(TokenType.HINT):
        self.raise_error("Expected */ after HINT")
    return self.expression(exp.Hint, expressions=hints)
|
||
|
|
||
|
def _parse_from(self):
    """Parse an optional FROM clause with a comma-separated table list."""
    if self._match(TokenType.FROM):
        tables = self._parse_csv(self._parse_table)
        return self.expression(exp.From, expressions=tables)
    return None
|
||
|
|
||
|
def _parse_laterals(self):
    """Parse zero or more consecutive LATERAL clauses."""
    return self._parse_all(self._parse_lateral)
|
||
|
|
||
|
def _parse_lateral(self):
    """Parse a `LATERAL VIEW [OUTER] fn(...) alias [AS col, ...]` clause."""
    if not self._match(TokenType.LATERAL):
        return None

    if not self._match(TokenType.VIEW):
        self.raise_error("Expected VIEW after LATERAL")

    outer = self._match(TokenType.OUTER)

    return self.expression(
        exp.Lateral,
        this=self._parse_function(),
        outer=outer,
        alias=self.expression(
            exp.TableAlias,
            this=self._parse_id_var(any_token=False),
            # column aliases are only parsed when an explicit AS follows
            columns=(
                self._parse_csv(self._parse_id_var)
                if self._match(TokenType.ALIAS)
                else None
            ),
        ),
    )
|
||
|
|
||
|
def _parse_joins(self):
    """Parse zero or more consecutive JOIN clauses."""
    return self._parse_all(self._parse_join)
|
||
|
|
||
|
def _parse_join_side_and_kind(self):
    """Return the (side, kind) tokens preceding JOIN; each is falsy when absent."""
    # side (LEFT/RIGHT/...) must be consumed before kind (INNER/OUTER/...)
    side = self._match_set(self.JOIN_SIDES) and self._prev
    kind = self._match_set(self.JOIN_KINDS) and self._prev
    return side, kind
|
||
|
|
||
|
def _parse_join(self):
    """Parse one JOIN clause, including side/kind modifiers and ON/USING."""
    side, kind = self._parse_join_side_and_kind()

    if not self._match(TokenType.JOIN):
        return None

    kwargs = {"this": self._parse_table()}

    if side:
        kwargs["side"] = side.text
    if kind:
        kwargs["kind"] = kind.text

    if self._match(TokenType.ON):
        kwargs["on"] = self._parse_conjunction()
    elif self._match(TokenType.USING):
        kwargs["using"] = self._parse_wrapped_id_vars()

    return self.expression(exp.Join, **kwargs)
|
||
|
|
||
|
def _parse_index(self):
    """Parse `index_name ON [TABLE] table_name (columns)` for CREATE INDEX."""
    index = self._parse_id_var()
    self._match(TokenType.ON)
    self._match(TokenType.TABLE)  # hive
    return self.expression(
        exp.Index,
        this=index,
        table=self.expression(exp.Table, this=self._parse_id_var()),
        columns=self._parse_expression(),
    )
|
||
|
|
||
|
def _parse_table(self, schema=False):
    """Parse a table factor: UNNEST, a subquery, or a (possibly dotted) name.

    When *schema* is True, a trailing column-definition list is parsed via
    _parse_schema instead of aliases/TABLESAMPLE.
    """
    unnest = self._parse_unnest()

    if unnest:
        return unnest

    subquery = self._parse_select(table=True)

    if subquery:
        return subquery

    catalog = None
    db = None
    table = (not schema and self._parse_function()) or self._parse_id_var(False)

    # each DOT shifts the accumulated parts left: table -> db -> catalog
    while self._match(TokenType.DOT):
        catalog = db
        db = table
        table = self._parse_id_var()

    if not table:
        self.raise_error("Expected table name")

    this = self.expression(exp.Table, this=table, db=db, catalog=catalog)

    if schema:
        return self._parse_schema(this=this)

    # dialects differ on whether TABLESAMPLE precedes or follows the alias
    if self.alias_post_tablesample:
        table_sample = self._parse_table_sample()

    alias = self._parse_table_alias()

    if alias:
        this = self.expression(exp.Alias, this=this, alias=alias)

    if not self.alias_post_tablesample:
        table_sample = self._parse_table_sample()

    if table_sample:
        table_sample.set("this", this)
        this = table_sample

    return this
|
||
|
|
||
|
def _parse_unnest(self):
    """Parse `UNNEST(expr, ...) [WITH ORDINALITY] [alias]`; None when absent."""
    if not self._match(TokenType.UNNEST):
        return None

    self._match_l_paren()
    expressions = self._parse_csv(self._parse_column)
    self._match_r_paren()

    ordinality = bool(
        self._match(TokenType.WITH) and self._match(TokenType.ORDINALITY)
    )

    alias = self._parse_table_alias()

    # some dialects alias the unnested column itself rather than the table
    if alias and self.unnest_column_only:
        if alias.args.get("columns"):
            self.raise_error("Unexpected extra column alias in unnest.")
        alias.set("columns", [alias.this])
        alias.set("this", None)

    return self.expression(
        exp.Unnest,
        expressions=expressions,
        ordinality=ordinality,
        alias=alias,
    )
|
||
|
|
||
|
def _parse_table_sample(self):
    """Parse an optional TABLESAMPLE clause into an exp.TableSample node.

    Supports the Hive bucket form ``(BUCKET n OUT OF m [ON field])`` as
    well as the plain numeric forms ``(n PERCENT | n ROWS | n)``.
    Returns None when no TABLESAMPLE token is present.
    """
    if not self._match(TokenType.TABLE_SAMPLE):
        return None

    method = self._parse_var()
    bucket_numerator = None
    bucket_denominator = None
    bucket_field = None
    percent = None
    rows = None
    size = None

    self._match_l_paren()

    if self._match(TokenType.BUCKET):
        bucket_numerator = self._parse_number()
        self._match(TokenType.OUT_OF)
        # fixed: was a duplicated `bucket_denominator = bucket_denominator = ...`
        bucket_denominator = self._parse_number()
        self._match(TokenType.ON)
        bucket_field = self._parse_field()
    else:
        num = self._parse_number()

        # the bare number's meaning depends on the keyword that follows it
        if self._match(TokenType.PERCENT):
            percent = num
        elif self._match(TokenType.ROWS):
            rows = num
        else:
            size = num

    self._match_r_paren()

    return self.expression(
        exp.TableSample,
        method=method,
        bucket_numerator=bucket_numerator,
        bucket_denominator=bucket_denominator,
        bucket_field=bucket_field,
        percent=percent,
        rows=rows,
        size=size,
    )
|
||
|
|
||
|
def _parse_where(self):
    """Parse an optional WHERE clause into an exp.Where node."""
    if self._match(TokenType.WHERE):
        return self.expression(exp.Where, this=self._parse_conjunction())
    return None
|
||
|
|
||
|
def _parse_group(self):
    """Parse GROUP BY with optional GROUPING SETS / CUBE / ROLLUP extensions."""
    if not self._match(TokenType.GROUP_BY):
        return None
    # NOTE: keyword-argument order matters — each value consumes tokens
    return self.expression(
        exp.Group,
        expressions=self._parse_csv(self._parse_conjunction),
        grouping_sets=self._parse_grouping_sets(),
        cube=self._match(TokenType.CUBE) and self._parse_wrapped_id_vars(),
        rollup=self._match(TokenType.ROLLUP) and self._parse_wrapped_id_vars(),
    )
|
||
|
|
||
|
def _parse_grouping_sets(self):
    """Parse `GROUPING SETS ( ... )` into a list of grouping-set expressions."""
    if not self._match(TokenType.GROUPING_SETS):
        return None

    self._match_l_paren()
    sets = self._parse_csv(self._parse_grouping_set)
    self._match_r_paren()
    return sets
|
||
|
|
||
|
def _parse_grouping_set(self):
    """Parse one grouping set: a parenthesized tuple of ids or a single id."""
    if not self._match(TokenType.L_PAREN):
        return self._parse_id_var()

    members = self._parse_csv(self._parse_id_var)
    self._match_r_paren()
    return self.expression(exp.Tuple, expressions=members)
|
||
|
|
||
|
def _parse_having(self):
    """Parse an optional HAVING clause into an exp.Having node."""
    if self._match(TokenType.HAVING):
        return self.expression(exp.Having, this=self._parse_conjunction())
    return None
|
||
|
|
||
|
def _parse_qualify(self):
    """Parse an optional QUALIFY clause into an exp.Qualify node."""
    if self._match(TokenType.QUALIFY):
        return self.expression(exp.Qualify, this=self._parse_conjunction())
    return None
|
||
|
|
||
|
def _parse_order(self, this=None):
    """Parse an optional ORDER BY; return *this* unchanged when absent."""
    if self._match(TokenType.ORDER_BY):
        return self.expression(
            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
        )
    return this
|
||
|
|
||
|
def _parse_sort(self, token_type, exp_class):
    """Generic parser for SORT BY / CLUSTER BY-style clauses."""
    if self._match(token_type):
        items = self._parse_csv(self._parse_ordered)
        return self.expression(exp_class, expressions=items)
    return None
|
||
|
|
||
|
def _parse_ordered(self):
    """Parse one ORDER BY item, resolving implicit null ordering per dialect."""
    this = self._parse_conjunction()
    self._match(TokenType.ASC)
    is_desc = self._match(TokenType.DESC)
    is_nulls_first = self._match(TokenType.NULLS_FIRST)
    is_nulls_last = self._match(TokenType.NULLS_LAST)
    desc = is_desc or False
    asc = not desc
    nulls_first = is_nulls_first or False
    explicitly_null_ordered = is_nulls_first or is_nulls_last
    # infer NULLS FIRST when nothing was stated explicitly and the
    # dialect's default null ordering would place nulls first for
    # this sort direction
    if (
        not explicitly_null_ordered
        and (
            (asc and self.null_ordering == "nulls_are_small")
            or (desc and self.null_ordering != "nulls_are_small")
        )
        and self.null_ordering != "nulls_are_last"
    ):
        nulls_first = True

    return self.expression(
        exp.Ordered, this=this, desc=desc, nulls_first=nulls_first
    )
|
||
|
|
||
|
def _parse_limit(self, this=None, top=False):
    """Parse LIMIT/TOP or ANSI FETCH; return *this* unchanged when absent."""
    if self._match(TokenType.TOP if top else TokenType.LIMIT):
        return self.expression(
            exp.Limit, this=this, expression=self._parse_number()
        )
    if self._match(TokenType.FETCH):
        # FETCH [FIRST|NEXT] n [ROW|ROWS] [ONLY]; direction defaults to FIRST
        direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
        direction = self._prev.text if direction else "FIRST"
        count = self._parse_number()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        self._match(TokenType.ONLY)
        return self.expression(exp.Fetch, direction=direction, count=count)
    return this
|
||
|
|
||
|
def _parse_offset(self, this=None):
    """Parse an optional `OFFSET n [ROW|ROWS]` clause."""
    if not self._match(TokenType.OFFSET):
        return this

    amount = self._parse_number()
    self._match_set((TokenType.ROW, TokenType.ROWS))
    return self.expression(exp.Offset, this=this, expression=amount)
|
||
|
|
||
|
def _parse_set_operations(self, this):
    """Parse trailing UNION/EXCEPT/INTERSECT, chaining onto *this*."""
    if not self._match_set(self.SET_OPERATIONS):
        return this

    token_type = self._prev.token_type

    if token_type == TokenType.UNION:
        expression = exp.Union
    elif token_type == TokenType.EXCEPT:
        expression = exp.Except
    else:
        expression = exp.Intersect

    return self.expression(
        expression,
        this=this,
        # DISTINCT is the default unless an explicit ALL is consumed
        distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
        expression=self._parse_select(),
    )
|
||
|
|
||
|
def _parse_expression(self):
    """Parse a full expression, including an optional trailing alias."""
    return self._parse_alias(self._parse_conjunction())
|
||
|
|
||
|
def _parse_conjunction(self):
    """Parse left-associative chains of the operators in self.CONJUNCTION."""
    return self._parse_tokens(self._parse_equality, self.CONJUNCTION)
|
||
|
|
||
|
def _parse_equality(self):
    """Parse left-associative chains of the operators in self.EQUALITY."""
    return self._parse_tokens(self._parse_comparison, self.EQUALITY)
|
||
|
|
||
|
def _parse_comparison(self):
    """Parse left-associative chains of the operators in self.COMPARISON."""
    return self._parse_tokens(self._parse_range, self.COMPARISON)
|
||
|
|
||
|
def _parse_range(self):
    """Parse range-style predicates (BETWEEN, IN, ...) with an optional NOT."""
    this = self._parse_bitwise()
    negate = self._match(TokenType.NOT)

    if self._match_set(self.RANGE_PARSERS):
        this = self.RANGE_PARSERS[self._prev.token_type](self, this)

    # NOT wraps the whole range predicate, e.g. `x NOT BETWEEN a AND b`
    if negate:
        this = self.expression(exp.Not, this=this)

    return this
|
||
|
|
||
|
def _parse_is(self, this):
    """Parse `IS [NOT] NULL|TRUE|FALSE` following *this*."""
    negate = self._match(TokenType.NOT)
    node = self.expression(
        exp.Is,
        this=this,
        expression=self._parse_null() or self._parse_boolean(),
    )
    if negate:
        node = self.expression(exp.Not, this=node)
    return node
|
||
|
|
||
|
def _parse_in(self, this):
    """Parse the right side of IN: UNNEST(...), a subquery, or a value list."""
    unnest = self._parse_unnest()
    if unnest:
        this = self.expression(exp.In, this=this, unnest=unnest)
    else:
        self._match_l_paren()
        expressions = self._parse_csv(
            lambda: self._parse_select() or self._parse_expression()
        )

        # a lone subquery is stored as `query`, anything else as a value list
        if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
            this = self.expression(exp.In, this=this, query=expressions[0])
        else:
            this = self.expression(exp.In, this=this, expressions=expressions)

        self._match_r_paren()
    return this
|
||
|
|
||
|
def _parse_between(self, this):
    """Parse `BETWEEN low AND high` following *this*."""
    lo = self._parse_bitwise()
    self._match(TokenType.AND)
    hi = self._parse_bitwise()
    return self.expression(exp.Between, this=this, low=lo, high=hi)
|
||
|
|
||
|
def _parse_escape(self, this):
    """Attach an optional `ESCAPE '<char>'` clause to *this*."""
    if self._match(TokenType.ESCAPE):
        return self.expression(exp.Escape, this=this, expression=self._parse_string())
    return this
|
||
|
|
||
|
def _parse_bitwise(self):
    """Parse bitwise operators, including << and >> spelled as token pairs."""
    this = self._parse_term()

    while True:
        if self._match_set(self.BITWISE):
            this = self.expression(
                self.BITWISE[self._prev.token_type],
                this=this,
                expression=self._parse_term(),
            )
        # shifts arrive from the tokenizer as two adjacent LT/GT tokens
        elif self._match_pair(TokenType.LT, TokenType.LT):
            this = self.expression(
                exp.BitwiseLeftShift, this=this, expression=self._parse_term()
            )
        elif self._match_pair(TokenType.GT, TokenType.GT):
            this = self.expression(
                exp.BitwiseRightShift, this=this, expression=self._parse_term()
            )
        else:
            break

    return this
|
||
|
|
||
|
def _parse_term(self):
    """Parse left-associative chains of the operators in self.TERM."""
    return self._parse_tokens(self._parse_factor, self.TERM)
|
||
|
|
||
|
def _parse_factor(self):
    """Parse left-associative chains of the operators in self.FACTOR."""
    return self._parse_tokens(self._parse_unary, self.FACTOR)
|
||
|
|
||
|
def _parse_unary(self):
    """Parse prefix unary operators (NOT, ~, -) ahead of a typed expression."""
    if self._match(TokenType.NOT):
        return self.expression(exp.Not, this=self._parse_equality())
    if self._match(TokenType.TILDA):
        return self.expression(exp.BitwiseNot, this=self._parse_unary())
    if self._match(TokenType.DASH):
        return self.expression(exp.Neg, this=self._parse_unary())
    return self._parse_at_time_zone(self._parse_type())
|
||
|
|
||
|
def _parse_type(self):
    """Parse INTERVAL literals, prefix casts like `DATE 'x'`, and `::` casts."""
    if self._match(TokenType.INTERVAL):
        return self.expression(
            exp.Interval,
            this=self._parse_term(),
            unit=self._parse_var(),
        )

    index = self._index
    type_token = self._parse_types()
    this = self._parse_column()

    if type_token:
        if this:
            # `TYPE expr` is treated as a cast of expr to TYPE
            return self.expression(exp.Cast, this=this, to=type_token)
        if not type_token.args.get("expressions"):
            # a bare unparameterized type with nothing after it was likely
            # an identifier after all — back off and reparse as a column
            self._retreat(index)
            return self._parse_column()
        return type_token

    while self._match(TokenType.DCOLON):
        type_token = self._parse_types()
        if not type_token:
            self.raise_error("Expected type")
        this = self.expression(exp.Cast, this=this, to=type_token)

    return this
|
||
|
|
||
|
def _parse_types(self):
    """Parse a data type, including parameterized and nested (STRUCT/ARRAY) forms.

    Returns an exp.DataType, or None (restoring the token position) when the
    upcoming tokens do not actually form a type.
    """
    index = self._index

    if not self._match_set(self.TYPE_TOKENS):
        return None

    type_token = self._prev.token_type
    nested = type_token in self.NESTED_TYPE_TOKENS
    is_struct = type_token == TokenType.STRUCT
    expressions = None

    # `NAME[` is bracket indexing on an identifier, not a type
    if self._match(TokenType.L_BRACKET):
        self._retreat(index)
        return None

    if self._match(TokenType.L_PAREN):
        if is_struct:
            expressions = self._parse_csv(self._parse_struct_kwargs)
        elif nested:
            expressions = self._parse_csv(self._parse_types)
        else:
            expressions = self._parse_csv(self._parse_number)

        # empty parens mean this was a function call, not a type
        if not expressions:
            self._retreat(index)
            return None

        self._match_r_paren()

    # generics-style nesting: ARRAY<...>, MAP<...>, STRUCT<...>
    if nested and self._match(TokenType.LT):
        if is_struct:
            expressions = self._parse_csv(self._parse_struct_kwargs)
        else:
            expressions = self._parse_csv(self._parse_types)

        if not self._match(TokenType.GT):
            self.raise_error("Expecting >")

    if type_token in self.TIMESTAMPS:
        tz = self._match(TokenType.WITH_TIME_ZONE)
        self._match(TokenType.WITHOUT_TIME_ZONE)
        if tz:
            return exp.DataType(
                this=exp.DataType.Type.TIMESTAMPTZ,
                expressions=expressions,
            )
        return exp.DataType(
            this=exp.DataType.Type.TIMESTAMP,
            expressions=expressions,
        )

    return exp.DataType(
        this=exp.DataType.Type[type_token.value.upper()],
        expressions=expressions,
        nested=nested,
    )
|
||
|
|
||
|
def _parse_struct_kwargs(self):
    """Parse one `name: type` member of a STRUCT type; None when type is absent."""
    name = self._parse_id_var()
    self._match(TokenType.COLON)
    kind = self._parse_types()

    if kind:
        return self.expression(exp.StructKwarg, this=name, expression=kind)
    return None
|
||
|
|
||
|
def _parse_at_time_zone(self, this):
    """Attach an optional AT TIME ZONE clause to *this*."""
    if self._match(TokenType.AT_TIME_ZONE):
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())
    return this
|
||
|
|
||
|
def _parse_column(self):
    """Parse a column reference, including dotted access and bracket indexing."""
    this = self._parse_field()
    if isinstance(this, exp.Identifier):
        this = self.expression(exp.Column, this=this)
    elif not this:
        return self._parse_bracket(this)
    this = self._parse_bracket(this)

    while self._match_set(self.COLUMN_OPERATORS):
        op = self.COLUMN_OPERATORS.get(self._prev.token_type)
        field = self._parse_star() or self._parse_function() or self._parse_id_var()

        if isinstance(field, exp.Func):
            # bigquery allows function calls like x.y.count(...)
            # SAFE.SUBSTR(...)
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
            this = self._replace_columns_with_dots(this)

        if op:
            this = op(self, this, exp.Literal.string(field.name))
        elif isinstance(this, exp.Column) and not this.table:
            # promote `a.b`: previous name becomes the table qualifier
            this = self.expression(exp.Column, this=field, table=this.this)
        else:
            this = self.expression(exp.Dot, this=this, expression=field)
        this = self._parse_bracket(this)

    return this
|
||
|
|
||
|
def _parse_primary(self):
    """Parse a primary expression: a literal, or a parenthesized query/tuple."""
    if self._match_set(self.PRIMARY_PARSERS):
        return self.PRIMARY_PARSERS[self._prev.token_type](self, self._prev)

    if self._match(TokenType.L_PAREN):
        query = self._parse_select()

        if query:
            expressions = [query]
        else:
            expressions = self._parse_csv(
                lambda: self._parse_alias(self._parse_conjunction(), explicit=True)
            )

        this = list_get(expressions, 0)
        self._parse_query_modifiers(this)
        self._match_r_paren()

        if isinstance(this, exp.Subqueryable):
            return self._parse_subquery(this)
        # several comma-separated expressions form a tuple literal
        if len(expressions) > 1:
            return self.expression(exp.Tuple, expressions=expressions)
        return self.expression(exp.Paren, this=this)

    return None
|
||
|
|
||
|
def _parse_field(self, any_token=False):
    """Try a primary literal, then a function call, then an identifier."""
    value = self._parse_primary() or self._parse_function()
    return value or self._parse_id_var(any_token)
|
||
|
|
||
|
def _parse_function(self):
    """Parse a function call.

    Dispatches to no-paren and special-form parsers first, handles subquery
    predicates (EXISTS-style), maps known names through self.FUNCTIONS, and
    falls back to exp.Anonymous for unknown names. A trailing window clause
    is parsed last.
    """
    if not self._curr:
        return None

    token_type = self._curr.token_type

    if self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
        return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

    if not self._next or self._next.token_type != TokenType.L_PAREN:
        if token_type in self.NO_PAREN_FUNCTIONS:
            # _advance() presumably returns None so the `or` picks the
            # expression class — TODO confirm against _advance
            return self.expression(
                self._advance() or self.NO_PAREN_FUNCTIONS[token_type]
            )
        return None

    if token_type not in self.FUNC_TOKENS:
        return None

    if self._match_set(self.FUNCTION_PARSERS):
        self._advance()
        this = self.FUNCTION_PARSERS[token_type](self, token_type)
    else:
        subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)
        this = self._curr.text
        self._advance(2)  # skip the function name and the opening paren

        if subquery_predicate and self._curr.token_type in (
            TokenType.SELECT,
            TokenType.WITH,
        ):
            this = self.expression(subquery_predicate, this=self._parse_select())
            self._match_r_paren()
            return this

        function = self.FUNCTIONS.get(this.upper())
        args = self._parse_csv(self._parse_lambda)

        if function:
            this = function(args)
            self.validate_expression(this, args)
        else:
            this = self.expression(exp.Anonymous, this=this, expressions=args)
    self._match_r_paren()
    return self._parse_window(this)
|
||
|
|
||
|
def _parse_lambda(self):
    """Parse a lambda `(args) -> expr`, or fall back to an aggregate argument.

    The fallback handles DISTINCT, IGNORE/RESPECT NULLS, and trailing
    ORDER BY / LIMIT inside a function call's argument.
    """
    index = self._index

    if self._match(TokenType.L_PAREN):
        expressions = self._parse_csv(self._parse_id_var)
        self._match(TokenType.R_PAREN)
    else:
        expressions = [self._parse_id_var()]

    if not self._match(TokenType.ARROW):
        # not a lambda after all: rewind and parse a regular argument
        self._retreat(index)

        distinct = self._match(TokenType.DISTINCT)
        this = self._parse_conjunction()

        if distinct:
            this = self.expression(exp.Distinct, this=this)

        if self._match(TokenType.IGNORE_NULLS):
            this = self.expression(exp.IgnoreNulls, this=this)
        else:
            self._match(TokenType.RESPECT_NULLS)

        return self._parse_alias(self._parse_limit(self._parse_order(this)))

    return self.expression(
        exp.Lambda,
        this=self._parse_conjunction(),
        expressions=expressions,
    )
|
||
|
|
||
|
def _parse_schema(self, this=None):
    """Parse a parenthesized column/constraint definition list onto *this*.

    Backs off (returning *this*) when the parens open a SELECT instead.
    """
    index = self._index
    if not self._match(TokenType.L_PAREN) or self._match(TokenType.SELECT):
        self._retreat(index)
        return this

    args = self._parse_csv(
        lambda: self._parse_constraint()
        or self._parse_column_def(self._parse_field())
    )
    self._match_r_paren()
    return self.expression(exp.Schema, this=this, expressions=args)
|
||
|
|
||
|
def _parse_column_def(self, this):
    """Parse a column definition: a type plus any number of column constraints."""
    kind = self._parse_types()

    # no type means *this* was not a column definition; pass it through
    if not kind:
        return this

    constraints = []
    while True:
        constraint = self._parse_column_constraint()
        if not constraint:
            break
        constraints.append(constraint)

    return self.expression(
        exp.ColumnDef, this=this, kind=kind, constraints=constraints
    )
|
||
|
|
||
|
def _parse_column_constraint(self):
    """Parse one column constraint (NOT NULL, DEFAULT, PRIMARY KEY, ...).

    Returns None when the upcoming tokens do not start a constraint.
    """
    kind = None
    this = None

    if self._match(TokenType.CONSTRAINT):
        this = self._parse_id_var()  # optional constraint name

    if self._match(TokenType.AUTO_INCREMENT):
        kind = exp.AutoIncrementColumnConstraint()
    elif self._match(TokenType.CHECK):
        self._match_l_paren()
        kind = self.expression(
            exp.CheckColumnConstraint, this=self._parse_conjunction()
        )
        self._match_r_paren()
    elif self._match(TokenType.COLLATE):
        kind = self.expression(exp.CollateColumnConstraint, this=self._parse_var())
    elif self._match(TokenType.DEFAULT):
        kind = self.expression(
            exp.DefaultColumnConstraint, this=self._parse_field()
        )
    elif self._match(TokenType.NOT) and self._match(TokenType.NULL):
        kind = exp.NotNullColumnConstraint()
    elif self._match(TokenType.SCHEMA_COMMENT):
        kind = self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        )
    elif self._match(TokenType.PRIMARY_KEY):
        kind = exp.PrimaryKeyColumnConstraint()
    elif self._match(TokenType.UNIQUE):
        kind = exp.UniqueColumnConstraint()

    if kind is None:
        return None

    return self.expression(exp.ColumnConstraint, this=this, kind=kind)
|
||
|
|
||
|
def _parse_constraint(self):
    """Parse a named CONSTRAINT, or delegate to the unnamed-constraint parser."""
    if not self._match(TokenType.CONSTRAINT):
        return self._parse_unnamed_constraint()

    this = self._parse_id_var()
    expressions = []

    while True:
        constraint = self._parse_unnamed_constraint() or self._parse_function()
        if not constraint:
            break
        expressions.append(constraint)

    return self.expression(exp.Constraint, this=this, expressions=expressions)
|
||
|
|
||
|
def _parse_unnamed_constraint(self):
    """Dispatch to a constraint parser keyed by the current token, if any."""
    if self._match_set(self.CONSTRAINT_PARSERS):
        return self.CONSTRAINT_PARSERS[self._prev.token_type](self)
    return None
|
||
|
|
||
|
def _parse_check(self):
    """Parse `CHECK ( <condition> )` into an exp.Check node."""
    self._match(TokenType.CHECK)
    self._match_l_paren()
    condition = self._parse_conjunction()
    self._match_r_paren()
    return self.expression(exp.Check, this=condition)
|
||
|
|
||
|
def _parse_unique(self):
    """Parse `UNIQUE ( col, ... )` into an exp.Unique node."""
    self._match(TokenType.UNIQUE)
    return self.expression(exp.Unique, expressions=self._parse_wrapped_id_vars())
|
||
|
|
||
|
def _parse_foreign_key(self):
    """Parse FOREIGN KEY (...) [REFERENCES ...] with ON DELETE/UPDATE actions."""
    self._match(TokenType.FOREIGN_KEY)

    expressions = self._parse_wrapped_id_vars()
    reference = self._match(TokenType.REFERENCES) and self.expression(
        exp.Reference,
        this=self._parse_id_var(),
        expressions=self._parse_wrapped_id_vars(),
    )
    options = {}

    while self._match(TokenType.ON):
        if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
            self.raise_error("Expected DELETE or UPDATE")
        kind = self._prev.text.lower()

        if self._match(TokenType.NO_ACTION):
            action = "NO ACTION"
        elif self._match(TokenType.SET):
            self._match_set((TokenType.NULL, TokenType.DEFAULT))
            action = "SET " + self._prev.text.upper()
        else:
            # single-token actions, taken verbatim from the next token
            self._advance()
            action = self._prev.text.upper()
        options[kind] = action

    return self.expression(
        exp.ForeignKey,
        expressions=expressions,
        reference=reference,
        **options,
    )
|
||
|
|
||
|
def _parse_bracket(self, this):
    """Parse `[...]` after *this*: an array literal or a subscript expression."""
    if not self._match(TokenType.L_BRACKET):
        return this

    expressions = self._parse_csv(self._parse_conjunction)

    if not this or this.name.upper() == "ARRAY":
        this = self.expression(exp.Array, expressions=expressions)
    else:
        # normalize the dialect's index base to the internal representation
        expressions = apply_index_offset(expressions, -self.index_offset)
        this = self.expression(exp.Bracket, this=this, expressions=expressions)

    if not self._match(TokenType.R_BRACKET):
        self.raise_error("Expected ]")

    # brackets can chain, e.g. a[0][1]
    return self._parse_bracket(this)
|
||
|
|
||
|
def _parse_case(self):
    """Parse a CASE expression (simple or searched) up to END."""
    ifs = []
    default = None

    # the operand of a simple CASE; None for a searched CASE
    expression = self._parse_conjunction()

    while self._match(TokenType.WHEN):
        this = self._parse_conjunction()
        self._match(TokenType.THEN)
        then = self._parse_conjunction()
        ifs.append(self.expression(exp.If, this=this, true=then))

    if self._match(TokenType.ELSE):
        default = self._parse_conjunction()

    if not self._match(TokenType.END):
        self.raise_error("Expected END after CASE", self._prev)

    return self._parse_window(
        self.expression(exp.Case, this=expression, ifs=ifs, default=default)
    )
|
||
|
|
||
|
def _parse_if(self):
    """Parse IF as a function call `IF(...)` or as `IF cond THEN ... END`."""
    if self._match(TokenType.L_PAREN):
        args = self._parse_csv(self._parse_conjunction)
        this = exp.If.from_arg_list(args)
        self.validate_expression(this, args)
        self._match_r_paren()
    else:
        condition = self._parse_conjunction()
        self._match(TokenType.THEN)
        true = self._parse_conjunction()
        false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
        self._match(TokenType.END)
        this = self.expression(exp.If, this=condition, true=true, false=false)
    return self._parse_window(this)
|
||
|
|
||
|
def _parse_extract(self):
    """Parse the interior of `EXTRACT(<unit> FROM <expr>)`."""
    unit = self._parse_var() or self._parse_type()

    if not self._match(TokenType.FROM):
        self.raise_error("Expected FROM after EXTRACT", self._prev)

    return self.expression(exp.Extract, this=unit, expression=self._parse_type())
|
||
|
|
||
|
def _parse_cast(self, strict):
    """Parse the interior of CAST/TRY_CAST: `expr AS type [CHARACTER SET cs]`.

    *strict* selects exp.Cast vs exp.TryCast.
    """
    this = self._parse_conjunction()

    if not self._match(TokenType.ALIAS):
        self.raise_error("Expected AS after CAST")

    to = self._parse_types()

    if not to:
        self.raise_error("Expected TYPE after CAST")
    elif to.this == exp.DataType.Type.CHAR:
        if self._match(TokenType.CHARACTER_SET):
            to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

    return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)
|
||
|
|
||
|
def _parse_convert(self):
    """Parse CONVERT(expr USING charset) or CONVERT(expr, type) into a Cast."""
    value = self._parse_field()

    if self._match(TokenType.USING):
        target = self.expression(exp.CharacterSet, this=self._parse_var())
    elif self._match(TokenType.COMMA):
        target = self._parse_types()
    else:
        target = None

    return self.expression(exp.Cast, this=value, to=target)
|
||
|
|
||
|
def _parse_window(self, this, alias=False):
    """Parse trailing window syntax: FILTER, WITHIN GROUP, and OVER clauses.

    With alias=True, parses the BigQuery `WINDOW name AS (...)` form instead
    of requiring OVER.
    """
    if self._match(TokenType.FILTER):
        self._match_l_paren()
        this = self.expression(
            exp.Filter, this=this, expression=self._parse_where()
        )
        self._match_r_paren()

    if self._match(TokenType.WITHIN_GROUP):
        self._match_l_paren()
        this = self.expression(
            exp.WithinGroup,
            this=this,
            expression=self._parse_order(),
        )
        self._match_r_paren()
        return this

    # bigquery select from window x AS (partition by ...)
    if alias:
        self._match(TokenType.ALIAS)
    elif not self._match(TokenType.OVER):
        return this

    if not self._match(TokenType.L_PAREN):
        # `OVER window_name` — a reference to a named window
        alias = self._parse_id_var(False)

        return self.expression(
            exp.Window,
            this=this,
            alias=alias,
        )

    partition = None

    alias = self._parse_id_var(False)

    if self._match(TokenType.PARTITION_BY):
        partition = self._parse_csv(self._parse_conjunction)

    order = self._parse_order()

    spec = None
    kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

    if kind:
        self._match(TokenType.BETWEEN)
        start = self._parse_window_spec()
        self._match(TokenType.AND)
        end = self._parse_window_spec()

        spec = self.expression(
            exp.WindowSpec,
            kind=kind,
            start=start["value"],
            start_side=start["side"],
            end=end["value"],
            end_side=end["side"],
        )

    self._match_r_paren()

    return self.expression(
        exp.Window,
        this=this,
        partition_by=partition,
        order=order,
        spec=spec,
        alias=alias,
    )
|
||
|
|
||
|
def _parse_window_spec(self):
    """Parse one window-frame bound, returning its value and optional side."""
    self._match(TokenType.BETWEEN)

    value = (
        self._match_set((TokenType.UNBOUNDED, TokenType.CURRENT_ROW))
        and self._prev.text
    ) or self._parse_bitwise()
    side = (
        self._match_set((TokenType.PRECEDING, TokenType.FOLLOWING))
        and self._prev.text
    )
    return {"value": value, "side": side}
|
||
|
|
||
|
def _parse_alias(self, this, explicit=False):
    """Parse an optional alias after *this*; with explicit=True, AS is required."""
    any_token = self._match(TokenType.ALIAS)

    if explicit and not any_token:
        return this

    # parenthesized form aliases multiple outputs at once
    if self._match(TokenType.L_PAREN):
        aliases = self.expression(
            exp.Aliases,
            this=this,
            expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
        )
        self._match_r_paren()
        return aliases

    alias = self._parse_id_var(any_token)

    if alias:
        return self.expression(exp.Alias, this=this, alias=alias)

    return this
|
||
|
|
||
|
def _parse_id_var(self, any_token=True):
    """Parse an identifier; with any_token=True accept any non-reserved token."""
    identifier = self._parse_identifier()

    if identifier:
        return identifier

    if (
        any_token
        and self._curr
        and self._curr.token_type not in self.RESERVED_KEYWORDS
    ):
        # presumably _advance() returns None, so the `or` always yields
        # the Identifier built from the consumed token — TODO confirm
        return self._advance() or exp.Identifier(this=self._prev.text, quoted=False)

    return self._match_set(self.ID_VAR_TOKENS) and exp.Identifier(
        this=self._prev.text, quoted=False
    )
|
||
|
|
||
|
def _parse_string(self):
    """Parse a string literal; fall back to a placeholder when absent."""
    if not self._match(TokenType.STRING):
        return self._parse_placeholder()
    return exp.Literal.string(self._prev.text)
def _parse_number(self):
    """Parse a numeric literal; fall back to a placeholder when absent."""
    if not self._match(TokenType.NUMBER):
        return self._parse_placeholder()
    return exp.Literal.number(self._prev.text)
def _parse_identifier(self):
    """Parse a quoted identifier; fall back to a placeholder when absent."""
    if not self._match(TokenType.IDENTIFIER):
        return self._parse_placeholder()
    return exp.Identifier(this=self._prev.text, quoted=True)
def _parse_var(self):
    """Parse a VAR token; fall back to a placeholder when absent."""
    if not self._match(TokenType.VAR):
        return self._parse_placeholder()
    return exp.Var(this=self._prev.text)
def _parse_var_or_string(self):
    """Parse a VAR token if possible, otherwise try a string literal."""
    var = self._parse_var()
    if var:
        return var
    return self._parse_string()
def _parse_null(self):
    """Parse a NULL literal; returns None when the next token is not NULL."""
    return exp.Null() if self._match(TokenType.NULL) else None
def _parse_boolean(self):
    """Parse TRUE/FALSE into a Boolean node; None when neither is next."""
    # TRUE is checked first, matching the original cascade.
    for token_type, value in ((TokenType.TRUE, True), (TokenType.FALSE, False)):
        if self._match(token_type):
            return exp.Boolean(this=value)
    return None
def _parse_star(self):
    """Parse "*" with optional EXCEPT (...) / REPLACE (...) modifiers."""
    if not self._match(TokenType.STAR):
        return None
    # "except" is a Python keyword, hence the **{} construction.
    return exp.Star(
        **{"except": self._parse_except(), "replace": self._parse_replace()}
    )
def _parse_placeholder(self):
    """Parse a parameter placeholder token; None when absent."""
    return exp.Placeholder() if self._match(TokenType.PLACEHOLDER) else None
def _parse_except(self):
    """Parse an EXCEPT (col, ...) column list; None when absent."""
    if self._match(TokenType.EXCEPT):
        return self._parse_wrapped_id_vars()
    return None
def _parse_replace(self):
    """Parse a REPLACE (expr [AS name], ...) list; None when absent."""
    if not self._match(TokenType.REPLACE):
        return None

    self._match_l_paren()
    replacements = self._parse_csv(
        lambda: self._parse_alias(self._parse_expression())
    )
    self._match_r_paren()
    return replacements
def _parse_csv(self, parse):
    """Parse a comma-separated list with *parse*, dropping None results."""
    items = []

    result = parse()
    if result is not None:
        items.append(result)

    while self._match(TokenType.COMMA):
        result = parse()
        if result is not None:
            items.append(result)

    return items
def _parse_tokens(self, parse, expressions):
    """Left-fold a binary operator chain.

    *expressions* maps operator token types to expression classes; *parse*
    produces each operand. The result is left-associative.
    """
    left = parse()

    while self._match_set(expressions):
        operator = expressions[self._prev.token_type]
        left = self.expression(operator, this=left, expression=parse())

    return left
def _parse_all(self, parse):
|
||
|
return list(iter(parse, None))
|
||
|
|
||
|
def _parse_wrapped_id_vars(self):
    """Parse a parenthesized, comma-separated identifier list."""
    self._match_l_paren()
    id_vars = self._parse_csv(self._parse_id_var)
    self._match_r_paren()
    return id_vars
def _match(self, token_type):
    """Consume the current token when it has *token_type*.

    Returns True on a match and None otherwise (never False), which lets
    callers chain the result with and/or.
    """
    if self._curr and self._curr.token_type == token_type:
        self._advance()
        return True
    return None
def _match_set(self, types):
    """Consume the current token when its type is in *types*.

    Returns True on a match and None otherwise, mirroring _match.
    """
    if self._curr and self._curr.token_type in types:
        self._advance()
        return True
    return None
def _match_pair(self, token_type_a, token_type_b, advance=True):
    """Check that the next two tokens are exactly (*token_type_a*, *token_type_b*).

    Both tokens are consumed when *advance* is True. Returns True on a
    match and None otherwise.
    """
    if not self._curr or not self._next:
        return None

    pair_matches = (
        self._curr.token_type == token_type_a
        and self._next.token_type == token_type_b
    )
    if not pair_matches:
        return None

    if advance:
        self._advance(2)
    return True
def _match_l_paren(self):
    """Consume a "(" token, raising a parse error when it is missing."""
    if self._match(TokenType.L_PAREN):
        return
    self.raise_error("Expecting (")
def _match_r_paren(self):
    """Consume a ")" token, raising a parse error when it is missing."""
    if self._match(TokenType.R_PAREN):
        return
    self.raise_error("Expecting )")
def _replace_columns_with_dots(self, this):
    """Recursively rewrite Column/Identifier nodes into Dot/Var nodes.

    Columns with a table qualifier become Dot(table, column); unqualified
    columns and bare identifiers become Var nodes. Dot nodes are kept but
    their children are rewritten. Any other node is returned unchanged.
    NOTE(review): mutates children in place via exp.replace_children before
    rebuilding the node itself — this ordering is intentional; the returned
    node may be a new object, so callers must use the return value.
    """
    if isinstance(this, exp.Dot):
        # Already a Dot: only its children need rewriting.
        exp.replace_children(this, self._replace_columns_with_dots)
    elif isinstance(this, exp.Column):
        # Rewrite nested parts first, then rebuild this column node.
        exp.replace_children(this, self._replace_columns_with_dots)
        table = this.args.get("table")
        this = (
            self.expression(exp.Dot, this=table, expression=this.this)
            if table
            else self.expression(exp.Var, this=this.name)
        )
    elif isinstance(this, exp.Identifier):
        this = self.expression(exp.Var, this=this.name)
    return this