from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    binary_from_function,
    date_delta_sql,
    date_trunc_to_time,
    datestrtodate_sql,
    format_time_lambda,
    if_sql,
    inline_array_sql,
    max_or_greatest,
    min_or_least,
    rename_func,
    timestamptrunc_sql,
    timestrtotime_sql,
    var_map_sql,
)
from sqlglot.expressions import Literal
from sqlglot.helper import is_int, seq_get
from sqlglot.tokens import TokenType

if t.TYPE_CHECKING:
    from sqlglot._typing import E


# from https://docs.snowflake.com/en/sql-reference/functions/to_timestamp.html
def _parse_to_timestamp(args: t.List) -> t.Union[exp.StrToTime, exp.UnixToTime, exp.TimeStrToTime]:
    if len(args) == 2:
        first_arg, second_arg = args
        if second_arg.is_string:
            # case: <string_expr> [ , <format> ]
            return format_time_lambda(exp.StrToTime, "snowflake")(args)

        # case: <numeric_expr> [ , <scale> ]
        return exp.UnixToTime(this=first_arg, scale=second_arg)

    from sqlglot.optimizer.simplify import simplify_literals

    # The first argument might be an expression like 40 * 365 * 86400, so we try to
    # reduce it using `simplify_literals` first and then check if it's a Literal.
    first_arg = seq_get(args, 0)
    if not isinstance(simplify_literals(first_arg, root=True), Literal):
        # case: <variant_expr> or other expressions such as columns
        return exp.TimeStrToTime.from_arg_list(args)

    if first_arg.is_string:
        if is_int(first_arg.this):
            # case: <integer>
            return exp.UnixToTime.from_arg_list(args)

        # case: <date_expr>
        return format_time_lambda(exp.StrToTime, "snowflake", default=True)(args)

    # case: <numeric_expr>
    return exp.UnixToTime.from_arg_list(args)


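# Illustrative examples of the dispatch above (added note, not from the original
# source; each case follows a branch of _parse_to_timestamp):
#   TO_TIMESTAMP('2020-01-01', 'yyyy-mm-dd') -> exp.StrToTime (string + format)
#   TO_TIMESTAMP(col, 3)                     -> exp.UnixToTime with scale=3
#   TO_TIMESTAMP(1659981729)                 -> exp.UnixToTime (integer seconds)
#   TO_TIMESTAMP('2020-01-01')               -> exp.StrToTime with the default format
#   TO_TIMESTAMP(variant_col)                -> exp.TimeStrToTime

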
def _parse_object_construct(args: t.List) -> t.Union[exp.StarMap, exp.Struct]:
    expression = parser.parse_var_map(args)

    if isinstance(expression, exp.StarMap):
        return expression

    return exp.Struct(
        expressions=[
            t.cast(exp.Condition, k).eq(v) for k, v in zip(expression.keys, expression.values)
        ]
    )


def _parse_datediff(args: t.List) -> exp.DateDiff:
    return exp.DateDiff(
        this=seq_get(args, 2), expression=seq_get(args, 1), unit=_map_date_part(seq_get(args, 0))
    )


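# Illustrative example (added, not from the original source):
# OBJECT_CONSTRUCT('a', 1, 'b', 2) parses into exp.Struct(expressions=['a' = 1, 'b' = 2]),
# while OBJECT_CONSTRUCT(*) yields an exp.StarMap. Similarly, note that Snowflake's
# DATEDIFF(unit, start, end) takes the unit first, hence the reversed seq_get
# indices in _parse_datediff above.

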
# https://docs.snowflake.com/en/sql-reference/functions/date_part.html
# https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts
def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]:
    this = self._parse_var() or self._parse_type()

    if not this:
        return None

    self._match(TokenType.COMMA)
    expression = self._parse_bitwise()
    this = _map_date_part(this)
    name = this.name.upper()

    if name.startswith("EPOCH"):
        if name == "EPOCH_MILLISECOND":
            scale = 10**3
        elif name == "EPOCH_MICROSECOND":
            scale = 10**6
        elif name == "EPOCH_NANOSECOND":
            scale = 10**9
        else:
            scale = None

        ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP"))
        to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts)

        if scale:
            to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale))

        return to_unix

    return self.expression(exp.Extract, this=this, expression=expression)


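# Illustrative example (added, not from the original source): DATE_PART(EPOCH_MILLISECOND, col)
# is parsed as exp.TimeToUnix(CAST(col AS TIMESTAMP)) * 1000 so that dialects without
# fractional epoch parts can still express it; any non-EPOCH part becomes a plain exp.Extract.

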
# https://docs.snowflake.com/en/sql-reference/functions/div0
def _div0_to_if(args: t.List) -> exp.If:
    cond = exp.EQ(this=seq_get(args, 1), expression=exp.Literal.number(0))
    true = exp.Literal.number(0)
    false = exp.Div(this=seq_get(args, 0), expression=seq_get(args, 1))
    return exp.If(this=cond, true=true, false=false)


# https://docs.snowflake.com/en/sql-reference/functions/zeroifnull
def _zeroifnull_to_if(args: t.List) -> exp.If:
    cond = exp.Is(this=seq_get(args, 0), expression=exp.Null())
    return exp.If(this=cond, true=exp.Literal.number(0), false=seq_get(args, 0))


# https://docs.snowflake.com/en/sql-reference/functions/nullifzero
def _nullifzero_to_if(args: t.List) -> exp.If:
    cond = exp.EQ(this=seq_get(args, 0), expression=exp.Literal.number(0))
    return exp.If(this=cond, true=exp.Null(), false=seq_get(args, 0))


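# Illustrative rewrites performed by the three helpers above (added, not from the
# original source):
#   DIV0(a, b)    -> IF(b = 0, 0, a / b)
#   ZEROIFNULL(x) -> IF(x IS NULL, 0, x)
#   NULLIFZERO(x) -> IF(x = 0, NULL, x)

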
def _datatype_sql(self: Snowflake.Generator, expression: exp.DataType) -> str:
    if expression.is_type("array"):
        return "ARRAY"
    elif expression.is_type("map"):
        return "OBJECT"
    return self.datatype_sql(expression)


def _regexpilike_sql(self: Snowflake.Generator, expression: exp.RegexpILike) -> str:
    flag = expression.text("flag")

    if "i" not in flag:
        flag += "i"

    return self.func(
        "REGEXP_LIKE", expression.this, expression.expression, exp.Literal.string(flag)
    )


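# Illustrative example (added, not from the original source): generating
# exp.RegexpILike(this=x, expression='abc') emits REGEXP_LIKE(x, 'abc', 'i'),
# appending the 'i' flag only if it is not already present.

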
def _parse_convert_timezone(args: t.List) -> t.Union[exp.Anonymous, exp.AtTimeZone]:
    if len(args) == 3:
        return exp.Anonymous(this="CONVERT_TIMEZONE", expressions=args)
    return exp.AtTimeZone(this=seq_get(args, 1), zone=seq_get(args, 0))


def _parse_regexp_replace(args: t.List) -> exp.RegexpReplace:
    regexp_replace = exp.RegexpReplace.from_arg_list(args)

    if not regexp_replace.args.get("replacement"):
        regexp_replace.set("replacement", exp.Literal.string(""))

    return regexp_replace


def _show_parser(*args: t.Any, **kwargs: t.Any) -> t.Callable[[Snowflake.Parser], exp.Show]:
    def _parse(self: Snowflake.Parser) -> exp.Show:
        return self._parse_show_snowflake(*args, **kwargs)

    return _parse


DATE_PART_MAPPING = {
    "Y": "YEAR",
    "YY": "YEAR",
    "YYY": "YEAR",
    "YYYY": "YEAR",
    "YR": "YEAR",
    "YEARS": "YEAR",
    "YRS": "YEAR",
    "MM": "MONTH",
    "MON": "MONTH",
    "MONS": "MONTH",
    "MONTHS": "MONTH",
    "D": "DAY",
    "DD": "DAY",
    "DAYS": "DAY",
    "DAYOFMONTH": "DAY",
    "WEEKDAY": "DAYOFWEEK",
    "DOW": "DAYOFWEEK",
    "DW": "DAYOFWEEK",
    "WEEKDAY_ISO": "DAYOFWEEKISO",
    "DOW_ISO": "DAYOFWEEKISO",
    "DW_ISO": "DAYOFWEEKISO",
    "YEARDAY": "DAYOFYEAR",
    "DOY": "DAYOFYEAR",
    "DY": "DAYOFYEAR",
    "W": "WEEK",
    "WK": "WEEK",
    "WEEKOFYEAR": "WEEK",
    "WOY": "WEEK",
    "WY": "WEEK",
    "WEEK_ISO": "WEEKISO",
    "WEEKOFYEARISO": "WEEKISO",
    "WEEKOFYEAR_ISO": "WEEKISO",
    "Q": "QUARTER",
    "QTR": "QUARTER",
    "QTRS": "QUARTER",
    "QUARTERS": "QUARTER",
    "H": "HOUR",
    "HH": "HOUR",
    "HR": "HOUR",
    "HOURS": "HOUR",
    "HRS": "HOUR",
    "M": "MINUTE",
    "MI": "MINUTE",
    "MIN": "MINUTE",
    "MINUTES": "MINUTE",
    "MINS": "MINUTE",
    "S": "SECOND",
    "SEC": "SECOND",
    "SECONDS": "SECOND",
    "SECS": "SECOND",
    "MS": "MILLISECOND",
    "MSEC": "MILLISECOND",
    "MILLISECONDS": "MILLISECOND",
    "US": "MICROSECOND",
    "USEC": "MICROSECOND",
    "MICROSECONDS": "MICROSECOND",
    "NS": "NANOSECOND",
    "NSEC": "NANOSECOND",
    "NANOSEC": "NANOSECOND",
    "NSECOND": "NANOSECOND",
    "NSECONDS": "NANOSECOND",
    "NANOSECS": "NANOSECOND",
    "EPOCH": "EPOCH_SECOND",
    "EPOCH_SECONDS": "EPOCH_SECOND",
    "EPOCH_MILLISECONDS": "EPOCH_MILLISECOND",
    "EPOCH_MICROSECONDS": "EPOCH_MICROSECOND",
    "EPOCH_NANOSECONDS": "EPOCH_NANOSECOND",
    "TZH": "TIMEZONE_HOUR",
    "TZM": "TIMEZONE_MINUTE",
}


@t.overload
def _map_date_part(part: exp.Expression) -> exp.Var:
    pass


@t.overload
def _map_date_part(part: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    pass


def _map_date_part(part):
    mapped = DATE_PART_MAPPING.get(part.name.upper()) if part else None
    return exp.var(mapped) if mapped else part


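# Illustrative examples (added, not from the original source):
#   _map_date_part(exp.var("YY"))  -> exp.var("YEAR")
#   _map_date_part(exp.var("WOY")) -> exp.var("WEEK")
#   _map_date_part(None)           -> None (unmapped inputs pass through unchanged)

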
def _date_trunc_to_time(args: t.List) -> exp.DateTrunc | exp.TimestampTrunc:
    trunc = date_trunc_to_time(args)
    trunc.set("unit", _map_date_part(trunc.args["unit"]))
    return trunc


def _parse_colon_get_path(
    self: parser.Parser, this: t.Optional[exp.Expression]
) -> t.Optional[exp.Expression]:
    while True:
        path = self._parse_bitwise()

        # The cast :: operator has a lower precedence than the extraction operator :, so
        # we rearrange the AST appropriately to avoid casting the 2nd argument of GET_PATH
        if isinstance(path, exp.Cast):
            target_type = path.to
            path = path.this
        else:
            target_type = None

        if isinstance(path, exp.Expression):
            path = exp.Literal.string(path.sql(dialect="snowflake"))

        # The extraction operator : is left-associative
        this = self.expression(
            exp.JSONExtract, this=this, expression=self.dialect.to_json_path(path)
        )

        if target_type:
            this = exp.cast(this, target_type)

        if not self._match(TokenType.COLON):
            break

    return self._parse_range(this)


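# Illustrative example (added, not from the original source): for Snowflake's colon
# extraction syntax, col:a.b::int parses roughly as CAST(JSONExtract(col, 'a.b') AS INT)
# rather than JSONExtract(col, CAST('a.b' AS INT)), thanks to the cast rearrangement above.

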
def _parse_timestamp_from_parts(args: t.List) -> exp.Func:
    if len(args) == 2:
        # Other dialects don't have the TIMESTAMP_FROM_PARTS(date, time) concept,
        # so we parse this into Anonymous for now instead of introducing complexity
        return exp.Anonymous(this="TIMESTAMP_FROM_PARTS", expressions=args)

    return exp.TimestampFromParts.from_arg_list(args)


def _unqualify_unpivot_columns(expression: exp.Expression) -> exp.Expression:
    """
    Snowflake doesn't allow columns referenced in UNPIVOT to be qualified,
    so we need to unqualify them.

    Example:
        >>> from sqlglot import parse_one
        >>> expr = parse_one("SELECT * FROM m_sales UNPIVOT(sales FOR month IN (m_sales.jan, feb, mar, april))")
        >>> print(_unqualify_unpivot_columns(expr).sql(dialect="snowflake"))
        SELECT * FROM m_sales UNPIVOT(sales FOR month IN (jan, feb, mar, april))
    """
    if isinstance(expression, exp.Pivot) and expression.unpivot:
        expression = transforms.unqualify_columns(expression)

    return expression


class Snowflake(Dialect):
    # https://docs.snowflake.com/en/sql-reference/identifiers-syntax
    NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE
    NULL_ORDERING = "nulls_are_large"
    TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'"
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    PREFER_CTE_ALIAS_COLUMN = True
    TABLESAMPLE_SIZE_IS_PERCENT = True

    TIME_MAPPING = {
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "mmmm": "%B",
        "MON": "%b",
        "mon": "%b",
        "MM": "%m",
        "mm": "%m",
        "DD": "%d",
        "dd": "%-d",
        "DY": "%a",
        "dy": "%w",
        "HH24": "%H",
        "hh24": "%H",
        "HH12": "%I",
        "hh12": "%I",
        "MI": "%M",
        "mi": "%M",
        "SS": "%S",
        "ss": "%S",
        "FF": "%f",
        "ff": "%f",
        "FF6": "%f",
        "ff6": "%f",
    }

    def quote_identifier(self, expression: E, identify: bool = True) -> E:
        # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an
        # unquoted DUAL keyword in a special way and does not map it to a user-defined table
        if (
            isinstance(expression, exp.Identifier)
            and isinstance(expression.parent, exp.Table)
            and expression.name.lower() == "dual"
        ):
            return expression  # type: ignore

        return super().quote_identifier(expression, identify=identify)

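    # Illustrative example (added, not from the original source): with identify=True,
    # SELECT * FROM DUAL stays unquoted instead of becoming SELECT * FROM "DUAL",
    # since quoting would make Snowflake look for a real table named "DUAL".
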
    class Parser(parser.Parser):
        IDENTIFY_PIVOT_STRINGS = True

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW}

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ARRAYAGG": exp.ArrayAgg.from_arg_list,
            "ARRAY_CONSTRUCT": exp.Array.from_arg_list,
            "ARRAY_CONTAINS": lambda args: exp.ArrayContains(
                this=seq_get(args, 1), expression=seq_get(args, 0)
            ),
            "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries(
                # ARRAY_GENERATE_RANGE has an exclusive end; we normalize it to be inclusive
                start=seq_get(args, 0),
                end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)),
                step=seq_get(args, 2),
            ),
            "ARRAY_TO_STRING": exp.ArrayJoin.from_arg_list,
            "BITXOR": binary_from_function(exp.BitwiseXor),
            "BIT_XOR": binary_from_function(exp.BitwiseXor),
            "BOOLXOR": binary_from_function(exp.Xor),
            "CONVERT_TIMEZONE": _parse_convert_timezone,
            "DATE_TRUNC": _date_trunc_to_time,
            "DATEADD": lambda args: exp.DateAdd(
                this=seq_get(args, 2),
                expression=seq_get(args, 1),
                unit=_map_date_part(seq_get(args, 0)),
            ),
            "DATEDIFF": _parse_datediff,
            "DIV0": _div0_to_if,
            "FLATTEN": exp.Explode.from_arg_list,
            "GET_PATH": lambda args, dialect: exp.JSONExtract(
                this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
            ),
            "IFF": exp.If.from_arg_list,
            "LAST_DAY": lambda args: exp.LastDay(
                this=seq_get(args, 0), unit=_map_date_part(seq_get(args, 1))
            ),
            "LISTAGG": exp.GroupConcat.from_arg_list,
            "NULLIFZERO": _nullifzero_to_if,
            "OBJECT_CONSTRUCT": _parse_object_construct,
            "REGEXP_REPLACE": _parse_regexp_replace,
            "REGEXP_SUBSTR": exp.RegexpExtract.from_arg_list,
            "RLIKE": exp.RegexpLike.from_arg_list,
            "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)),
            "TIMEDIFF": _parse_datediff,
            "TIMESTAMPDIFF": _parse_datediff,
            "TIMESTAMPFROMPARTS": _parse_timestamp_from_parts,
            "TIMESTAMP_FROM_PARTS": _parse_timestamp_from_parts,
            "TO_TIMESTAMP": _parse_to_timestamp,
            "TO_VARCHAR": exp.ToChar.from_arg_list,
            "ZEROIFNULL": _zeroifnull_to_if,
        }

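        # Illustrative note (added, not from the original source): since the end is made
        # inclusive at parse time, ARRAY_GENERATE_RANGE(0, 5) becomes GenerateSeries(0, 5 - 1),
        # and the exp.GenerateSeries transform in the Generator below adds the 1 back when
        # emitting Snowflake SQL, making the round trip lossless.
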
        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "DATE_PART": _parse_date_part,
            "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(),
        }
        FUNCTION_PARSERS.pop("TRIM")

        TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME}

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny),
            TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny),
            TokenType.COLON: _parse_colon_get_path,
        }

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,
            "SET": lambda self: self._parse_set(tag=self._match_text_seq("TAG")),
            "UNSET": lambda self: self.expression(
                exp.Set,
                tag=self._match_text_seq("TAG"),
                expressions=self._parse_csv(self._parse_id_var),
                unset=True,
            ),
            "SWAP": lambda self: self._parse_alter_table_swap(),
        }

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.SHOW: lambda self: self._parse_show(),
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "LOCATION": lambda self: self._parse_location(),
        }

        SHOW_PARSERS = {
            "SCHEMAS": _show_parser("SCHEMAS"),
            "TERSE SCHEMAS": _show_parser("SCHEMAS"),
            "OBJECTS": _show_parser("OBJECTS"),
            "TERSE OBJECTS": _show_parser("OBJECTS"),
            "TABLES": _show_parser("TABLES"),
            "TERSE TABLES": _show_parser("TABLES"),
            "PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
            "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
            "COLUMNS": _show_parser("COLUMNS"),
            "USERS": _show_parser("USERS"),
            "TERSE USERS": _show_parser("USERS"),
        }

        STAGED_FILE_SINGLE_TOKENS = {
            TokenType.DOT,
            TokenType.MOD,
            TokenType.SLASH,
        }

        FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"]

        def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
            if is_map:
                # Keys are strings in Snowflake's objects, see also:
                # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured
                # - https://docs.snowflake.com/en/sql-reference/functions/object_construct
                return self._parse_slice(self._parse_string())

            return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True))

        def _parse_lateral(self) -> t.Optional[exp.Lateral]:
            lateral = super()._parse_lateral()
            if not lateral:
                return lateral

            if isinstance(lateral.this, exp.Explode):
                table_alias = lateral.args.get("alias")
                columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS]
                if table_alias and not table_alias.args.get("columns"):
                    table_alias.set("columns", columns)
                elif not table_alias:
                    exp.alias_(lateral, "_flattened", table=columns, copy=False)

            return lateral

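        # Illustrative example (added, not from the original source): for
        # LATERAL FLATTEN(input => col) AS f, the alias is expanded to
        # f(SEQ, KEY, PATH, INDEX, VALUE, THIS) so FLATTEN's output columns
        # can be referenced by name when transpiling to other dialects.
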
        def _parse_at_before(self, table: exp.Table) -> exp.Table:
            # https://docs.snowflake.com/en/sql-reference/constructs/at-before
            index = self._index
            if self._match_texts(("AT", "BEFORE")):
                this = self._prev.text.upper()
                kind = (
                    self._match(TokenType.L_PAREN)
                    and self._match_texts(self.HISTORICAL_DATA_KIND)
                    and self._prev.text.upper()
                )
                expression = self._match(TokenType.FARROW) and self._parse_bitwise()

                if expression:
                    self._match_r_paren()
                    when = self.expression(
                        exp.HistoricalData, this=this, kind=kind, expression=expression
                    )
                    table.set("when", when)
                else:
                    self._retreat(index)

            return table

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False
        ) -> exp.Table:
            # https://docs.snowflake.com/en/user-guide/querying-stage
            if self._match(TokenType.STRING, advance=False):
                table = self._parse_string()
            elif self._match_text_seq("@", advance=False):
                table = self._parse_location_path()
            else:
                table = None

            if table:
                file_format = None
                pattern = None

                self._match(TokenType.L_PAREN)
                while self._curr and not self._match(TokenType.R_PAREN):
                    if self._match_text_seq("FILE_FORMAT", "=>"):
                        file_format = self._parse_string() or super()._parse_table_parts(
                            is_db_reference=is_db_reference
                        )
                    elif self._match_text_seq("PATTERN", "=>"):
                        pattern = self._parse_string()
                    else:
                        break

                    self._match(TokenType.COMMA)

                table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern)
            else:
                table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference)

            return self._parse_at_before(table)

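        # Illustrative example (added, not from the original source): this lets staged-file
        # queries such as SELECT * FROM @my_stage (FILE_FORMAT => 'my_csv', PATTERN => '.*[.]csv')
        # parse into an exp.Table whose this is the @my_stage location var.
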
        def _parse_id_var(
            self,
            any_token: bool = True,
            tokens: t.Optional[t.Collection[TokenType]] = None,
        ) -> t.Optional[exp.Expression]:
            if self._match_text_seq("IDENTIFIER", "("):
                identifier = (
                    super()._parse_id_var(any_token=any_token, tokens=tokens)
                    or self._parse_string()
                )
                self._match_r_paren()
                return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier])

            return super()._parse_id_var(any_token=any_token, tokens=tokens)

        def _parse_show_snowflake(self, this: str) -> exp.Show:
            scope = None
            scope_kind = None

            # will identify SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS
            # which is syntactically valid but has no effect on the output
            terse = self._tokens[self._index - 2].text.upper() == "TERSE"

            history = self._match_text_seq("HISTORY")

            like = self._parse_string() if self._match(TokenType.LIKE) else None

            if self._match(TokenType.IN):
                if self._match_text_seq("ACCOUNT"):
                    scope_kind = "ACCOUNT"
                elif self._match_set(self.DB_CREATABLES):
                    scope_kind = self._prev.text.upper()
                    if self._curr:
                        scope = self._parse_table_parts()
            elif self._curr:
                scope_kind = "SCHEMA" if this in ("OBJECTS", "TABLES") else "TABLE"
                scope = self._parse_table_parts()

            return self.expression(
                exp.Show,
                **{
                    "terse": terse,
                    "this": this,
                    "history": history,
                    "like": like,
                    "scope": scope,
                    "scope_kind": scope_kind,
                    "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(),
                    "limit": self._parse_limit(),
                    "from": self._parse_string() if self._match(TokenType.FROM) else None,
                },
            )

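        # Illustrative example (added, not from the original source):
        # SHOW TERSE TABLES HISTORY LIKE '%foo%' IN SCHEMA db1.s1 STARTS WITH 'a' LIMIT 10
        # parses into an exp.Show with terse, history, like, scope_kind, scope,
        # starts_with and limit all populated.
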
        def _parse_alter_table_swap(self) -> exp.SwapTable:
            self._match_text_seq("WITH")
            return self.expression(exp.SwapTable, this=self._parse_table(schema=True))

        def _parse_location(self) -> exp.LocationProperty:
            self._match(TokenType.EQ)
            return self.expression(exp.LocationProperty, this=self._parse_location_path())

        def _parse_location_path(self) -> exp.Var:
            parts = [self._advance_any(ignore_reserved=True)]

            # We avoid consuming a comma token because external tables like @foo and @bar
            # can be joined in a query with a comma separator.
            while self._is_connected() and not self._match(TokenType.COMMA, advance=False):
                parts.append(self._advance_any(ignore_reserved=True))

            return exp.var("".join(part.text for part in parts if part))

    class Tokenizer(tokens.Tokenizer):
        STRING_ESCAPES = ["\\", "'"]
        HEX_STRINGS = [("x'", "'"), ("X'", "'")]
        RAW_STRINGS = ["$$"]
        COMMENTS = ["--", "//", ("/*", "*/")]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "BYTEINT": TokenType.INT,
            "CHAR VARYING": TokenType.VARCHAR,
            "CHARACTER VARYING": TokenType.VARCHAR,
            "EXCLUDE": TokenType.EXCEPT,
            "ILIKE ANY": TokenType.ILIKE_ANY,
            "LIKE ANY": TokenType.LIKE_ANY,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "MINUS": TokenType.EXCEPT,
            "NCHAR VARYING": TokenType.VARCHAR,
            "PUT": TokenType.COMMAND,
            "REMOVE": TokenType.COMMAND,
            "RENAME": TokenType.REPLACE,
            "RM": TokenType.COMMAND,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "SQL_DOUBLE": TokenType.DOUBLE,
            "SQL_VARCHAR": TokenType.VARCHAR,
            "TIMESTAMP_LTZ": TokenType.TIMESTAMPLTZ,
            "TIMESTAMP_NTZ": TokenType.TIMESTAMP,
            "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
            "TIMESTAMPNTZ": TokenType.TIMESTAMP,
            "TOP": TokenType.TOP,
        }

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

        VAR_SINGLE_TOKENS = {"$"}

        COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}

    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        MATCHED_BY_SOURCE = False
        SINGLE_STRING_INTERVAL = True
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        AGGREGATE_FILTER_SUPPORTED = False
        SUPPORTS_TABLE_COPY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        JSON_KEY_VALUE_PAIR_SEP = ","
        INSERT_OVERWRITE = " OVERWRITE INTO"

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ArgMax: rename_func("MAX_BY"),
            exp.ArgMin: rename_func("MIN_BY"),
            exp.Array: inline_array_sql,
            exp.ArrayConcat: rename_func("ARRAY_CAT"),
            exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this),
            exp.ArrayJoin: rename_func("ARRAY_TO_STRING"),
            exp.AtTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), e.this
            ),
            exp.BitwiseXor: rename_func("BITXOR"),
            exp.DateAdd: date_delta_sql("DATEADD"),
            exp.DateDiff: date_delta_sql("DATEDIFF"),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DataType: _datatype_sql,
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.Explode: rename_func("FLATTEN"),
            exp.Extract: rename_func("DATE_PART"),
            exp.FromTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this
            ),
            exp.GenerateSeries: lambda self, e: self.func(
                "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step")
            ),
            exp.GroupConcat: rename_func("LISTAGG"),
            exp.If: if_sql(name="IFF", false_value="NULL"),
            exp.JSONExtract: rename_func("GET_PATH"),
            exp.JSONExtractScalar: rename_func("JSON_EXTRACT_PATH_TEXT"),
            exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions),
            exp.JSONPathRoot: lambda *_: "",
            exp.LogicalAnd: rename_func("BOOLAND_AGG"),
            exp.LogicalOr: rename_func("BOOLOR_AGG"),
            exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.Max: max_or_greatest,
            exp.Min: min_or_least,
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.PercentileCont: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.PercentileDisc: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.Pivot: transforms.preprocess([_unqualify_unpivot_columns]),
            exp.RegexpILike: _regexpilike_sql,
            exp.Rand: rename_func("RANDOM"),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_distinct_on,
                    transforms.explode_to_unnest(),
                    transforms.eliminate_semi_and_anti_joins,
                ]
            ),
            exp.SHA: rename_func("SHA1"),
            exp.StarMap: rename_func("OBJECT_CONSTRUCT"),
            exp.StartsWith: rename_func("STARTSWITH"),
            exp.StrPosition: lambda self, e: self.func(
                "POSITION", e.args.get("substr"), e.this, e.args.get("position")
            ),
            exp.StrToTime: lambda self, e: f"TO_TIMESTAMP({self.sql(e, 'this')}, {self.format_time(e)})",
            exp.Struct: lambda self, e: self.func(
                "OBJECT_CONSTRUCT",
                *(arg for expression in e.expressions for arg in expression.flatten()),
            ),
            exp.Stuff: rename_func("INSERT"),
            exp.TimestampDiff: lambda self, e: self.func(
                "TIMESTAMPDIFF", e.unit, e.expression, e.this
            ),
            exp.TimestampTrunc: timestamptrunc_sql,
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeToStr: lambda self, e: self.func(
                "TO_CHAR", exp.cast(e.this, "timestamp"), self.format_time(e)
            ),
            exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})",
            exp.ToArray: rename_func("TO_ARRAY"),
            exp.ToChar: lambda self, e: self.function_fallback_sql(e),
            exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
            exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True),
            exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
            exp.UnixToTime: rename_func("TO_TIMESTAMP"),
            exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.WeekOfYear: rename_func("WEEKOFYEAR"),
            exp.Xor: rename_func("BOOLXOR"),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.TIMESTAMP: "TIMESTAMPNTZ",
        }

        STAR_MAPPING = {
            "except": "EXCLUDE",
            "replace": "RENAME",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.SetProperty: exp.Properties.Location.UNSUPPORTED,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
            milli = expression.args.get("milli")
            if milli is not None:
                milli_to_nano = milli.pop() * exp.Literal.number(1000000)
                expression.set("nano", milli_to_nano)

            return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            value = expression.this

            if value.type is None:
                from sqlglot.optimizer.annotate_types import annotate_types

                value = annotate_types(value)

            if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN):
                return super().trycast_sql(expression)

            # TRY_CAST only works for string values in Snowflake
            return self.cast_sql(expression)

        def log_sql(self, expression: exp.Log) -> str:
            if not expression.expression:
                return self.func("LN", expression.this)

            return super().log_sql(expression)

        def unnest_sql(self, expression: exp.Unnest) -> str:
            unnest_alias = expression.args.get("alias")
            offset = expression.args.get("offset")

            columns = [
                exp.to_identifier("seq"),
                exp.to_identifier("key"),
                exp.to_identifier("path"),
                offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"),
                seq_get(unnest_alias.columns if unnest_alias else [], 0)
                or exp.to_identifier("value"),
                exp.to_identifier("this"),
            ]

            if unnest_alias:
                unnest_alias.set("columns", columns)
            else:
                unnest_alias = exp.TableAlias(this="_u", columns=columns)

            explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))"
            alias = self.sql(unnest_alias)
            alias = f" AS {alias}" if alias else ""
            return f"{explode}{alias}"

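        # Illustrative example (added, not from the original source): UNNEST(arr) AS t(x)
        # renders roughly as TABLE(FLATTEN(INPUT => arr)) AS t(seq, key, path, index, x, this),
        # mapping the single UNNEST column onto FLATTEN's VALUE output column.
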
        def show_sql(self, expression: exp.Show) -> str:
            terse = "TERSE " if expression.args.get("terse") else ""
            history = " HISTORY" if expression.args.get("history") else ""
            like = self.sql(expression, "like")
            like = f" LIKE {like}" if like else ""

            scope = self.sql(expression, "scope")
            scope = f" {scope}" if scope else ""

            scope_kind = self.sql(expression, "scope_kind")
            if scope_kind:
                scope_kind = f" IN {scope_kind}"

            starts_with = self.sql(expression, "starts_with")
            if starts_with:
                starts_with = f" STARTS WITH {starts_with}"

            limit = self.sql(expression, "limit")

            from_ = self.sql(expression, "from")
            if from_:
                from_ = f" FROM {from_}"

            return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}"

        def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
            # Other dialects don't support all of the following parameters, so we need to
            # generate default values as necessary to ensure the transpilation is correct
            group = expression.args.get("group")
            parameters = expression.args.get("parameters") or (group and exp.Literal.string("c"))
            occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1))
            position = expression.args.get("position") or (occurrence and exp.Literal.number(1))

            return self.func(
                "REGEXP_SUBSTR",
                expression.this,
                expression.expression,
                position,
                occurrence,
                parameters,
                group,
            )

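        # Illustrative example (added, not from the original source): an exp.RegexpExtract
        # carrying only a group argument is emitted as REGEXP_SUBSTR(this, pattern, 1, 1, 'c', group),
        # back-filling position, occurrence and parameters with Snowflake's documented defaults.
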
        def except_op(self, expression: exp.Except) -> str:
            if not expression.args.get("distinct", False):
                self.unsupported("EXCEPT with ALL is not supported in Snowflake")
            return super().except_op(expression)

        def intersect_op(self, expression: exp.Intersect) -> str:
            if not expression.args.get("distinct", False):
                self.unsupported("INTERSECT with ALL is not supported in Snowflake")
            return super().intersect_op(expression)

        def describe_sql(self, expression: exp.Describe) -> str:
            # Default to table if kind is unknown
            kind_value = expression.args.get("kind") or "TABLE"
            kind = f" {kind_value}" if kind_value else ""
            this = f" {self.sql(expression, 'this')}"
            expressions = self.expressions(expression, flat=True)
            expressions = f" {expressions}" if expressions else ""
            return f"DESCRIBE{kind}{this}{expressions}"

        def generatedasidentitycolumnconstraint_sql(
            self, expression: exp.GeneratedAsIdentityColumnConstraint
        ) -> str:
            start = expression.args.get("start")
            start = f" START {start}" if start else ""
            increment = expression.args.get("increment")
            increment = f" INCREMENT {increment}" if increment else ""
            return f"AUTOINCREMENT{start}{increment}"

        def swaptable_sql(self, expression: exp.SwapTable) -> str:
            this = self.sql(expression, "this")
            return f"SWAP WITH {this}"

        def with_properties(self, properties: exp.Properties) -> str:
            return self.properties(properties, wrapped=False, prefix=self.seg(""), sep=" ")

        def cluster_sql(self, expression: exp.Cluster) -> str:
            return f"CLUSTER BY ({self.expressions(expression, flat=True)})"