2025-02-13 14:52:26 +01:00
|
|
|
from __future__ import annotations
|
|
|
|
|
2025-02-13 15:04:17 +01:00
|
|
|
import typing as t
|
|
|
|
|
2025-02-13 14:55:11 +01:00
|
|
|
from sqlglot import exp, transforms
|
2025-02-13 20:49:56 +01:00
|
|
|
from sqlglot.dialects.dialect import (
|
|
|
|
concat_to_dpipe_sql,
|
2025-02-13 21:00:44 +01:00
|
|
|
concat_ws_to_dpipe_sql,
|
2025-02-13 20:49:56 +01:00
|
|
|
rename_func,
|
|
|
|
ts_or_ds_to_date_sql,
|
|
|
|
)
|
2025-02-13 07:47:22 +01:00
|
|
|
from sqlglot.dialects.postgres import Postgres
|
2025-02-13 15:24:45 +01:00
|
|
|
from sqlglot.helper import seq_get
|
2025-02-13 07:47:22 +01:00
|
|
|
from sqlglot.tokens import TokenType
|
|
|
|
|
|
|
|
|
2025-02-13 20:56:33 +01:00
|
|
|
def _json_sql(self: Redshift.Generator, expression: exp.JSONExtract | exp.JSONExtractScalar) -> str:
    """Render a JSON extraction as `<this>."<key>"`.

    The left-hand side is the generated SQL of the node's `this` arg and the
    key is the `name` of the node's `expression` arg, wrapped in double quotes.
    """
    base_sql = self.sql(expression, "this")
    key_name = expression.expression.name
    return f'{base_sql}."{key_name}"'
|
2025-02-13 15:51:35 +01:00
|
|
|
|
|
|
|
|
2025-02-13 20:49:56 +01:00
|
|
|
def _parse_date_add(args: t.List) -> exp.DateAdd:
    """Build a `DateAdd` node from arguments given in (unit, amount, date) order.

    The third argument is wrapped in `TsOrDsToDate` and becomes the node's
    `this`; the second becomes `expression` and the first becomes `unit`.
    """
    unit, amount, date_operand = (seq_get(args, position) for position in range(3))
    return exp.DateAdd(
        this=exp.TsOrDsToDate(this=date_operand),
        expression=amount,
        unit=unit,
    )
|
|
|
|
|
|
|
|
|
2025-02-13 07:47:22 +01:00
|
|
|
class Redshift(Postgres):
    # Redshift dialect, layered on top of the Postgres dialect: each nested
    # Parser / Tokenizer / Generator extends its Postgres counterpart and only
    # overrides what differs.

    # https://docs.aws.amazon.com/redshift/latest/dg/r_names.html
    RESOLVES_IDENTIFIERS_AS_UPPERCASE = None

    SUPPORTS_USER_DEFINED_TYPES = False

    TIME_FORMAT = "'YYYY-MM-DD HH:MI:SS'"
    # Postgres mapping plus two Redshift-specific format tokens.
    TIME_MAPPING = {
        **Postgres.TIME_MAPPING,
        "MON": "%b",
        "HH": "%H",
    }

    class Parser(Postgres.Parser):
        FUNCTIONS = {
            **Postgres.Parser.FUNCTIONS,
            # ADD_MONTHS(date, n) is parsed as a DateAdd with a fixed "month" unit.
            "ADD_MONTHS": lambda args: exp.DateAdd(
                this=exp.TsOrDsToDate(this=seq_get(args, 0)),
                expression=seq_get(args, 1),
                unit=exp.var("month"),
            ),
            "DATEADD": _parse_date_add,
            "DATE_ADD": _parse_date_add,
            # DATEDIFF arguments arrive as (unit, <second arg>, <third arg>);
            # the third becomes `this` and the second becomes `expression`.
            "DATEDIFF": lambda args: exp.DateDiff(
                this=exp.TsOrDsToDate(this=seq_get(args, 2)),
                expression=exp.TsOrDsToDate(this=seq_get(args, 1)),
                unit=seq_get(args, 0),
            ),
            "STRTOL": exp.FromBase.from_arg_list,
        }

        def _parse_types(
            self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
        ) -> t.Optional[exp.Expression]:
            """Parse a type via the Postgres parser, then normalize `VARCHAR(MAX)`.

            All arguments are forwarded unchanged to the parent implementation.
            When the result is a varchar `DataType` whose first type parameter
            wraps a column named MAX, the parameters are replaced by a single
            `MAX` var. The (possibly modified) parent result is returned.
            """
            this = super()._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )

            if (
                isinstance(this, exp.DataType)
                and this.is_type("varchar")
                and this.expressions
                and this.expressions[0].this == exp.column("MAX")
            ):
                this.set("expressions", [exp.var("MAX")])

            return this

        def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
            """Parse `<type>[,] <expr>` into a `TryCast` of `<expr>` to `<type>`.

            `strict` is accepted for signature compatibility but is not used here.
            """
            to = self._parse_types()
            # The comma between the type and the value is consumed if present.
            self._match(TokenType.COMMA)
            this = self._parse_bitwise()
            return self.expression(exp.TryCast, this=this, to=to)

    class Tokenizer(Postgres.Tokenizer):
        BIT_STRINGS = []
        HEX_STRINGS = []
        STRING_ESCAPES = ["\\"]

        KEYWORDS = {
            **Postgres.Tokenizer.KEYWORDS,
            "HLLSKETCH": TokenType.HLLSKETCH,
            "SUPER": TokenType.SUPER,
            "SYSDATE": TokenType.CURRENT_TIMESTAMP,
            "TOP": TokenType.TOP,
            "UNLOAD": TokenType.COMMAND,
            "VARBYTE": TokenType.VARBINARY,
        }

        # Redshift allows # to appear as a table identifier prefix
        SINGLE_TOKENS = Postgres.Tokenizer.SINGLE_TOKENS.copy()
        SINGLE_TOKENS.pop("#")

    class Generator(Postgres.Generator):
        LOCKING_READS_SUPPORTED = False
        RENAME_TABLE_WITH_DB = False
        QUERY_HINTS = False
        VALUES_AS_TABLE = False
        TZ_TO_WITH_TIME_ZONE = True
        NVL2_SUPPORTED = True

        TYPE_MAPPING = {
            **Postgres.Generator.TYPE_MAPPING,
            exp.DataType.Type.BINARY: "VARBYTE",
            exp.DataType.Type.INT: "INTEGER",
            exp.DataType.Type.TIMETZ: "TIME",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.VARBINARY: "VARBYTE",
        }

        PROPERTIES_LOCATION = {
            **Postgres.Generator.PROPERTIES_LOCATION,
            exp.LikeProperty: exp.Properties.Location.POST_WITH,
        }

        TRANSFORMS = {
            **Postgres.Generator.TRANSFORMS,
            exp.Concat: concat_to_dpipe_sql,
            exp.ConcatWs: concat_ws_to_dpipe_sql,
            exp.CurrentTimestamp: lambda self, e: "SYSDATE",
            # DATEADD / DATEDIFF are emitted as (unit, expression, this), with
            # the unit falling back to "day" when the node has none.
            exp.DateAdd: lambda self, e: self.func(
                "DATEADD", exp.var(e.text("unit") or "day"), e.expression, e.this
            ),
            exp.DateDiff: lambda self, e: self.func(
                "DATEDIFF", exp.var(e.text("unit") or "day"), e.expression, e.this
            ),
            exp.DistKeyProperty: lambda self, e: f"DISTKEY({e.name})",
            exp.DistStyleProperty: lambda self, e: self.naked_property(e),
            exp.FromBase: rename_func("STRTOL"),
            exp.JSONExtract: _json_sql,
            exp.JSONExtractScalar: _json_sql,
            exp.SafeConcat: concat_to_dpipe_sql,
            exp.Select: transforms.preprocess(
                [transforms.eliminate_distinct_on, transforms.eliminate_semi_and_anti_joins]
            ),
            # `.get` rather than indexing: a SortKeyProperty that carries no
            # `compound` arg must render as a plain SORTKEY instead of raising
            # KeyError.
            exp.SortKeyProperty: lambda self, e: (
                f"{'COMPOUND ' if e.args.get('compound') else ''}"
                f"SORTKEY({self.format_args(*e.this)})"
            ),
            exp.TsOrDsToDate: ts_or_ds_to_date_sql("redshift"),
        }

        # Postgres maps exp.Pivot to no_pivot_sql, but Redshift supports pivots
        TRANSFORMS.pop(exp.Pivot)

        # Redshift uses the POW | POWER (expr1, expr2) syntax instead of expr1 ^ expr2 (postgres)
        TRANSFORMS.pop(exp.Pow)

        # Redshift supports ANY_VALUE(..)
        TRANSFORMS.pop(exp.AnyValue)

        RESERVED_KEYWORDS = {*Postgres.Generator.RESERVED_KEYWORDS, "snapshot", "type"}

        def with_properties(self, properties: exp.Properties) -> str:
            """Redshift doesn't have `WITH` as part of their with_properties so we remove it"""
            return self.properties(properties, prefix=" ", suffix="")

        def datatype_sql(self, expression: exp.DataType) -> str:
            """
            Redshift converts the `TEXT` data type to `VARCHAR(255)` by default when people more generally mean
            VARCHAR of max length which is `VARCHAR(max)` in Redshift. Therefore if we get a `TEXT` data type
            without precision we convert it to `VARCHAR(max)` and if it does have precision then we just convert
            `TEXT` to `VARCHAR`.
            """
            if expression.is_type("text"):
                # Work on a copy so the caller's expression is left untouched.
                expression = expression.copy()
                expression.set("this", exp.DataType.Type.VARCHAR)
                precision = expression.args.get("expressions")

                if not precision:
                    expression.append("expressions", exp.var("MAX"))

            return super().datatype_sql(expression)
|