from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import Dialect, max_or_greatest, min_or_least
from sqlglot.tokens import TokenType


class Teradata(Dialect):
    SUPPORTS_SEMI_ANTI_JOIN = False

    TIME_MAPPING = {
        "Y": "%Y",
        "YYYY": "%Y",
        "YY": "%y",
        "MMMM": "%B",
        "MMM": "%b",
        "DD": "%d",
        "D": "%-d",
        "HH": "%H",
        "H": "%-H",
        "MM": "%M",
        "M": "%-M",
        "SS": "%S",
        "S": "%-S",
        "SSSSSS": "%f",
        "E": "%a",
        "EE": "%a",
        "EEE": "%a",
        "EEEE": "%A",
    }
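    # e.g. (illustrative): under this table, a Teradata format string such as 'YYYY-DD'
    # corresponds to the strftime pattern '%Y-%d'.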

    class Tokenizer(tokens.Tokenizer):
        # https://docs.teradata.com/r/Teradata-Database-SQL-Functions-Operators-Expressions-and-Predicates/March-2017/Comparison-Operators-and-Functions/Comparison-Operators/ANSI-Compliance
        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "^=": TokenType.NEQ,
            "BYTEINT": TokenType.SMALLINT,
            "COLLECT": TokenType.COMMAND,
            "GE": TokenType.GTE,
            "GT": TokenType.GT,
            "HELP": TokenType.COMMAND,
            "INS": TokenType.INSERT,
            "LE": TokenType.LTE,
            "LT": TokenType.LT,
            "MOD": TokenType.MOD,
            "NE": TokenType.NEQ,
            "NOT=": TokenType.NEQ,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "SEL": TokenType.SELECT,
            "ST_GEOMETRY": TokenType.GEOMETRY,
            "TOP": TokenType.TOP,
        }
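        # e.g. (illustrative): "SEL" and "INS" tokenize to SELECT and INSERT, and the
        # Teradata comparison spellings "NE", "NOT=" and "^=" all tokenize to NEQ.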

        # Teradata does not support % as a modulo operator
        SINGLE_TOKENS = {**tokens.Tokenizer.SINGLE_TOKENS}
        SINGLE_TOKENS.pop("%")
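        # e.g. (illustrative): "x MOD 2" is tokenized via the MOD keyword above, while
        # "x % 2" is not recognized as a modulo expression.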

    class Parser(parser.Parser):
        TABLESAMPLE_CSV = True

        CHARSET_TRANSLATORS = {
            "GRAPHIC_TO_KANJISJIS",
            "GRAPHIC_TO_LATIN",
            "GRAPHIC_TO_UNICODE",
            "GRAPHIC_TO_UNICODE_PadSpace",
            "KANJI1_KanjiEBCDIC_TO_UNICODE",
            "KANJI1_KanjiEUC_TO_UNICODE",
            "KANJI1_KANJISJIS_TO_UNICODE",
            "KANJI1_SBC_TO_UNICODE",
            "KANJISJIS_TO_GRAPHIC",
            "KANJISJIS_TO_LATIN",
            "KANJISJIS_TO_UNICODE",
            "LATIN_TO_GRAPHIC",
            "LATIN_TO_KANJISJIS",
            "LATIN_TO_UNICODE",
            "LOCALE_TO_UNICODE",
            "UNICODE_TO_GRAPHIC",
            "UNICODE_TO_GRAPHIC_PadGraphic",
            "UNICODE_TO_GRAPHIC_VarGraphic",
            "UNICODE_TO_KANJI1_KanjiEBCDIC",
            "UNICODE_TO_KANJI1_KanjiEUC",
            "UNICODE_TO_KANJI1_KANJISJIS",
            "UNICODE_TO_KANJI1_SBC",
            "UNICODE_TO_KANJISJIS",
            "UNICODE_TO_LATIN",
            "UNICODE_TO_LOCALE",
            "UNICODE_TO_UNICODE_FoldSpace",
            "UNICODE_TO_UNICODE_Fullwidth",
            "UNICODE_TO_UNICODE_Halfwidth",
            "UNICODE_TO_UNICODE_NFC",
            "UNICODE_TO_UNICODE_NFD",
            "UNICODE_TO_UNICODE_NFKC",
            "UNICODE_TO_UNICODE_NFKD",
        }

        FUNC_TOKENS = {*parser.Parser.FUNC_TOKENS}
        FUNC_TOKENS.remove(TokenType.REPLACE)
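        # REPLACE is treated as a statement keyword here (see STATEMENT_PARSERS below),
        # not as a function token.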

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.DATABASE: lambda self: self.expression(
                exp.Use, this=self._parse_table(schema=False)
            ),
            TokenType.REPLACE: lambda self: self._parse_create(),
        }
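        # e.g. (illustrative): "DATABASE my_db" parses to exp.Use, which the Generator's
        # exp.Use transform below renders back as "DATABASE my_db".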

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "RANGE_N": lambda self: self._parse_rangen(),
            "TRANSLATE": lambda self: self._parse_translate(self.STRICT_CAST),
        }

        def _parse_translate(self, strict: bool) -> exp.Expression:
            this = self._parse_conjunction()

            if not self._match(TokenType.USING):
                self.raise_error("Expected USING in TRANSLATE")

            if self._match_texts(self.CHARSET_TRANSLATORS):
                charset_split = self._prev.text.split("_TO_")
                to = self.expression(exp.CharacterSet, this=charset_split[1])
            else:
                self.raise_error("Expected a character set translator after USING in TRANSLATE")

            return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)
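        # e.g. (illustrative): TRANSLATE(col USING LATIN_TO_UNICODE) parses to a cast of
        # col to the UNICODE character set (the part after "_TO_" in the translator name).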

        # FROM before SET in Teradata UPDATE syntax
        # https://docs.teradata.com/r/Enterprise_IntelliFlex_VMware/Teradata-VantageTM-SQL-Data-Manipulation-Language-17.20/Statement-Syntax/UPDATE/UPDATE-Syntax-Basic-Form-FROM-Clause
        def _parse_update(self) -> exp.Update:
            return self.expression(
                exp.Update,
                **{  # type: ignore
                    "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS),
                    "from": self._parse_from(joins=True),
                    "expressions": self._match(TokenType.SET)
                    and self._parse_csv(self._parse_equality),
                    "where": self._parse_where(),
                },
            )
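        # e.g. (illustrative): "UPDATE t FROM src s SET x = s.x WHERE t.id = s.id" is
        # accepted, with the FROM clause parsed before the SET assignments.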

        def _parse_rangen(self):
            this = self._parse_id_var()
            self._match(TokenType.BETWEEN)

            expressions = self._parse_csv(self._parse_conjunction)
            each = self._match_text_seq("EACH") and self._parse_conjunction()

            return self.expression(exp.RangeN, this=this, expressions=expressions, each=each)
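        # e.g. (illustrative): RANGE_N(col BETWEEN 1 AND 10 EACH 2) parses to exp.RangeN,
        # which Generator.rangen_sql below renders back in the same shape.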

    class Generator(generator.Generator):
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.GEOMETRY: "ST_GEOMETRY",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.OnCommitProperty: exp.Properties.Location.POST_INDEX,
            exp.PartitionedByProperty: exp.Properties.Location.POST_EXPRESSION,
            exp.StabilityProperty: exp.Properties.Location.POST_CREATE,
        }

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.Max: max_or_greatest,
            exp.Min: min_or_least,
            exp.Select: transforms.preprocess(
                [transforms.eliminate_distinct_on, transforms.eliminate_semi_and_anti_joins]
            ),
            exp.StrToDate: lambda self, e: f"CAST({self.sql(e, 'this')} AS DATE FORMAT {self.format_time(e)})",
            exp.ToChar: lambda self, e: self.function_fallback_sql(e),
            exp.Use: lambda self, e: f"DATABASE {self.sql(e, 'this')}",
        }
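        # e.g. (illustrative): exp.StrToDate renders as a Teradata FORMAT cast, roughly
        # CAST(x AS DATE FORMAT 'YYYY-DD'), with TIME_MAPPING above applied in reverse.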

        def tablesample_sql(
            self, expression: exp.TableSample, seed_prefix: str = "SEED", sep=" AS "
        ) -> str:
            return f"{self.sql(expression, 'this')} SAMPLE {self.expressions(expression)}"

        def partitionedbyproperty_sql(self, expression: exp.PartitionedByProperty) -> str:
            return f"PARTITION BY {self.sql(expression, 'this')}"

        # FROM before SET in Teradata UPDATE syntax
        # https://docs.teradata.com/r/Enterprise_IntelliFlex_VMware/Teradata-VantageTM-SQL-Data-Manipulation-Language-17.20/Statement-Syntax/UPDATE/UPDATE-Syntax-Basic-Form-FROM-Clause
        def update_sql(self, expression: exp.Update) -> str:
            this = self.sql(expression, "this")
            from_sql = self.sql(expression, "from")
            set_sql = self.expressions(expression, flat=True)
            where_sql = self.sql(expression, "where")
            sql = f"UPDATE {this}{from_sql} SET {set_sql}{where_sql}"
            return self.prepend_ctes(expression, sql)

        def mod_sql(self, expression: exp.Mod) -> str:
            return self.binary(expression, "MOD")
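        # e.g. (illustrative): exp.Mod renders as "a MOD b", since Teradata has no "%"
        # modulo operator (see the Tokenizer above).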

        def datatype_sql(self, expression: exp.DataType) -> str:
            type_sql = super().datatype_sql(expression)
            prefix_sql = expression.args.get("prefix")
            return f"SYSUDTLIB.{type_sql}" if prefix_sql else type_sql

        def rangen_sql(self, expression: exp.RangeN) -> str:
            this = self.sql(expression, "this")
            expressions_sql = self.expressions(expression)
            each_sql = self.sql(expression, "each")
            each_sql = f" EACH {each_sql}" if each_sql else ""

            return f"RANGE_N({this} BETWEEN {expressions_sql}{each_sql})"

        def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
            kind = self.sql(expression, "kind").upper()
            if kind == "TABLE" and locations.get(exp.Properties.Location.POST_NAME):
                this_name = self.sql(expression.this, "this")
                this_properties = self.properties(
                    exp.Properties(expressions=locations[exp.Properties.Location.POST_NAME]),
                    wrapped=False,
                    prefix=",",
                )
                this_schema = self.schema_columns_sql(expression.this)
                return f"{this_name}{this_properties}{self.sep()}{this_schema}"

            return super().createable_sql(expression, locations)
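

# Minimal usage sketch (illustrative, not part of the dialect definition); it only runs
# when this module is executed directly and assumes the sqlglot package is installed.
if __name__ == "__main__":
    import sqlglot

    # "SEL" tokenizes to SELECT, and "DATABASE x" parses to exp.Use (see Parser above).
    print(sqlglot.transpile("SEL 1", read="teradata", write="teradata")[0])
    print(sqlglot.transpile("DATABASE my_db", read="teradata", write="teradata")[0])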