Adding upstream version 6.0.4.

Signed-off-by: Daniel Baumann <daniel@debian.org>

parent d01130b3f1
commit 527597d2af
122 changed files with 23162 additions and 0 deletions
sqlglot/dialects/__init__.py (new file, +15 lines)

from sqlglot.dialects.bigquery import BigQuery
from sqlglot.dialects.clickhouse import ClickHouse
from sqlglot.dialects.dialect import Dialect, Dialects
from sqlglot.dialects.duckdb import DuckDB
from sqlglot.dialects.hive import Hive
from sqlglot.dialects.mysql import MySQL
from sqlglot.dialects.oracle import Oracle
from sqlglot.dialects.postgres import Postgres
from sqlglot.dialects.presto import Presto
from sqlglot.dialects.snowflake import Snowflake
from sqlglot.dialects.spark import Spark
from sqlglot.dialects.sqlite import SQLite
from sqlglot.dialects.starrocks import StarRocks
from sqlglot.dialects.tableau import Tableau
from sqlglot.dialects.trino import Trino
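Each import above registers a dialect class with the Dialect metaclass under its lowercase name, so callers can refer to dialects by string. A minimal sketch of that usage (not part of the commit):

import sqlglot

# Dialect names are the lowercase class names imported above.
print(sqlglot.transpile("SELECT 1 AS x", read="duckdb", write="hive")[0])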
sqlglot/dialects/bigquery.py (new file, +128 lines)

from sqlglot import exp
from sqlglot.dialects.dialect import (
    Dialect,
    inline_array_sql,
    no_ilike_sql,
    rename_func,
)
from sqlglot.generator import Generator
from sqlglot.helper import list_get
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer, TokenType


def _date_add(expression_class):
    def func(args):
        interval = list_get(args, 1)
        return expression_class(
            this=list_get(args, 0),
            expression=interval.this,
            unit=interval.args.get("unit"),
        )

    return func


def _date_add_sql(data_type, kind):
    def func(self, expression):
        this = self.sql(expression, "this")
        unit = self.sql(expression, "unit") or "'day'"
        expression = self.sql(expression, "expression")
        return f"{data_type}_{kind}({this}, INTERVAL {expression} {unit})"

    return func


class BigQuery(Dialect):
    unnest_column_only = True

    class Tokenizer(Tokenizer):
        QUOTES = [
            (prefix + quote, quote) if prefix else quote
            for quote in ["'", '"', '"""', "'''"]
            for prefix in ["", "r", "R"]
        ]
        IDENTIFIERS = ["`"]
        ESCAPE = "\\"

        KEYWORDS = {
            **Tokenizer.KEYWORDS,
            "CURRENT_DATETIME": TokenType.CURRENT_DATETIME,
            "CURRENT_TIME": TokenType.CURRENT_TIME,
            "GEOGRAPHY": TokenType.GEOGRAPHY,
            "INT64": TokenType.BIGINT,
            "FLOAT64": TokenType.DOUBLE,
            "QUALIFY": TokenType.QUALIFY,
            "UNKNOWN": TokenType.NULL,
            "WINDOW": TokenType.WINDOW,
        }

    class Parser(Parser):
        FUNCTIONS = {
            **Parser.FUNCTIONS,
            "DATE_ADD": _date_add(exp.DateAdd),
            "DATETIME_ADD": _date_add(exp.DatetimeAdd),
            "TIME_ADD": _date_add(exp.TimeAdd),
            "TIMESTAMP_ADD": _date_add(exp.TimestampAdd),
            "DATE_SUB": _date_add(exp.DateSub),
            "DATETIME_SUB": _date_add(exp.DatetimeSub),
            "TIME_SUB": _date_add(exp.TimeSub),
            "TIMESTAMP_SUB": _date_add(exp.TimestampSub),
        }

        NO_PAREN_FUNCTIONS = {
            **Parser.NO_PAREN_FUNCTIONS,
            TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
            TokenType.CURRENT_TIME: exp.CurrentTime,
        }

    class Generator(Generator):
        TRANSFORMS = {
            exp.Array: inline_array_sql,
            exp.ArraySize: rename_func("ARRAY_LENGTH"),
            exp.DateAdd: _date_add_sql("DATE", "ADD"),
            exp.DateSub: _date_add_sql("DATE", "SUB"),
            exp.DatetimeAdd: _date_add_sql("DATETIME", "ADD"),
            exp.DatetimeSub: _date_add_sql("DATETIME", "SUB"),
            exp.ILike: no_ilike_sql,
            exp.TimeAdd: _date_add_sql("TIME", "ADD"),
            exp.TimeSub: _date_add_sql("TIME", "SUB"),
            exp.TimestampAdd: _date_add_sql("TIMESTAMP", "ADD"),
            exp.TimestampSub: _date_add_sql("TIMESTAMP", "SUB"),
            exp.VariancePop: rename_func("VAR_POP"),
        }

        TYPE_MAPPING = {
            **Generator.TYPE_MAPPING,
            exp.DataType.Type.TINYINT: "INT64",
            exp.DataType.Type.SMALLINT: "INT64",
            exp.DataType.Type.INT: "INT64",
            exp.DataType.Type.BIGINT: "INT64",
            exp.DataType.Type.DECIMAL: "NUMERIC",
            exp.DataType.Type.FLOAT: "FLOAT64",
            exp.DataType.Type.DOUBLE: "FLOAT64",
            exp.DataType.Type.BOOLEAN: "BOOL",
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.VARCHAR: "STRING",
            exp.DataType.Type.NVARCHAR: "STRING",
        }

        def in_unnest_op(self, unnest):
            return self.sql(unnest)

        def union_op(self, expression):
            return f"UNION{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

        def except_op(self, expression):
            if not expression.args.get("distinct", False):
                self.unsupported("EXCEPT without DISTINCT is not supported in BigQuery")
            return f"EXCEPT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

        def intersect_op(self, expression):
            if not expression.args.get("distinct", False):
                self.unsupported(
                    "INTERSECT without DISTINCT is not supported in BigQuery"
                )
            return (
                f"INTERSECT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"
            )
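The _date_add/_date_add_sql pair round-trips BigQuery's interval arithmetic through generic expression nodes. A sketch (exact output may differ between sqlglot versions):

import sqlglot

# Parsed into exp.DateAdd(this, expression, unit), then printed back by
# _date_add_sql("DATE", "ADD").
sql = "SELECT DATE_ADD(CURRENT_DATE, INTERVAL 2 DAY)"
print(sqlglot.transpile(sql, read="bigquery", write="bigquery")[0])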
sqlglot/dialects/clickhouse.py (new file, +48 lines)

from sqlglot import exp
from sqlglot.dialects.dialect import Dialect, inline_array_sql
from sqlglot.generator import Generator
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer, TokenType


class ClickHouse(Dialect):
    normalize_functions = None
    null_ordering = "nulls_are_last"

    class Tokenizer(Tokenizer):
        IDENTIFIERS = ['"', "`"]

        KEYWORDS = {
            **Tokenizer.KEYWORDS,
            "NULLABLE": TokenType.NULLABLE,
            "FINAL": TokenType.FINAL,
            "INT8": TokenType.TINYINT,
            "INT16": TokenType.SMALLINT,
            "INT32": TokenType.INT,
            "INT64": TokenType.BIGINT,
            "FLOAT32": TokenType.FLOAT,
            "FLOAT64": TokenType.DOUBLE,
        }

    class Parser(Parser):
        def _parse_table(self, schema=False):
            this = super()._parse_table(schema)

            if self._match(TokenType.FINAL):
                this = self.expression(exp.Final, this=this)

            return this

    class Generator(Generator):
        STRUCT_DELIMITER = ("(", ")")

        TYPE_MAPPING = {
            **Generator.TYPE_MAPPING,
            exp.DataType.Type.NULLABLE: "Nullable",
        }

        TRANSFORMS = {
            **Generator.TRANSFORMS,
            exp.Array: inline_array_sql,
            exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL",
        }
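The FINAL handling is easiest to see end to end: _parse_table wraps the table in exp.Final and the matching TRANSFORMS entry prints it back. A sketch:

import sqlglot

print(sqlglot.transpile("SELECT * FROM t FINAL", read="clickhouse", write="clickhouse")[0])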
sqlglot/dialects/dialect.py (new file, +268 lines)

from enum import Enum

from sqlglot import exp
from sqlglot.generator import Generator
from sqlglot.helper import csv, list_get
from sqlglot.parser import Parser
from sqlglot.time import format_time
from sqlglot.tokens import Tokenizer
from sqlglot.trie import new_trie


class Dialects(str, Enum):
    DIALECT = ""

    BIGQUERY = "bigquery"
    CLICKHOUSE = "clickhouse"
    DUCKDB = "duckdb"
    HIVE = "hive"
    MYSQL = "mysql"
    ORACLE = "oracle"
    POSTGRES = "postgres"
    PRESTO = "presto"
    SNOWFLAKE = "snowflake"
    SPARK = "spark"
    SQLITE = "sqlite"
    STARROCKS = "starrocks"
    TABLEAU = "tableau"
    TRINO = "trino"


class _Dialect(type):
    classes = {}

    @classmethod
    def __getitem__(cls, key):
        return cls.classes[key]

    @classmethod
    def get(cls, key, default=None):
        return cls.classes.get(key, default)

    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)
        enum = Dialects.__members__.get(clsname.upper())
        cls.classes[enum.value if enum is not None else clsname.lower()] = klass

        klass.time_trie = new_trie(klass.time_mapping)
        klass.inverse_time_mapping = {v: k for k, v in klass.time_mapping.items()}
        klass.inverse_time_trie = new_trie(klass.inverse_time_mapping)

        klass.tokenizer_class = getattr(klass, "Tokenizer", Tokenizer)
        klass.parser_class = getattr(klass, "Parser", Parser)
        klass.generator_class = getattr(klass, "Generator", Generator)

        klass.tokenizer = klass.tokenizer_class()
        klass.quote_start, klass.quote_end = list(klass.tokenizer_class.QUOTES.items())[0]
        klass.identifier_start, klass.identifier_end = list(
            klass.tokenizer_class.IDENTIFIERS.items()
        )[0]

        return klass


class Dialect(metaclass=_Dialect):
    index_offset = 0
    unnest_column_only = False
    alias_post_tablesample = False
    normalize_functions = "upper"
    null_ordering = "nulls_are_small"

    date_format = "'%Y-%m-%d'"
    dateint_format = "'%Y%m%d'"
    time_format = "'%Y-%m-%d %H:%M:%S'"
    time_mapping = {}

    # autofilled
    quote_start = None
    quote_end = None
    identifier_start = None
    identifier_end = None

    time_trie = None
    inverse_time_mapping = None
    inverse_time_trie = None
    tokenizer_class = None
    parser_class = None
    generator_class = None
    tokenizer = None

    @classmethod
    def get_or_raise(cls, dialect):
        if not dialect:
            return cls
        result = cls.get(dialect)
        if not result:
            raise ValueError(f"Unknown dialect '{dialect}'")
        return result

    @classmethod
    def format_time(cls, expression):
        if isinstance(expression, str):
            return exp.Literal.string(
                format_time(
                    expression[1:-1],  # the time formats are quoted
                    cls.time_mapping,
                    cls.time_trie,
                )
            )
        if expression and expression.is_string:
            return exp.Literal.string(
                format_time(
                    expression.this,
                    cls.time_mapping,
                    cls.time_trie,
                )
            )
        return expression

    def parse(self, sql, **opts):
        return self.parser(**opts).parse(self.tokenizer.tokenize(sql), sql)

    def parse_into(self, expression_type, sql, **opts):
        return self.parser(**opts).parse_into(
            expression_type, self.tokenizer.tokenize(sql), sql
        )

    def generate(self, expression, **opts):
        return self.generator(**opts).generate(expression)

    def transpile(self, code, **opts):
        return self.generate(self.parse(code), **opts)

    def parser(self, **opts):
        return self.parser_class(
            **{
                "index_offset": self.index_offset,
                "unnest_column_only": self.unnest_column_only,
                "alias_post_tablesample": self.alias_post_tablesample,
                "null_ordering": self.null_ordering,
                **opts,
            },
        )

    def generator(self, **opts):
        return self.generator_class(
            **{
                "quote_start": self.quote_start,
                "quote_end": self.quote_end,
                "identifier_start": self.identifier_start,
                "identifier_end": self.identifier_end,
                "escape": self.tokenizer_class.ESCAPE,
                "index_offset": self.index_offset,
                "time_mapping": self.inverse_time_mapping,
                "time_trie": self.inverse_time_trie,
                "unnest_column_only": self.unnest_column_only,
                "alias_post_tablesample": self.alias_post_tablesample,
                "normalize_functions": self.normalize_functions,
                "null_ordering": self.null_ordering,
                **opts,
            }
        )


def rename_func(name):
    return (
        lambda self, expression: f"{name}({csv(*[self.sql(e) for e in expression.args.values()])})"
    )


def approx_count_distinct_sql(self, expression):
    if expression.args.get("accuracy"):
        self.unsupported("APPROX_COUNT_DISTINCT does not support accuracy")
    return f"APPROX_COUNT_DISTINCT({self.sql(expression, 'this')})"


def if_sql(self, expression):
    expressions = csv(
        self.sql(expression, "this"),
        self.sql(expression, "true"),
        self.sql(expression, "false"),
    )
    return f"IF({expressions})"


def arrow_json_extract_sql(self, expression):
    return f"{self.sql(expression, 'this')}->{self.sql(expression, 'path')}"


def arrow_json_extract_scalar_sql(self, expression):
    return f"{self.sql(expression, 'this')}->>{self.sql(expression, 'path')}"


def inline_array_sql(self, expression):
    return f"[{self.expressions(expression)}]"


def no_ilike_sql(self, expression):
    return self.like_sql(
        exp.Like(
            this=exp.Lower(this=expression.this),
            expression=expression.args["expression"],
        )
    )


def no_paren_current_date_sql(self, expression):
    zone = self.sql(expression, "this")
    return f"CURRENT_DATE AT TIME ZONE {zone}" if zone else "CURRENT_DATE"


def no_recursive_cte_sql(self, expression):
    if expression.args.get("recursive"):
        self.unsupported("Recursive CTEs are unsupported")
        expression.args["recursive"] = False
    return self.with_sql(expression)


def no_safe_divide_sql(self, expression):
    n = self.sql(expression, "this")
    d = self.sql(expression, "expression")
    return f"IF({d} <> 0, {n} / {d}, NULL)"


def no_tablesample_sql(self, expression):
    self.unsupported("TABLESAMPLE unsupported")
    return self.sql(expression.this)


def no_trycast_sql(self, expression):
    return self.cast_sql(expression)


def str_position_sql(self, expression):
    this = self.sql(expression, "this")
    substr = self.sql(expression, "substr")
    position = self.sql(expression, "position")
    if position:
        return f"STRPOS(SUBSTR({this}, {position}), {substr}) + {position} - 1"
    return f"STRPOS({this}, {substr})"


def struct_extract_sql(self, expression):
    this = self.sql(expression, "this")
    struct_key = self.sql(exp.Identifier(this=expression.expression, quoted=True))
    return f"{this}.{struct_key}"


def format_time_lambda(exp_class, dialect, default=None):
    """Helper used for time expressions.

    Args:
        exp_class (Class): the expression class to instantiate
        dialect (string): sql dialect
        default (Option[bool | str]): the default format, True being time
    """

    def _format_time(args):
        return exp_class(
            this=list_get(args, 0),
            format=Dialect[dialect].format_time(
                list_get(args, 1)
                or (Dialect[dialect].time_format if default is True else default)
            ),
        )

    return _format_time
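Since _Dialect.__new__ records every subclass in classes (keyed by the Dialects enum value), a dialect resolved from a plain string is immediately usable. A sketch:

from sqlglot.dialects.dialect import Dialect

# get_or_raise returns the registered class, or raises for unknown names.
duckdb = Dialect.get_or_raise("duckdb")()
ast = duckdb.parse("SELECT 1 + 1")[0]
print(duckdb.generate(ast))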
sqlglot/dialects/duckdb.py (new file, +156 lines)

from sqlglot import exp
from sqlglot.dialects.dialect import (
    Dialect,
    approx_count_distinct_sql,
    arrow_json_extract_scalar_sql,
    arrow_json_extract_sql,
    format_time_lambda,
    no_safe_divide_sql,
    no_tablesample_sql,
    rename_func,
    str_position_sql,
)
from sqlglot.generator import Generator
from sqlglot.helper import list_get
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer, TokenType


def _unix_to_time(self, expression):
    return f"TO_TIMESTAMP(CAST({self.sql(expression, 'this')} AS BIGINT))"


def _str_to_time_sql(self, expression):
    return f"STRPTIME({self.sql(expression, 'this')}, {self.format_time(expression)})"


def _ts_or_ds_add(self, expression):
    this = self.sql(expression, "this")
    e = self.sql(expression, "expression")
    unit = self.sql(expression, "unit").strip("'") or "DAY"
    return f"CAST({this} AS DATE) + INTERVAL {e} {unit}"


def _ts_or_ds_to_date_sql(self, expression):
    time_format = self.format_time(expression)
    if time_format and time_format not in (DuckDB.time_format, DuckDB.date_format):
        return f"CAST({_str_to_time_sql(self, expression)} AS DATE)"
    return f"CAST({self.sql(expression, 'this')} AS DATE)"


def _date_add(self, expression):
    this = self.sql(expression, "this")
    e = self.sql(expression, "expression")
    unit = self.sql(expression, "unit").strip("'") or "DAY"
    return f"{this} + INTERVAL {e} {unit}"


def _array_sort_sql(self, expression):
    if expression.expression:
        self.unsupported("DUCKDB ARRAY_SORT does not support a comparator")
    return f"ARRAY_SORT({self.sql(expression, 'this')})"


def _sort_array_sql(self, expression):
    this = self.sql(expression, "this")
    if expression.args.get("asc") == exp.FALSE:
        return f"ARRAY_REVERSE_SORT({this})"
    return f"ARRAY_SORT({this})"


def _sort_array_reverse(args):
    return exp.SortArray(this=list_get(args, 0), asc=exp.FALSE)


def _struct_pack_sql(self, expression):
    args = [
        self.binary(e, ":=") if isinstance(e, exp.EQ) else self.sql(e)
        for e in expression.expressions
    ]
    return f"STRUCT_PACK({', '.join(args)})"


class DuckDB(Dialect):
    class Tokenizer(Tokenizer):
        KEYWORDS = {
            **Tokenizer.KEYWORDS,
            ":=": TokenType.EQ,
        }

    class Parser(Parser):
        FUNCTIONS = {
            **Parser.FUNCTIONS,
            "APPROX_COUNT_DISTINCT": exp.ApproxDistinct.from_arg_list,
            "ARRAY_LENGTH": exp.ArraySize.from_arg_list,
            "ARRAY_SORT": exp.SortArray.from_arg_list,
            "ARRAY_REVERSE_SORT": _sort_array_reverse,
            "EPOCH": exp.TimeToUnix.from_arg_list,
            "EPOCH_MS": lambda args: exp.UnixToTime(
                this=exp.Div(
                    this=list_get(args, 0),
                    expression=exp.Literal.number(1000),
                )
            ),
            "LIST_SORT": exp.SortArray.from_arg_list,
            "LIST_REVERSE_SORT": _sort_array_reverse,
            "LIST_VALUE": exp.Array.from_arg_list,
            "REGEXP_MATCHES": exp.RegexpLike.from_arg_list,
            "STRFTIME": format_time_lambda(exp.TimeToStr, "duckdb"),
            "STRPTIME": format_time_lambda(exp.StrToTime, "duckdb"),
            "STR_SPLIT": exp.Split.from_arg_list,
            "STRING_SPLIT": exp.Split.from_arg_list,
            "STRING_TO_ARRAY": exp.Split.from_arg_list,
            "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
            "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
            "STRUCT_PACK": exp.Struct.from_arg_list,
            "TO_TIMESTAMP": exp.TimeStrToTime.from_arg_list,
            "UNNEST": exp.Explode.from_arg_list,
        }

    class Generator(Generator):
        TRANSFORMS = {
            **Generator.TRANSFORMS,
            exp.ApproxDistinct: approx_count_distinct_sql,
            exp.Array: lambda self, e: f"LIST_VALUE({self.expressions(e, flat=True)})",
            exp.ArraySize: rename_func("ARRAY_LENGTH"),
            exp.ArraySort: _array_sort_sql,
            exp.ArraySum: rename_func("LIST_SUM"),
            exp.DateAdd: _date_add,
            exp.DateDiff: lambda self, e: f"""DATE_DIFF({self.sql(e, 'unit') or "'day'"}, {self.sql(e, 'expression')}, {self.sql(e, 'this')})""",
            exp.DateStrToDate: lambda self, e: f"CAST({self.sql(e, 'this')} AS DATE)",
            exp.DateToDi: lambda self, e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.dateint_format}) AS INT)",
            exp.DiToDate: lambda self, e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.dateint_format}) AS DATE)",
            exp.Explode: rename_func("UNNEST"),
            exp.JSONExtract: arrow_json_extract_sql,
            exp.JSONExtractScalar: arrow_json_extract_scalar_sql,
            exp.JSONBExtract: arrow_json_extract_sql,
            exp.JSONBExtractScalar: arrow_json_extract_scalar_sql,
            exp.RegexpLike: rename_func("REGEXP_MATCHES"),
            exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"),
            exp.SafeDivide: no_safe_divide_sql,
            exp.Split: rename_func("STR_SPLIT"),
            exp.SortArray: _sort_array_sql,
            exp.StrPosition: str_position_sql,
            exp.StrToDate: lambda self, e: f"CAST({_str_to_time_sql(self, e)} AS DATE)",
            exp.StrToTime: _str_to_time_sql,
            exp.StrToUnix: lambda self, e: f"EPOCH(STRPTIME({self.sql(e, 'this')}, {self.format_time(e)}))",
            exp.Struct: _struct_pack_sql,
            exp.TableSample: no_tablesample_sql,
            exp.TimeStrToDate: lambda self, e: f"CAST({self.sql(e, 'this')} AS DATE)",
            exp.TimeStrToTime: lambda self, e: f"CAST({self.sql(e, 'this')} AS TIMESTAMP)",
            exp.TimeStrToUnix: lambda self, e: f"EPOCH(CAST({self.sql(e, 'this')} AS TIMESTAMP))",
            exp.TimeToStr: lambda self, e: f"STRFTIME({self.sql(e, 'this')}, {self.format_time(e)})",
            exp.TimeToUnix: rename_func("EPOCH"),
            exp.TsOrDiToDi: lambda self, e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)",
            exp.TsOrDsAdd: _ts_or_ds_add,
            exp.TsOrDsToDate: _ts_or_ds_to_date_sql,
            exp.UnixToStr: lambda self, e: f"STRFTIME({_unix_to_time(self, e)}, {self.format_time(e)})",
            exp.UnixToTime: _unix_to_time,
            exp.UnixToTimeStr: lambda self, e: f"CAST({_unix_to_time(self, e)} AS TEXT)",
        }

        TYPE_MAPPING = {
            **Generator.TYPE_MAPPING,
            exp.DataType.Type.VARCHAR: "TEXT",
            exp.DataType.Type.NVARCHAR: "TEXT",
        }
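The EPOCH_MS entry shows the pattern: a dialect-specific function is normalized into a generic node (exp.UnixToTime over a division by 1000) that any other generator can print. A sketch:

import sqlglot

# Hive renders exp.UnixToTime as FROM_UNIXTIME, so the milliseconds value
# arrives divided down to seconds.
print(sqlglot.transpile("SELECT EPOCH_MS(1618088028295)", read="duckdb", write="hive")[0])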
sqlglot/dialects/hive.py (new file, +304 lines)

from sqlglot import exp, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    approx_count_distinct_sql,
    format_time_lambda,
    if_sql,
    no_ilike_sql,
    no_recursive_cte_sql,
    no_safe_divide_sql,
    no_trycast_sql,
    rename_func,
    struct_extract_sql,
)
from sqlglot.generator import Generator
from sqlglot.helper import csv, list_get
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer


def _parse_map(args):
    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])
    return HiveMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )


def _map_sql(self, expression):
    keys = expression.args["keys"]
    values = expression.args["values"]

    if not isinstance(keys, exp.Array) or not isinstance(values, exp.Array):
        self.unsupported("Cannot convert array columns into map use SparkSQL instead.")
        return f"MAP({self.sql(keys)}, {self.sql(values)})"

    args = []
    for key, value in zip(keys.expressions, values.expressions):
        args.append(self.sql(key))
        args.append(self.sql(value))
    return f"MAP({csv(*args)})"


def _array_sort(self, expression):
    if expression.expression:
        self.unsupported("Hive SORT_ARRAY does not support a comparator")
    return f"SORT_ARRAY({self.sql(expression, 'this')})"


def _property_sql(self, expression):
    key = expression.name
    value = self.sql(expression, "value")
    return f"'{key}' = {value}"


def _str_to_unix(self, expression):
    return f"UNIX_TIMESTAMP({csv(self.sql(expression, 'this'), _time_format(self, expression))})"


def _str_to_date(self, expression):
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    if time_format not in (Hive.time_format, Hive.date_format):
        this = f"FROM_UNIXTIME(UNIX_TIMESTAMP({this}, {time_format}))"
    return f"CAST({this} AS DATE)"


def _str_to_time(self, expression):
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    if time_format not in (Hive.time_format, Hive.date_format):
        this = f"FROM_UNIXTIME(UNIX_TIMESTAMP({this}, {time_format}))"
    return f"CAST({this} AS TIMESTAMP)"


def _time_format(self, expression):
    time_format = self.format_time(expression)
    if time_format == Hive.time_format:
        return None
    return time_format


def _time_to_str(self, expression):
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    return f"DATE_FORMAT({this}, {time_format})"


def _to_date_sql(self, expression):
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    if time_format and time_format not in (Hive.time_format, Hive.date_format):
        return f"TO_DATE({this}, {time_format})"
    return f"TO_DATE({this})"


def _unnest_to_explode_sql(self, expression):
    unnest = expression.this
    if isinstance(unnest, exp.Unnest):
        alias = unnest.args.get("alias")
        udtf = exp.Posexplode if unnest.args.get("ordinality") else exp.Explode
        return "".join(
            self.sql(
                exp.Lateral(
                    this=udtf(this=expression),
                    alias=exp.TableAlias(this=alias.this, columns=[column]),
                )
            )
            for expression, column in zip(
                unnest.expressions, alias.columns if alias else []
            )
        )
    return self.join_sql(expression)


def _index_sql(self, expression):
    this = self.sql(expression, "this")
    table = self.sql(expression, "table")
    columns = self.sql(expression, "columns")
    return f"{this} ON TABLE {table} {columns}"


class HiveMap(exp.Map):
    is_var_len_args = True


class Hive(Dialect):
    alias_post_tablesample = True

    time_mapping = {
        "y": "%Y",
        "Y": "%Y",
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "MMM": "%b",
        "MM": "%m",
        "M": "%-m",
        "dd": "%d",
        "d": "%-d",
        "HH": "%H",
        "H": "%-H",
        "hh": "%I",
        "h": "%-I",
        "mm": "%M",
        "m": "%-M",
        "ss": "%S",
        "s": "%-S",
        "S": "%f",
    }

    date_format = "'yyyy-MM-dd'"
    dateint_format = "'yyyyMMdd'"
    time_format = "'yyyy-MM-dd HH:mm:ss'"

    class Tokenizer(Tokenizer):
        QUOTES = ["'", '"']
        IDENTIFIERS = ["`"]
        ESCAPE = "\\"
        ENCODE = "utf-8"

        NUMERIC_LITERALS = {
            "L": "BIGINT",
            "S": "SMALLINT",
            "Y": "TINYINT",
            "D": "DOUBLE",
            "F": "FLOAT",
            "BD": "DECIMAL",
        }

    class Parser(Parser):
        STRICT_CAST = False

        FUNCTIONS = {
            **Parser.FUNCTIONS,
            "APPROX_COUNT_DISTINCT": exp.ApproxDistinct.from_arg_list,
            "COLLECT_LIST": exp.ArrayAgg.from_arg_list,
            "DATE_ADD": lambda args: exp.TsOrDsAdd(
                this=list_get(args, 0),
                expression=list_get(args, 1),
                unit=exp.Literal.string("DAY"),
            ),
            "DATEDIFF": lambda args: exp.DateDiff(
                this=exp.TsOrDsToDate(this=list_get(args, 0)),
                expression=exp.TsOrDsToDate(this=list_get(args, 1)),
            ),
            "DATE_SUB": lambda args: exp.TsOrDsAdd(
                this=list_get(args, 0),
                expression=exp.Mul(
                    this=list_get(args, 1),
                    expression=exp.Literal.number(-1),
                ),
                unit=exp.Literal.string("DAY"),
            ),
            "DATE_FORMAT": format_time_lambda(exp.TimeToStr, "hive"),
            "DAY": lambda args: exp.Day(this=exp.TsOrDsToDate(this=list_get(args, 0))),
            "FROM_UNIXTIME": format_time_lambda(exp.UnixToStr, "hive", True),
            "GET_JSON_OBJECT": exp.JSONExtractScalar.from_arg_list,
            "LOCATE": lambda args: exp.StrPosition(
                this=list_get(args, 1),
                substr=list_get(args, 0),
                position=list_get(args, 2),
            ),
            "LOG": (
                lambda args: exp.Log.from_arg_list(args)
                if len(args) > 1
                else exp.Ln.from_arg_list(args)
            ),
            "MAP": _parse_map,
            "MONTH": lambda args: exp.Month(this=exp.TsOrDsToDate.from_arg_list(args)),
            "PERCENTILE": exp.Quantile.from_arg_list,
            "COLLECT_SET": exp.SetAgg.from_arg_list,
            "SIZE": exp.ArraySize.from_arg_list,
            "SPLIT": exp.RegexpSplit.from_arg_list,
            "TO_DATE": format_time_lambda(exp.TsOrDsToDate, "hive"),
            "UNIX_TIMESTAMP": format_time_lambda(exp.StrToUnix, "hive", True),
            "YEAR": lambda args: exp.Year(this=exp.TsOrDsToDate.from_arg_list(args)),
        }

    class Generator(Generator):
        ROOT_PROPERTIES = [
            exp.PartitionedByProperty,
            exp.FileFormatProperty,
            exp.SchemaCommentProperty,
            exp.LocationProperty,
            exp.TableFormatProperty,
        ]
        WITH_PROPERTIES = [exp.AnonymousProperty]

        TYPE_MAPPING = {
            **Generator.TYPE_MAPPING,
            exp.DataType.Type.TEXT: "STRING",
        }

        TRANSFORMS = {
            **Generator.TRANSFORMS,
            **transforms.UNALIAS_GROUP,
            exp.AnonymousProperty: _property_sql,
            exp.ApproxDistinct: approx_count_distinct_sql,
            exp.ArrayAgg: rename_func("COLLECT_LIST"),
            exp.ArraySize: rename_func("SIZE"),
            exp.ArraySort: _array_sort,
            exp.With: no_recursive_cte_sql,
            exp.DateAdd: lambda self, e: f"DATE_ADD({self.sql(e, 'this')}, {self.sql(e, 'expression')})",
            exp.DateDiff: lambda self, e: f"DATEDIFF({self.sql(e, 'this')}, {self.sql(e, 'expression')})",
            exp.DateStrToDate: rename_func("TO_DATE"),
            exp.DateToDi: lambda self, e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Hive.dateint_format}) AS INT)",
            exp.DiToDate: lambda self, e: f"TO_DATE(CAST({self.sql(e, 'this')} AS STRING), {Hive.dateint_format})",
            exp.FileFormatProperty: lambda self, e: f"STORED AS {e.text('value').upper()}",
            exp.If: if_sql,
            exp.Index: _index_sql,
            exp.ILike: no_ilike_sql,
            exp.Join: _unnest_to_explode_sql,
            exp.JSONExtract: rename_func("GET_JSON_OBJECT"),
            exp.JSONExtractScalar: rename_func("GET_JSON_OBJECT"),
            exp.Map: _map_sql,
            HiveMap: _map_sql,
            exp.PartitionedByProperty: lambda self, e: f"PARTITIONED BY {self.sql(e.args['value'])}",
            exp.Quantile: rename_func("PERCENTILE"),
            exp.RegexpLike: lambda self, e: self.binary(e, "RLIKE"),
            exp.RegexpSplit: rename_func("SPLIT"),
            exp.SafeDivide: no_safe_divide_sql,
            exp.SchemaCommentProperty: lambda self, e: f"COMMENT {self.sql(e.args['value'])}",
            exp.SetAgg: rename_func("COLLECT_SET"),
            exp.Split: lambda self, e: f"SPLIT({self.sql(e, 'this')}, CONCAT('\\\\Q', {self.sql(e, 'expression')}))",
            exp.StrPosition: lambda self, e: f"LOCATE({csv(self.sql(e, 'substr'), self.sql(e, 'this'), self.sql(e, 'position'))})",
            exp.StrToDate: _str_to_date,
            exp.StrToTime: _str_to_time,
            exp.StrToUnix: _str_to_unix,
            exp.StructExtract: struct_extract_sql,
            exp.TableFormatProperty: lambda self, e: f"USING {self.sql(e, 'value')}",
            exp.TimeStrToDate: rename_func("TO_DATE"),
            exp.TimeStrToTime: lambda self, e: f"CAST({self.sql(e, 'this')} AS TIMESTAMP)",
            exp.TimeStrToUnix: rename_func("UNIX_TIMESTAMP"),
            exp.TimeToStr: _time_to_str,
            exp.TimeToUnix: rename_func("UNIX_TIMESTAMP"),
            exp.TsOrDiToDi: lambda self, e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS STRING), '-', ''), 1, 8) AS INT)",
            exp.TsOrDsAdd: lambda self, e: f"DATE_ADD({self.sql(e, 'this')}, {self.sql(e, 'expression')})",
            exp.TsOrDsToDate: _to_date_sql,
            exp.TryCast: no_trycast_sql,
            exp.UnixToStr: lambda self, e: f"FROM_UNIXTIME({csv(self.sql(e, 'this'), _time_format(self, e))})",
            exp.UnixToTime: rename_func("FROM_UNIXTIME"),
            exp.UnixToTimeStr: rename_func("FROM_UNIXTIME"),
        }

        def with_properties(self, properties):
            return self.properties(
                properties,
                prefix="TBLPROPERTIES",
            )

        def datatype_sql(self, expression):
            if (
                expression.this
                in (exp.DataType.Type.VARCHAR, exp.DataType.Type.NVARCHAR)
                and not expression.expressions
            ):
                expression = exp.DataType.build("text")
            return super().datatype_sql(expression)
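time_mapping is what lets Hive's Java-style format tokens move between dialects: they are translated into strftime tokens on parse and back out on generation. A sketch:

import sqlglot

# 'yyyy-MM-dd' -> '%Y-%m-%d' via time_mapping, then into DuckDB's STRFTIME.
print(sqlglot.transpile("SELECT DATE_FORMAT(ds, 'yyyy-MM-dd')", read="hive", write="duckdb")[0])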
sqlglot/dialects/mysql.py (new file, +163 lines)

from sqlglot import exp
from sqlglot.dialects.dialect import (
    Dialect,
    no_ilike_sql,
    no_paren_current_date_sql,
    no_tablesample_sql,
    no_trycast_sql,
)
from sqlglot.generator import Generator
from sqlglot.helper import list_get
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer, TokenType


def _date_trunc_sql(self, expression):
    unit = expression.text("unit").lower()

    this = self.sql(expression.this)

    if unit == "day":
        return f"DATE({this})"

    if unit == "week":
        concat = f"CONCAT(YEAR({this}), ' ', WEEK({this}, 1), ' 1')"
        date_format = "%Y %u %w"
    elif unit == "month":
        concat = f"CONCAT(YEAR({this}), ' ', MONTH({this}), ' 1')"
        date_format = "%Y %c %e"
    elif unit == "quarter":
        concat = f"CONCAT(YEAR({this}), ' ', QUARTER({this}) * 3 - 2, ' 1')"
        date_format = "%Y %c %e"
    elif unit == "year":
        concat = f"CONCAT(YEAR({this}), ' 1 1')"
        date_format = "%Y %c %e"
    else:
        self.unsupported(f"Unexpected interval unit: {unit}")
        return f"DATE({this})"

    return f"STR_TO_DATE({concat}, '{date_format}')"


def _str_to_date(args):
    date_format = MySQL.format_time(list_get(args, 1))
    return exp.StrToDate(this=list_get(args, 0), format=date_format)


def _str_to_date_sql(self, expression):
    date_format = self.format_time(expression)
    return f"STR_TO_DATE({self.sql(expression.this)}, {date_format})"


def _date_add(expression_class):
    def func(args):
        interval = list_get(args, 1)
        return expression_class(
            this=list_get(args, 0),
            expression=interval.this,
            unit=exp.Literal.string(interval.text("unit").lower()),
        )

    return func


def _date_add_sql(kind):
    def func(self, expression):
        this = self.sql(expression, "this")
        unit = expression.text("unit").upper() or "DAY"
        expression = self.sql(expression, "expression")
        return f"DATE_{kind}({this}, INTERVAL {expression} {unit})"

    return func


class MySQL(Dialect):
    # https://prestodb.io/docs/current/functions/datetime.html#mysql-date-functions
    time_mapping = {
        "%M": "%B",
        "%c": "%-m",
        "%e": "%-d",
        "%h": "%I",
        "%i": "%M",
        "%s": "%S",
        "%S": "%S",
        "%u": "%W",
    }

    class Tokenizer(Tokenizer):
        QUOTES = ["'", '"']
        COMMENTS = ["--", "#", ("/*", "*/")]
        IDENTIFIERS = ["`"]

        KEYWORDS = {
            **Tokenizer.KEYWORDS,
            "_ARMSCII8": TokenType.INTRODUCER,
            "_ASCII": TokenType.INTRODUCER,
            "_BIG5": TokenType.INTRODUCER,
            "_BINARY": TokenType.INTRODUCER,
            "_CP1250": TokenType.INTRODUCER,
            "_CP1251": TokenType.INTRODUCER,
            "_CP1256": TokenType.INTRODUCER,
            "_CP1257": TokenType.INTRODUCER,
            "_CP850": TokenType.INTRODUCER,
            "_CP852": TokenType.INTRODUCER,
            "_CP866": TokenType.INTRODUCER,
            "_CP932": TokenType.INTRODUCER,
            "_DEC8": TokenType.INTRODUCER,
            "_EUCJPMS": TokenType.INTRODUCER,
            "_EUCKR": TokenType.INTRODUCER,
            "_GB18030": TokenType.INTRODUCER,
            "_GB2312": TokenType.INTRODUCER,
            "_GBK": TokenType.INTRODUCER,
            "_GEOSTD8": TokenType.INTRODUCER,
            "_GREEK": TokenType.INTRODUCER,
            "_HEBREW": TokenType.INTRODUCER,
            "_HP8": TokenType.INTRODUCER,
            "_KEYBCS2": TokenType.INTRODUCER,
            "_KOI8R": TokenType.INTRODUCER,
            "_KOI8U": TokenType.INTRODUCER,
            "_LATIN1": TokenType.INTRODUCER,
            "_LATIN2": TokenType.INTRODUCER,
            "_LATIN5": TokenType.INTRODUCER,
            "_LATIN7": TokenType.INTRODUCER,
            "_MACCE": TokenType.INTRODUCER,
            "_MACROMAN": TokenType.INTRODUCER,
            "_SJIS": TokenType.INTRODUCER,
            "_SWE7": TokenType.INTRODUCER,
            "_TIS620": TokenType.INTRODUCER,
            "_UCS2": TokenType.INTRODUCER,
            "_UJIS": TokenType.INTRODUCER,
            "_UTF8": TokenType.INTRODUCER,
            "_UTF16": TokenType.INTRODUCER,
            "_UTF16LE": TokenType.INTRODUCER,
            "_UTF32": TokenType.INTRODUCER,
            "_UTF8MB3": TokenType.INTRODUCER,
            "_UTF8MB4": TokenType.INTRODUCER,
        }

    class Parser(Parser):
        STRICT_CAST = False

        FUNCTIONS = {
            **Parser.FUNCTIONS,
            "DATE_ADD": _date_add(exp.DateAdd),
            "DATE_SUB": _date_add(exp.DateSub),
            "STR_TO_DATE": _str_to_date,
        }

    class Generator(Generator):
        NULL_ORDERING_SUPPORTED = False

        TRANSFORMS = {
            **Generator.TRANSFORMS,
            exp.CurrentDate: no_paren_current_date_sql,
            exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
            exp.ILike: no_ilike_sql,
            exp.TableSample: no_tablesample_sql,
            exp.TryCast: no_trycast_sql,
            exp.DateAdd: _date_add_sql("ADD"),
            exp.DateSub: _date_add_sql("SUB"),
            exp.DateTrunc: _date_trunc_sql,
            exp.StrToDate: _str_to_date_sql,
            exp.StrToTime: _str_to_date_sql,
        }
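_date_trunc_sql emulates DATE_TRUNC, which MySQL lacks, by formatting unit-specific date parts back through STR_TO_DATE. A hand-built node makes the rewrite visible (a sketch; created_at is an illustrative column name):

from sqlglot import exp
from sqlglot.dialects.mysql import MySQL

# 'week' takes the CONCAT(YEAR(...), WEEK(...)) branch above.
node = exp.DateTrunc(
    this=exp.Column(this=exp.Identifier(this="created_at")),
    unit=exp.Literal.string("week"),
)
print(MySQL().generate(node))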
sqlglot/dialects/oracle.py (new file, +63 lines)

from sqlglot import exp, transforms
from sqlglot.dialects.dialect import Dialect, no_ilike_sql
from sqlglot.generator import Generator
from sqlglot.helper import csv
from sqlglot.tokens import Tokenizer, TokenType


def _limit_sql(self, expression):
    return self.fetch_sql(exp.Fetch(direction="FIRST", count=expression.expression))


class Oracle(Dialect):
    class Generator(Generator):
        TYPE_MAPPING = {
            **Generator.TYPE_MAPPING,
            exp.DataType.Type.TINYINT: "NUMBER",
            exp.DataType.Type.SMALLINT: "NUMBER",
            exp.DataType.Type.INT: "NUMBER",
            exp.DataType.Type.BIGINT: "NUMBER",
            exp.DataType.Type.DECIMAL: "NUMBER",
            exp.DataType.Type.DOUBLE: "DOUBLE PRECISION",
            exp.DataType.Type.VARCHAR: "VARCHAR2",
            exp.DataType.Type.NVARCHAR: "NVARCHAR2",
            exp.DataType.Type.TEXT: "CLOB",
            exp.DataType.Type.BINARY: "BLOB",
        }

        TRANSFORMS = {
            **Generator.TRANSFORMS,
            **transforms.UNALIAS_GROUP,
            exp.ILike: no_ilike_sql,
            exp.Limit: _limit_sql,
        }

        def query_modifiers(self, expression, *sqls):
            return csv(
                *sqls,
                *[self.sql(sql) for sql in expression.args.get("laterals", [])],
                *[self.sql(sql) for sql in expression.args.get("joins", [])],
                self.sql(expression, "where"),
                self.sql(expression, "group"),
                self.sql(expression, "having"),
                self.sql(expression, "qualify"),
                self.sql(expression, "window"),
                self.sql(expression, "distribute"),
                self.sql(expression, "sort"),
                self.sql(expression, "cluster"),
                self.sql(expression, "order"),
                self.sql(expression, "offset"),  # offset before limit in oracle
                self.sql(expression, "limit"),
                sep="",
            )

        def offset_sql(self, expression):
            return f"{super().offset_sql(expression)} ROWS"

    class Tokenizer(Tokenizer):
        KEYWORDS = {
            **Tokenizer.KEYWORDS,
            "TOP": TokenType.TOP,
            "VARCHAR2": TokenType.VARCHAR,
            "NVARCHAR2": TokenType.NVARCHAR,
        }
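_limit_sql reuses the generic FETCH machinery instead of emitting LIMIT, which Oracle does not support. A sketch:

import sqlglot

# exp.Limit is rewritten to an ANSI FETCH FIRST ... ROWS ONLY clause.
print(sqlglot.transpile("SELECT * FROM t LIMIT 10", write="oracle")[0])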
sqlglot/dialects/postgres.py (new file, +109 lines)

from sqlglot import exp
from sqlglot.dialects.dialect import (
    Dialect,
    arrow_json_extract_scalar_sql,
    arrow_json_extract_sql,
    format_time_lambda,
    no_paren_current_date_sql,
    no_tablesample_sql,
    no_trycast_sql,
)
from sqlglot.generator import Generator
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer, TokenType


def _date_add_sql(kind):
    def func(self, expression):
        from sqlglot.optimizer.simplify import simplify

        this = self.sql(expression, "this")
        unit = self.sql(expression, "unit")
        expression = simplify(expression.args["expression"])

        if not isinstance(expression, exp.Literal):
            self.unsupported("Cannot add non literal")

        expression = expression.copy()
        expression.args["is_string"] = True
        expression = self.sql(expression)
        return f"{this} {kind} INTERVAL {expression} {unit}"

    return func


class Postgres(Dialect):
    null_ordering = "nulls_are_large"
    time_format = "'YYYY-MM-DD HH24:MI:SS'"
    time_mapping = {
        "AM": "%p",  # AM or PM
        "D": "%w",  # 1-based day of week
        "DD": "%d",  # day of month
        "DDD": "%j",  # zero padded day of year
        "FMDD": "%-d",  # - is no leading zero for Python; same for FM in postgres
        "FMDDD": "%-j",  # day of year
        "FMHH12": "%-I",  # 9
        "FMHH24": "%-H",  # 9
        "FMMI": "%-M",  # Minute
        "FMMM": "%-m",  # 1
        "FMSS": "%-S",  # Second
        "HH12": "%I",  # 09
        "HH24": "%H",  # 09
        "MI": "%M",  # zero padded minute
        "MM": "%m",  # 01
        "OF": "%z",  # utc offset
        "SS": "%S",  # zero padded second
        "TMDay": "%A",  # TM is locale dependent
        "TMDy": "%a",
        "TMMon": "%b",  # Sep
        "TMMonth": "%B",  # September
        "TZ": "%Z",  # uppercase timezone name
        "US": "%f",  # zero padded microsecond
        "WW": "%U",  # 1-based week of year
        "YY": "%y",  # 15
        "YYYY": "%Y",  # 2015
    }

    class Tokenizer(Tokenizer):
        KEYWORDS = {
            **Tokenizer.KEYWORDS,
            "SERIAL": TokenType.AUTO_INCREMENT,
            "UUID": TokenType.UUID,
        }

    class Parser(Parser):
        STRICT_CAST = False
        FUNCTIONS = {
            **Parser.FUNCTIONS,
            "TO_TIMESTAMP": format_time_lambda(exp.StrToTime, "postgres"),
            "TO_CHAR": format_time_lambda(exp.TimeToStr, "postgres"),
        }

    class Generator(Generator):
        TYPE_MAPPING = {
            **Generator.TYPE_MAPPING,
            exp.DataType.Type.TINYINT: "SMALLINT",
            exp.DataType.Type.FLOAT: "REAL",
            exp.DataType.Type.DOUBLE: "DOUBLE PRECISION",
            exp.DataType.Type.BINARY: "BYTEA",
        }

        TOKEN_MAPPING = {
            TokenType.AUTO_INCREMENT: "SERIAL",
        }

        TRANSFORMS = {
            **Generator.TRANSFORMS,
            exp.JSONExtract: arrow_json_extract_sql,
            exp.JSONExtractScalar: arrow_json_extract_scalar_sql,
            exp.JSONBExtract: lambda self, e: f"{self.sql(e, 'this')}#>{self.sql(e, 'path')}",
            exp.JSONBExtractScalar: lambda self, e: f"{self.sql(e, 'this')}#>>{self.sql(e, 'path')}",
            exp.CurrentDate: no_paren_current_date_sql,
            exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
            exp.DateAdd: _date_add_sql("+"),
            exp.DateSub: _date_add_sql("-"),
            exp.StrToTime: lambda self, e: f"TO_TIMESTAMP({self.sql(e, 'this')}, {self.format_time(e)})",
            exp.TimeToStr: lambda self, e: f"TO_CHAR({self.sql(e, 'this')}, {self.format_time(e)})",
            exp.TableSample: no_tablesample_sql,
            exp.TryCast: no_trycast_sql,
        }
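_date_add_sql prints date arithmetic as interval addition or subtraction, simplifying the amount first and flagging non-literal amounts as unsupported. A sketch using MySQL as the source dialect:

import sqlglot

print(sqlglot.transpile("SELECT DATE_ADD(d, INTERVAL 1 DAY)", read="mysql", write="postgres")[0])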
sqlglot/dialects/presto.py (new file, +216 lines)

from sqlglot import exp, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    format_time_lambda,
    if_sql,
    no_ilike_sql,
    no_safe_divide_sql,
    rename_func,
    str_position_sql,
    struct_extract_sql,
)
from sqlglot.dialects.mysql import MySQL
from sqlglot.generator import Generator
from sqlglot.helper import csv, list_get
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer, TokenType


def _approx_distinct_sql(self, expression):
    accuracy = expression.args.get("accuracy")
    accuracy = ", " + self.sql(accuracy) if accuracy else ""
    return f"APPROX_DISTINCT({self.sql(expression, 'this')}{accuracy})"


def _concat_ws_sql(self, expression):
    sep, *args = expression.expressions
    sep = self.sql(sep)
    if len(args) > 1:
        return f"ARRAY_JOIN(ARRAY[{csv(*(self.sql(e) for e in args))}], {sep})"
    return f"ARRAY_JOIN({self.sql(args[0])}, {sep})"


def _datatype_sql(self, expression):
    sql = self.datatype_sql(expression)
    if expression.this == exp.DataType.Type.TIMESTAMPTZ:
        sql = f"{sql} WITH TIME ZONE"
    return sql


def _date_parse_sql(self, expression):
    return f"DATE_PARSE({self.sql(expression, 'this')}, '%Y-%m-%d %H:%i:%s')"


def _explode_to_unnest_sql(self, expression):
    if isinstance(expression.this, (exp.Explode, exp.Posexplode)):
        return self.sql(
            exp.Join(
                this=exp.Unnest(
                    expressions=[expression.this.this],
                    alias=expression.args.get("alias"),
                    ordinality=isinstance(expression.this, exp.Posexplode),
                ),
                kind="cross",
            )
        )
    return self.lateral_sql(expression)


def _initcap_sql(self, expression):
    regex = r"(\w)(\w*)"
    return f"REGEXP_REPLACE({self.sql(expression, 'this')}, '{regex}', x -> UPPER(x[1]) || LOWER(x[2]))"


def _no_sort_array(self, expression):
    if expression.args.get("asc") == exp.FALSE:
        comparator = "(a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END"
    else:
        comparator = None
    args = csv(self.sql(expression, "this"), comparator)
    return f"ARRAY_SORT({args})"


def _schema_sql(self, expression):
    if isinstance(expression.parent, exp.Property):
        columns = ", ".join(f"'{c.text('this')}'" for c in expression.expressions)
        return f"ARRAY[{columns}]"

    for schema in expression.parent.find_all(exp.Schema):
        if isinstance(schema.parent, exp.Property):
            expression = expression.copy()
            expression.expressions.extend(schema.expressions)

    return self.schema_sql(expression)


def _quantile_sql(self, expression):
    self.unsupported("Presto does not support exact quantiles")
    return f"APPROX_PERCENTILE({self.sql(expression, 'this')}, {self.sql(expression, 'quantile')})"


def _str_to_time_sql(self, expression):
    return f"DATE_PARSE({self.sql(expression, 'this')}, {self.format_time(expression)})"


def _ts_or_ds_to_date_sql(self, expression):
    time_format = self.format_time(expression)
    if time_format and time_format not in (Presto.time_format, Presto.date_format):
        return f"CAST({_str_to_time_sql(self, expression)} AS DATE)"
    return (
        f"CAST(SUBSTR(CAST({self.sql(expression, 'this')} AS VARCHAR), 1, 10) AS DATE)"
    )


def _ts_or_ds_add_sql(self, expression):
    this = self.sql(expression, "this")
    e = self.sql(expression, "expression")
    unit = self.sql(expression, "unit") or "'day'"
    return f"DATE_ADD({unit}, {e}, DATE_PARSE(SUBSTR({this}, 1, 10), {Presto.date_format}))"


class Presto(Dialect):
    index_offset = 1
    null_ordering = "nulls_are_last"
    time_format = "'%Y-%m-%d %H:%i:%S'"
    time_mapping = MySQL.time_mapping

    class Tokenizer(Tokenizer):
        KEYWORDS = {
            **Tokenizer.KEYWORDS,
            "ROW": TokenType.STRUCT,
        }

    class Parser(Parser):
        FUNCTIONS = {
            **Parser.FUNCTIONS,
            "APPROX_DISTINCT": exp.ApproxDistinct.from_arg_list,
            "CARDINALITY": exp.ArraySize.from_arg_list,
            "CONTAINS": exp.ArrayContains.from_arg_list,
            "DATE_ADD": lambda args: exp.DateAdd(
                this=list_get(args, 2),
                expression=list_get(args, 1),
                unit=list_get(args, 0),
            ),
            "DATE_DIFF": lambda args: exp.DateDiff(
                this=list_get(args, 2),
                expression=list_get(args, 1),
                unit=list_get(args, 0),
            ),
            "DATE_FORMAT": format_time_lambda(exp.TimeToStr, "presto"),
            "DATE_PARSE": format_time_lambda(exp.StrToTime, "presto"),
            "FROM_UNIXTIME": exp.UnixToTime.from_arg_list,
            "STRPOS": exp.StrPosition.from_arg_list,
            "TO_UNIXTIME": exp.TimeToUnix.from_arg_list,
        }

    class Generator(Generator):
        STRUCT_DELIMITER = ("(", ")")

        WITH_PROPERTIES = [
            exp.PartitionedByProperty,
            exp.FileFormatProperty,
            exp.SchemaCommentProperty,
            exp.AnonymousProperty,
            exp.TableFormatProperty,
        ]

        TYPE_MAPPING = {
            **Generator.TYPE_MAPPING,
            exp.DataType.Type.INT: "INTEGER",
            exp.DataType.Type.FLOAT: "REAL",
            exp.DataType.Type.BINARY: "VARBINARY",
            exp.DataType.Type.TEXT: "VARCHAR",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.STRUCT: "ROW",
        }

        TRANSFORMS = {
            **Generator.TRANSFORMS,
            **transforms.UNALIAS_GROUP,
            exp.ApproxDistinct: _approx_distinct_sql,
            exp.Array: lambda self, e: f"ARRAY[{self.expressions(e, flat=True)}]",
            exp.ArrayContains: rename_func("CONTAINS"),
            exp.ArraySize: rename_func("CARDINALITY"),
            exp.BitwiseAnd: lambda self, e: f"BITWISE_AND({self.sql(e, 'this')}, {self.sql(e, 'expression')})",
            exp.BitwiseLeftShift: lambda self, e: f"BITWISE_ARITHMETIC_SHIFT_LEFT({self.sql(e, 'this')}, {self.sql(e, 'expression')})",
            exp.BitwiseNot: lambda self, e: f"BITWISE_NOT({self.sql(e, 'this')})",
            exp.BitwiseOr: lambda self, e: f"BITWISE_OR({self.sql(e, 'this')}, {self.sql(e, 'expression')})",
            exp.BitwiseRightShift: lambda self, e: f"BITWISE_ARITHMETIC_SHIFT_RIGHT({self.sql(e, 'this')}, {self.sql(e, 'expression')})",
            exp.BitwiseXor: lambda self, e: f"BITWISE_XOR({self.sql(e, 'this')}, {self.sql(e, 'expression')})",
            exp.ConcatWs: _concat_ws_sql,
            exp.DataType: _datatype_sql,
            exp.DateAdd: lambda self, e: f"""DATE_ADD({self.sql(e, 'unit') or "'day'"}, {self.sql(e, 'expression')}, {self.sql(e, 'this')})""",
            exp.DateDiff: lambda self, e: f"""DATE_DIFF({self.sql(e, 'unit') or "'day'"}, {self.sql(e, 'expression')}, {self.sql(e, 'this')})""",
            exp.DateStrToDate: lambda self, e: f"CAST(DATE_PARSE({self.sql(e, 'this')}, {Presto.date_format}) AS DATE)",
            exp.DateToDi: lambda self, e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Presto.dateint_format}) AS INT)",
            exp.DiToDate: lambda self, e: f"CAST(DATE_PARSE(CAST({self.sql(e, 'this')} AS VARCHAR), {Presto.dateint_format}) AS DATE)",
            exp.FileFormatProperty: lambda self, e: self.property_sql(e),
            exp.If: if_sql,
            exp.ILike: no_ilike_sql,
            exp.Initcap: _initcap_sql,
            exp.Lateral: _explode_to_unnest_sql,
            exp.Levenshtein: rename_func("LEVENSHTEIN_DISTANCE"),
            exp.PartitionedByProperty: lambda self, e: f"PARTITIONED_BY = {self.sql(e.args['value'])}",
            exp.Quantile: _quantile_sql,
            exp.SafeDivide: no_safe_divide_sql,
            exp.Schema: _schema_sql,
            exp.SortArray: _no_sort_array,
            exp.StrPosition: str_position_sql,
            exp.StrToDate: lambda self, e: f"CAST({_str_to_time_sql(self, e)} AS DATE)",
            exp.StrToTime: _str_to_time_sql,
            exp.StrToUnix: lambda self, e: f"TO_UNIXTIME(DATE_PARSE({self.sql(e, 'this')}, {self.format_time(e)}))",
            exp.StructExtract: struct_extract_sql,
            exp.TableFormatProperty: lambda self, e: f"TABLE_FORMAT = '{e.text('value').upper()}'",
            exp.TimeStrToDate: _date_parse_sql,
            exp.TimeStrToTime: _date_parse_sql,
            exp.TimeStrToUnix: lambda self, e: f"TO_UNIXTIME(DATE_PARSE({self.sql(e, 'this')}, {Presto.time_format}))",
            exp.TimeToStr: lambda self, e: f"DATE_FORMAT({self.sql(e, 'this')}, {self.format_time(e)})",
            exp.TimeToUnix: rename_func("TO_UNIXTIME"),
            exp.TsOrDiToDi: lambda self, e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS VARCHAR), '-', ''), 1, 8) AS INT)",
            exp.TsOrDsAdd: _ts_or_ds_add_sql,
            exp.TsOrDsToDate: _ts_or_ds_to_date_sql,
            exp.UnixToStr: lambda self, e: f"DATE_FORMAT(FROM_UNIXTIME({self.sql(e, 'this')}), {self.format_time(e)})",
            exp.UnixToTime: rename_func("FROM_UNIXTIME"),
            exp.UnixToTimeStr: lambda self, e: f"CAST(FROM_UNIXTIME({self.sql(e, 'this')}) AS VARCHAR)",
        }
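_explode_to_unnest_sql is the inverse of Hive's lateral-view handling: an EXPLODE (or POSEXPLODE, via ordinality) becomes a CROSS JOIN UNNEST. A sketch:

import sqlglot

sql = "SELECT a FROM x LATERAL VIEW EXPLODE(y) t AS a"
print(sqlglot.transpile(sql, read="hive", write="presto")[0])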
145
sqlglot/dialects/snowflake.py
Normal file
145
sqlglot/dialects/snowflake.py
Normal file
|
@ -0,0 +1,145 @@
from sqlglot import exp
from sqlglot.dialects.dialect import Dialect, format_time_lambda, rename_func
from sqlglot.expressions import Literal
from sqlglot.generator import Generator
from sqlglot.helper import list_get
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer, TokenType


def _check_int(s):
    if s[0] in ("-", "+"):
        return s[1:].isdigit()
    return s.isdigit()


# from https://docs.snowflake.com/en/sql-reference/functions/to_timestamp.html
def _snowflake_to_timestamp(args):
    if len(args) == 2:
        first_arg, second_arg = args
        if second_arg.is_string:
            # case: <string_expr> [ , <format> ]
            return format_time_lambda(exp.StrToTime, "snowflake")(args)

        # case: <numeric_expr> [ , <scale> ]
        if second_arg.name not in ["0", "3", "9"]:
            raise ValueError(
                f"Scale for snowflake numeric timestamp is {second_arg}, but should be 0, 3, or 9"
            )

        if second_arg.name == "0":
            timescale = exp.UnixToTime.SECONDS
        elif second_arg.name == "3":
            timescale = exp.UnixToTime.MILLIS
        elif second_arg.name == "9":
            timescale = exp.UnixToTime.MICROS

        return exp.UnixToTime(this=first_arg, scale=timescale)

    first_arg = list_get(args, 0)
    if not isinstance(first_arg, Literal):
        # case: <variant_expr>
        return format_time_lambda(exp.StrToTime, "snowflake", default=True)(args)

    if first_arg.is_string:
        if _check_int(first_arg.this):
            # case: <integer>
            return exp.UnixToTime.from_arg_list(args)

        # case: <date_expr>
        return format_time_lambda(exp.StrToTime, "snowflake", default=True)(args)

    # case: <numeric_expr>
    return exp.UnixToTime.from_arg_list(args)


def _unix_to_time(self, expression):
    scale = expression.args.get("scale")
    timestamp = self.sql(expression, "this")
    if scale in [None, exp.UnixToTime.SECONDS]:
        return f"TO_TIMESTAMP({timestamp})"
    if scale == exp.UnixToTime.MILLIS:
        return f"TO_TIMESTAMP({timestamp}, 3)"
    if scale == exp.UnixToTime.MICROS:
        return f"TO_TIMESTAMP({timestamp}, 9)"

    raise ValueError("Improper scale for timestamp")


class Snowflake(Dialect):
    null_ordering = "nulls_are_large"
    time_format = "'yyyy-mm-dd hh24:mi:ss'"

    time_mapping = {
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "mmmm": "%B",
        "MON": "%b",
        "mon": "%b",
        "MM": "%m",
        "mm": "%m",
        "DD": "%d",
        "dd": "%d",
        "d": "%-d",
        "DY": "%w",
        "dy": "%w",
        "HH24": "%H",
        "hh24": "%H",
        "HH12": "%I",
        "hh12": "%I",
        "MI": "%M",
        "mi": "%M",
        "SS": "%S",
        "ss": "%S",
        "FF": "%f",
        "ff": "%f",
        "FF6": "%f",
        "ff6": "%f",
    }

    class Parser(Parser):
        FUNCTIONS = {
            **Parser.FUNCTIONS,
            "ARRAYAGG": exp.ArrayAgg.from_arg_list,
            "IFF": exp.If.from_arg_list,
            "TO_TIMESTAMP": _snowflake_to_timestamp,
        }

        COLUMN_OPERATORS = {
            **Parser.COLUMN_OPERATORS,
            TokenType.COLON: lambda self, this, path: self.expression(
                exp.Bracket,
                this=this,
                expressions=[path],
            ),
        }

    class Tokenizer(Tokenizer):
        QUOTES = ["'", "$$"]
        ESCAPE = "\\"
        KEYWORDS = {
            **Tokenizer.KEYWORDS,
            "QUALIFY": TokenType.QUALIFY,
            "DOUBLE PRECISION": TokenType.DOUBLE,
        }

    class Generator(Generator):
        TRANSFORMS = {
            **Generator.TRANSFORMS,
            exp.If: rename_func("IFF"),
            exp.StrToTime: lambda self, e: f"TO_TIMESTAMP({self.sql(e, 'this')}, {self.format_time(e)})",
            exp.UnixToTime: _unix_to_time,
        }

        def except_op(self, expression):
            if not expression.args.get("distinct", False):
                self.unsupported("EXCEPT with All is not supported in Snowflake")
            return super().except_op(expression)

        def intersect_op(self, expression):
            if not expression.args.get("distinct", False):
                self.unsupported("INTERSECT with All is not supported in Snowflake")
            return super().intersect_op(expression)
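To see the scale handling in _snowflake_to_timestamp, parsing TO_TIMESTAMP with a numeric scale should produce an exp.UnixToTime carrying the matching timescale, which another dialect can then render natively. A minimal sketch, not part of the diff; the epoch value is invented and the output is approximate:

import sqlglot

# Scale 3 maps to exp.UnixToTime.MILLIS per _snowflake_to_timestamp above;
# Spark's _unix_to_time (next file) renders that scale as TIMESTAMP_MILLIS.
sql = "SELECT TO_TIMESTAMP(1659981729000, 3)"  # hypothetical epoch-millis value
print(sqlglot.transpile(sql, read="snowflake", write="spark")[0])
# Expected (approximately): SELECT TIMESTAMP_MILLIS(1659981729000)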
106
sqlglot/dialects/spark.py
Normal file
@@ -0,0 +1,106 @@
from sqlglot import exp
from sqlglot.dialects.dialect import no_ilike_sql, rename_func
from sqlglot.dialects.hive import Hive, HiveMap
from sqlglot.helper import list_get


def _create_sql(self, e):
    kind = e.args.get("kind")
    temporary = e.args.get("temporary")

    if kind.upper() == "TABLE" and temporary is True:
        return f"CREATE TEMPORARY VIEW {self.sql(e, 'this')} AS {self.sql(e, 'expression')}"
    return self.create_sql(e)


def _map_sql(self, expression):
    keys = self.sql(expression.args["keys"])
    values = self.sql(expression.args["values"])
    return f"MAP_FROM_ARRAYS({keys}, {values})"


def _str_to_date(self, expression):
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    if time_format == Hive.date_format:
        return f"TO_DATE({this})"
    return f"TO_DATE({this}, {time_format})"


def _unix_to_time(self, expression):
    scale = expression.args.get("scale")
    timestamp = self.sql(expression, "this")
    if scale is None:
        return f"FROM_UNIXTIME({timestamp})"
    if scale == exp.UnixToTime.SECONDS:
        return f"TIMESTAMP_SECONDS({timestamp})"
    if scale == exp.UnixToTime.MILLIS:
        return f"TIMESTAMP_MILLIS({timestamp})"
    if scale == exp.UnixToTime.MICROS:
        return f"TIMESTAMP_MICROS({timestamp})"

    raise ValueError("Improper scale for timestamp")


class Spark(Hive):
    class Parser(Hive.Parser):
        FUNCTIONS = {
            **Hive.Parser.FUNCTIONS,
            "MAP_FROM_ARRAYS": exp.Map.from_arg_list,
            "TO_UNIX_TIMESTAMP": exp.StrToUnix.from_arg_list,
            "LEFT": lambda args: exp.Substring(
                this=list_get(args, 0),
                start=exp.Literal.number(1),
                length=list_get(args, 1),
            ),
            "SHIFTLEFT": lambda args: exp.BitwiseLeftShift(
                this=list_get(args, 0),
                expression=list_get(args, 1),
            ),
            "SHIFTRIGHT": lambda args: exp.BitwiseRightShift(
                this=list_get(args, 0),
                expression=list_get(args, 1),
            ),
            "RIGHT": lambda args: exp.Substring(
                this=list_get(args, 0),
                start=exp.Sub(
                    this=exp.Length(this=list_get(args, 0)),
                    expression=exp.Add(
                        this=list_get(args, 1), expression=exp.Literal.number(1)
                    ),
                ),
                length=list_get(args, 1),
            ),
        }

    class Generator(Hive.Generator):
        TYPE_MAPPING = {
            **Hive.Generator.TYPE_MAPPING,
            exp.DataType.Type.TINYINT: "BYTE",
            exp.DataType.Type.SMALLINT: "SHORT",
            exp.DataType.Type.BIGINT: "LONG",
        }

        TRANSFORMS = {
            **{
                k: v
                for k, v in Hive.Generator.TRANSFORMS.items()
                if k not in {exp.ArraySort}
            },
            exp.ArraySum: lambda self, e: f"AGGREGATE({self.sql(e, 'this')}, 0, (acc, x) -> acc + x, acc -> acc)",
            exp.BitwiseLeftShift: rename_func("SHIFTLEFT"),
            exp.BitwiseRightShift: rename_func("SHIFTRIGHT"),
            exp.Hint: lambda self, e: f" /*+ {self.expressions(e).strip()} */",
            exp.ILike: no_ilike_sql,
            exp.StrToDate: _str_to_date,
            exp.StrToTime: lambda self, e: f"TO_TIMESTAMP({self.sql(e, 'this')}, {self.format_time(e)})",
            exp.UnixToTime: _unix_to_time,
            exp.Create: _create_sql,
            exp.Map: _map_sql,
            exp.Reduce: rename_func("AGGREGATE"),
            exp.StructKwarg: lambda self, e: f"{self.sql(e, 'this')}: {self.sql(e, 'expression')}",
            HiveMap: _map_sql,
        }

        def bitstring_sql(self, expression):
            return f"X'{self.sql(expression, 'this')}'"
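The LEFT/RIGHT lambdas above normalize Spark's string helpers into exp.Substring, so dialects without LEFT/RIGHT can still emit them. A rough sketch, not part of the diff; the table and column names are made up and the output is approximate:

import sqlglot

# LEFT(name, 2) parses to exp.Substring(this=name, start=1, length=2),
# which the default generator writes back as a plain SUBSTRING call.
print(sqlglot.transpile("SELECT LEFT(name, 2) FROM users", read="spark")[0])
# Expected (approximately): SELECT SUBSTRING(name, 1, 2) FROM users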
63
sqlglot/dialects/sqlite.py
Normal file
@@ -0,0 +1,63 @@
from sqlglot import exp
from sqlglot.dialects.dialect import (
    Dialect,
    arrow_json_extract_scalar_sql,
    arrow_json_extract_sql,
    no_ilike_sql,
    no_tablesample_sql,
    no_trycast_sql,
    rename_func,
)
from sqlglot.generator import Generator
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer, TokenType


class SQLite(Dialect):
    class Tokenizer(Tokenizer):
        IDENTIFIERS = ['"', ("[", "]"), "`"]

        KEYWORDS = {
            **Tokenizer.KEYWORDS,
            "AUTOINCREMENT": TokenType.AUTO_INCREMENT,
        }

    class Parser(Parser):
        FUNCTIONS = {
            **Parser.FUNCTIONS,
            "EDITDIST3": exp.Levenshtein.from_arg_list,
        }

    class Generator(Generator):
        TYPE_MAPPING = {
            **Generator.TYPE_MAPPING,
            exp.DataType.Type.BOOLEAN: "INTEGER",
            exp.DataType.Type.TINYINT: "INTEGER",
            exp.DataType.Type.SMALLINT: "INTEGER",
            exp.DataType.Type.INT: "INTEGER",
            exp.DataType.Type.BIGINT: "INTEGER",
            exp.DataType.Type.FLOAT: "REAL",
            exp.DataType.Type.DOUBLE: "REAL",
            exp.DataType.Type.DECIMAL: "REAL",
            exp.DataType.Type.CHAR: "TEXT",
            exp.DataType.Type.NCHAR: "TEXT",
            exp.DataType.Type.VARCHAR: "TEXT",
            exp.DataType.Type.NVARCHAR: "TEXT",
            exp.DataType.Type.BINARY: "BLOB",
        }

        TOKEN_MAPPING = {
            TokenType.AUTO_INCREMENT: "AUTOINCREMENT",
        }

        TRANSFORMS = {
            **Generator.TRANSFORMS,
            exp.ILike: no_ilike_sql,
            exp.JSONExtract: arrow_json_extract_sql,
            exp.JSONExtractScalar: arrow_json_extract_scalar_sql,
            exp.JSONBExtract: arrow_json_extract_sql,
            exp.JSONBExtractScalar: arrow_json_extract_scalar_sql,
            exp.Levenshtein: rename_func("EDITDIST3"),
            exp.TableSample: no_tablesample_sql,
            exp.TryCast: no_trycast_sql,
        }
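Because EDITDIST3 is registered both in Parser.FUNCTIONS and as the target of exp.Levenshtein, the mapping should work in both directions. A small sketch, not part of the diff; the column names are illustrative and the output is approximate:

import sqlglot

# Writing: the generic LEVENSHTEIN function becomes SQLite's EDITDIST3.
print(sqlglot.transpile("SELECT LEVENSHTEIN(a, b)", write="sqlite")[0])
# Expected (approximately): SELECT EDITDIST3(a, b)

# Reading: EDITDIST3 parses back into exp.Levenshtein, which the default
# generator renders under its generic name.
print(sqlglot.transpile("SELECT EDITDIST3(a, b)", read="sqlite")[0])
# Expected (approximately): SELECT LEVENSHTEIN(a, b)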
12
sqlglot/dialects/starrocks.py
Normal file
@@ -0,0 +1,12 @@
from sqlglot import exp
from sqlglot.dialects.mysql import MySQL


class StarRocks(MySQL):
    class Generator(MySQL.Generator):
        TYPE_MAPPING = {
            **MySQL.Generator.TYPE_MAPPING,
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.TIMESTAMP: "DATETIME",
            exp.DataType.Type.TIMESTAMPTZ: "DATETIME",
        }
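StarRocks only diverges from MySQL in a few type names, all covered by the TYPE_MAPPING above. A minimal sketch, not part of the diff; the column name is invented and the output is approximate:

import sqlglot

# Both TIMESTAMP and TIMESTAMPTZ map to DATETIME in the StarRocks generator.
print(sqlglot.transpile("SELECT CAST(x AS TIMESTAMP)", write="starrocks")[0])
# Expected (approximately): SELECT CAST(x AS DATETIME)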
37
sqlglot/dialects/tableau.py
Normal file
@@ -0,0 +1,37 @@
from sqlglot import exp
from sqlglot.dialects.dialect import Dialect
from sqlglot.generator import Generator
from sqlglot.helper import list_get
from sqlglot.parser import Parser


def _if_sql(self, expression):
    return f"IF {self.sql(expression, 'this')} THEN {self.sql(expression, 'true')} ELSE {self.sql(expression, 'false')} END"


def _coalesce_sql(self, expression):
    return f"IFNULL({self.sql(expression, 'this')}, {self.expressions(expression)})"


def _count_sql(self, expression):
    this = expression.this
    if isinstance(this, exp.Distinct):
        return f"COUNTD({self.sql(this, 'this')})"
    return f"COUNT({self.sql(expression, 'this')})"


class Tableau(Dialect):
    class Generator(Generator):
        TRANSFORMS = {
            **Generator.TRANSFORMS,
            exp.If: _if_sql,
            exp.Coalesce: _coalesce_sql,
            exp.Count: _count_sql,
        }

    class Parser(Parser):
        FUNCTIONS = {
            **Parser.FUNCTIONS,
            "IFNULL": exp.Coalesce.from_arg_list,
            "COUNTD": lambda args: exp.Count(this=exp.Distinct(this=list_get(args, 0))),
        }
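The COUNTD/IFNULL pair makes Tableau's calculated-field syntax round-trippable with standard SQL. A small sketch, not part of the diff; the table and column names are made up and the output is approximate:

import sqlglot

# COUNT(DISTINCT ...) parses to exp.Count wrapping exp.Distinct, which
# _count_sql above rewrites into Tableau's COUNTD.
print(sqlglot.transpile("SELECT COUNT(DISTINCT user_id) FROM t", write="tableau")[0])
# Expected (approximately): SELECT COUNTD(user_id) FROM t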
10
sqlglot/dialects/trino.py
Normal file
@@ -0,0 +1,10 @@
from sqlglot import exp
from sqlglot.dialects.presto import Presto


class Trino(Presto):
    class Generator(Presto.Generator):
        TRANSFORMS = {
            **Presto.Generator.TRANSFORMS,
            exp.ArraySum: lambda self, e: f"REDUCE({self.sql(e, 'this')}, 0, (acc, x) -> acc + x, acc -> acc)",
        }
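Trino inherits everything from Presto and overrides only exp.ArraySum, spelling the array fold with REDUCE. A minimal sketch, not part of the diff; the column name is illustrative and the output is approximate:

import sqlglot

# The generic ARRAY_SUM function is lowered to Trino's REDUCE-based fold.
print(sqlglot.transpile("SELECT ARRAY_SUM(xs)", write="trino")[0])
# Expected (approximately):
# SELECT REDUCE(xs, 0, (acc, x) -> acc + x, acc -> acc)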