1
0
Fork 0

Merging upstream version 6.2.0.

Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
Daniel Baumann 2025-02-13 14:31:47 +01:00
parent 0822fbed3a
commit 9bc11b290e
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
22 changed files with 312 additions and 45 deletions

View file

@ -14,3 +14,4 @@ from sqlglot.dialects.sqlite import SQLite
from sqlglot.dialects.starrocks import StarRocks
from sqlglot.dialects.tableau import Tableau
from sqlglot.dialects.trino import Trino
from sqlglot.dialects.tsql import TSQL

View file

@ -27,6 +27,7 @@ class Dialects(str, Enum):
STARROCKS = "starrocks"
TABLEAU = "tableau"
TRINO = "trino"
TSQL = "tsql"
class _Dialect(type):
@ -53,7 +54,6 @@ class _Dialect(type):
klass.parser_class = getattr(klass, "Parser", Parser)
klass.generator_class = getattr(klass, "Generator", Generator)
klass.tokenizer = klass.tokenizer_class()
klass.quote_start, klass.quote_end = list(klass.tokenizer_class._QUOTES.items())[0]
klass.identifier_start, klass.identifier_end = list(klass.tokenizer_class._IDENTIFIERS.items())[0]
@ -95,7 +95,6 @@ class Dialect(metaclass=_Dialect):
tokenizer_class = None
parser_class = None
generator_class = None
tokenizer = None
@classmethod
def get_or_raise(cls, dialect):
@ -138,6 +137,12 @@ class Dialect(metaclass=_Dialect):
def transpile(self, code, **opts):
return self.generate(self.parse(code), **opts)
@property
def tokenizer(self):
if not hasattr(self, "_tokenizer"):
self._tokenizer = self.tokenizer_class()
return self._tokenizer
def parser(self, **opts):
return self.parser_class(
**{
@ -170,7 +175,15 @@ class Dialect(metaclass=_Dialect):
def rename_func(name):
return lambda self, expression: f"{name}({csv(*[self.sql(e) for e in expression.args.values()])})"
def _rename(self, expression):
args = (
self.expressions(expression, flat=True)
if isinstance(expression, exp.Func) and expression.is_var_len_args
else csv(*[self.sql(e) for e in expression.args.values()])
)
return f"{name}({args})"
return _rename
def approx_count_distinct_sql(self, expression):

View file

@ -108,7 +108,7 @@ class DuckDB(Dialect):
TRANSFORMS = {
**Generator.TRANSFORMS,
exp.ApproxDistinct: approx_count_distinct_sql,
exp.Array: lambda self, e: f"LIST_VALUE({self.expressions(e, flat=True)})",
exp.Array: rename_func("LIST_VALUE"),
exp.ArraySize: rename_func("ARRAY_LENGTH"),
exp.ArraySort: _array_sort_sql,
exp.ArraySum: rename_func("LIST_SUM"),

View file

@ -106,6 +106,11 @@ class Snowflake(Dialect):
"TO_TIMESTAMP": _snowflake_to_timestamp,
}
FUNCTION_PARSERS = {
**Parser.FUNCTION_PARSERS,
"DATE_PART": lambda self: self._parse_extract(),
}
COLUMN_OPERATORS = {
**Parser.COLUMN_OPERATORS,
TokenType.COLON: lambda self, this, path: self.expression(
@ -118,10 +123,20 @@ class Snowflake(Dialect):
class Tokenizer(Tokenizer):
QUOTES = ["'", "$$"]
ESCAPE = "\\"
SINGLE_TOKENS = {
**Tokenizer.SINGLE_TOKENS,
"$": TokenType.DOLLAR, # needed to break for quotes
}
KEYWORDS = {
**Tokenizer.KEYWORDS,
"QUALIFY": TokenType.QUALIFY,
"DOUBLE PRECISION": TokenType.DOUBLE,
"TIMESTAMP_LTZ": TokenType.TIMESTAMPLTZ,
"TIMESTAMP_NTZ": TokenType.TIMESTAMP,
"TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
"TIMESTAMPNTZ": TokenType.TIMESTAMP,
}
class Generator(Generator):
@ -132,6 +147,11 @@ class Snowflake(Dialect):
exp.UnixToTime: _unix_to_time,
}
TYPE_MAPPING = {
**Generator.TYPE_MAPPING,
exp.DataType.Type.TIMESTAMP: "TIMESTAMPNTZ",
}
def except_op(self, expression):
if not expression.args.get("distinct", False):
self.unsupported("EXCEPT with All is not supported in Snowflake")

View file

@ -82,6 +82,7 @@ class Spark(Hive):
TRANSFORMS = {
**{k: v for k, v in Hive.Generator.TRANSFORMS.items() if k not in {exp.ArraySort}},
exp.FileFormatProperty: lambda self, e: f"USING {e.text('value').upper()}",
exp.ArraySum: lambda self, e: f"AGGREGATE({self.sql(e, 'this')}, 0, (acc, x) -> acc + x, acc -> acc)",
exp.BitwiseLeftShift: rename_func("SHIFTLEFT"),
exp.BitwiseRightShift: rename_func("SHIFTRIGHT"),

38
sqlglot/dialects/tsql.py Normal file
View file

@ -0,0 +1,38 @@
from sqlglot import exp
from sqlglot.dialects.dialect import Dialect
from sqlglot.generator import Generator
from sqlglot.tokens import Tokenizer, TokenType
class TSQL(Dialect):
null_ordering = "nulls_are_small"
time_format = "'yyyy-mm-dd hh:mm:ss'"
class Tokenizer(Tokenizer):
IDENTIFIERS = ['"', ("[", "]")]
KEYWORDS = {
**Tokenizer.KEYWORDS,
"BIT": TokenType.BOOLEAN,
"REAL": TokenType.FLOAT,
"NTEXT": TokenType.TEXT,
"SMALLDATETIME": TokenType.DATETIME,
"DATETIMEOFFSET": TokenType.TIMESTAMPTZ,
"TIME": TokenType.TIMESTAMP,
"VARBINARY": TokenType.BINARY,
"IMAGE": TokenType.IMAGE,
"MONEY": TokenType.MONEY,
"SMALLMONEY": TokenType.SMALLMONEY,
"ROWVERSION": TokenType.ROWVERSION,
"SQL_VARIANT": TokenType.SQL_VARIANT,
"UNIQUEIDENTIFIER": TokenType.UNIQUEIDENTIFIER,
"XML": TokenType.XML,
}
class Generator(Generator):
TYPE_MAPPING = {
**Generator.TYPE_MAPPING,
exp.DataType.Type.BOOLEAN: "BIT",
exp.DataType.Type.INT: "INTEGER",
exp.DataType.Type.DECIMAL: "NUMERIC",
}