1
0
Fork 0

Merging upstream version 9.0.6.

Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
Daniel Baumann 2025-02-13 14:51:47 +01:00
parent e369f04a93
commit 69b4fb4368
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
31 changed files with 694 additions and 196 deletions

View file

@ -1,5 +1,6 @@
from sqlglot.dialects.bigquery import BigQuery
from sqlglot.dialects.clickhouse import ClickHouse
from sqlglot.dialects.databricks import Databricks
from sqlglot.dialects.dialect import Dialect, Dialects
from sqlglot.dialects.duckdb import DuckDB
from sqlglot.dialects.hive import Hive

View file

@ -0,0 +1,21 @@
from sqlglot import exp
from sqlglot.dialects.dialect import parse_date_delta
from sqlglot.dialects.spark import Spark
from sqlglot.dialects.tsql import generate_date_delta_with_unit_sql
class Databricks(Spark):
    """Databricks dialect: Spark with T-SQL-style DATEADD/DATEDIFF support."""

    class Parser(Spark.Parser):
        # Start from Spark's function table, then override the date-delta
        # parsers so the unit-first argument order (DATEADD(unit, n, date))
        # is understood.
        FUNCTIONS = dict(Spark.Parser.FUNCTIONS)
        FUNCTIONS["DATEADD"] = parse_date_delta(exp.DateAdd)
        FUNCTIONS["DATE_ADD"] = parse_date_delta(exp.DateAdd)
        FUNCTIONS["DATEDIFF"] = parse_date_delta(exp.DateDiff)

    class Generator(Spark.Generator):
        # Render DateAdd/DateDiff back out with an explicit unit argument.
        TRANSFORMS = dict(Spark.Generator.TRANSFORMS)
        TRANSFORMS[exp.DateAdd] = generate_date_delta_with_unit_sql
        TRANSFORMS[exp.DateDiff] = generate_date_delta_with_unit_sql

View file

@ -28,6 +28,7 @@ class Dialects(str, Enum):
TABLEAU = "tableau"
TRINO = "trino"
TSQL = "tsql"
DATABRICKS = "databricks"
class _Dialect(type):
@ -331,3 +332,15 @@ def create_with_partitions_sql(self, expression):
expression.set("this", schema)
return self.create_sql(expression)
def parse_date_delta(exp_class, unit_mapping=None):
    """Build a parser for DATEADD / DATEDIFF style functions.

    Handles both call forms:
      - 3 args, unit first: FUNC(unit, expression, this)
      - 2 args, no unit:    FUNC(this, expression)  (unit defaults to DAY)

    Args:
        exp_class: expression class to instantiate (e.g. exp.DateAdd).
        unit_mapping: optional dict used to normalize the unit name
            (looked up lower-cased); unmapped units pass through unchanged.

    Returns:
        A function taking the parsed argument list and returning an
        ``exp_class`` instance.
    """

    def inner_func(args):
        unit_based = len(args) == 3
        this = list_get(args, 2) if unit_based else list_get(args, 0)
        # The delta expression is the second argument in BOTH forms; the
        # previous `x if unit_based else x` ternary was redundant.
        expression = list_get(args, 1)
        unit = list_get(args, 0) if unit_based else exp.Literal.string("DAY")
        if unit_mapping:
            unit = unit_mapping.get(unit.name.lower(), unit)
        return exp_class(this=this, expression=expression, unit=unit)

    return inner_func

View file

@ -111,6 +111,7 @@ def _unnest_to_explode_sql(self, expression):
self.sql(
exp.Lateral(
this=udtf(this=expression),
view=True,
alias=exp.TableAlias(this=alias.this, columns=[column]),
)
)
@ -283,6 +284,7 @@ class Hive(Dialect):
exp.UnixToTime: rename_func("FROM_UNIXTIME"),
exp.UnixToTimeStr: rename_func("FROM_UNIXTIME"),
exp.PartitionedByProperty: lambda self, e: f"PARTITIONED BY {self.sql(e, 'value')}",
exp.NumberToStr: rename_func("FORMAT_NUMBER"),
}
WITH_PROPERTIES = {exp.AnonymousProperty}

View file

@ -115,6 +115,7 @@ class Presto(Dialect):
class Tokenizer(Tokenizer):
KEYWORDS = {
**Tokenizer.KEYWORDS,
"VARBINARY": TokenType.BINARY,
"ROW": TokenType.STRUCT,
}

View file

@ -188,6 +188,8 @@ class Snowflake(Dialect):
}
class Generator(Generator):
CREATE_TRANSIENT = True
TRANSFORMS = {
**Generator.TRANSFORMS,
exp.ArrayConcat: rename_func("ARRAY_CAT"),

View file

@ -20,6 +20,7 @@ class SQLite(Dialect):
KEYWORDS = {
**Tokenizer.KEYWORDS,
"VARBINARY": TokenType.BINARY,
"AUTOINCREMENT": TokenType.AUTO_INCREMENT,
}

View file

@ -1,5 +1,7 @@
import re
from sqlglot import exp
from sqlglot.dialects.dialect import Dialect, rename_func
from sqlglot.dialects.dialect import Dialect, parse_date_delta, rename_func
from sqlglot.expressions import DataType
from sqlglot.generator import Generator
from sqlglot.helper import list_get
@ -27,6 +29,11 @@ DATE_DELTA_INTERVAL = {
}
DATE_FMT_RE = re.compile("([dD]{1,2})|([mM]{1,2})|([yY]{1,4})|([hH]{1,2})|([sS]{1,2})")
# N = Numeric, C = Currency
TRANSPILE_SAFE_NUMBER_FMT = {"N", "C"}
def tsql_format_time_lambda(exp_class, full_format_mapping=None, default=None):
def _format_time(args):
return exp_class(
@ -42,26 +49,40 @@ def tsql_format_time_lambda(exp_class, full_format_mapping=None, default=None):
return _format_time
def parse_date_delta(exp_class):
    """Return a parser that builds ``exp_class`` from unit-first arguments."""

    def inner_func(args):
        # Normalize the T-SQL datepart token to a canonical unit, falling
        # back to "day" for unrecognized parts.
        raw_unit = list_get(args, 0).name.lower()
        unit = DATE_DELTA_INTERVAL.get(raw_unit, "day")
        return exp_class(this=list_get(args, 2), expression=list_get(args, 1), unit=unit)

    return inner_func
def parse_format(args):
    """Parse T-SQL FORMAT(value, fmt) into NumberToStr or TimeToStr."""
    this = list_get(args, 0)
    fmt = list_get(args, 1)
    # Treat the call as numeric formatting when the format is a known safe
    # number format, or when it contains no date-format tokens at all.
    if fmt.name in TRANSPILE_SAFE_NUMBER_FMT or not DATE_FMT_RE.search(fmt.this):
        return exp.NumberToStr(this=this, format=fmt)
    # Single-character formats use the "standard" format table; longer ones
    # use the custom time-format mapping.
    mapping = TSQL.format_time_mapping if len(fmt.name) == 1 else TSQL.time_mapping
    return exp.TimeToStr(
        this=this,
        format=exp.Literal.string(format_time(fmt.name, mapping)),
    )
def generate_date_delta(self, e):
def generate_date_delta_with_unit_sql(self, e):
    """Render DateAdd/DateDiff as DATEADD/DATEDIFF with a leading unit arg."""
    if isinstance(e, exp.DateAdd):
        name = "DATEADD"
    else:
        name = "DATEDIFF"
    rendered_args = self.format_args(e.text("unit"), e.expression, e.this)
    return f"{name}({rendered_args})"
def generate_format_sql(self, e):
    """Render NumberToStr/TimeToStr back into a T-SQL FORMAT() call."""
    if isinstance(e, exp.NumberToStr):
        # Numeric formats are passed through untouched.
        fmt = e.args["format"]
    else:
        # Time formats are translated back via the inverse time mapping.
        fmt = exp.Literal.string(format_time(e.text("format"), TSQL.inverse_time_mapping))
    return f"FORMAT({self.format_args(e.this, fmt)})"
class TSQL(Dialect):
null_ordering = "nulls_are_small"
time_format = "'yyyy-mm-dd hh:mm:ss'"
time_mapping = {
"yyyy": "%Y",
"yy": "%y",
"year": "%Y",
"qq": "%q",
"q": "%q",
@ -101,6 +122,8 @@ class TSQL(Dialect):
"H": "%-H",
"h": "%-I",
"S": "%f",
"yyyy": "%Y",
"yy": "%y",
}
convert_format_mapping = {
@ -143,6 +166,27 @@ class TSQL(Dialect):
"120": "%Y-%m-%d %H:%M:%S",
"121": "%Y-%m-%d %H:%M:%S.%f",
}
# not sure if complete
format_time_mapping = {
"y": "%B %Y",
"d": "%m/%d/%Y",
"H": "%-H",
"h": "%-I",
"s": "%Y-%m-%d %H:%M:%S",
"D": "%A,%B,%Y",
"f": "%A,%B,%Y %-I:%M %p",
"F": "%A,%B,%Y %-I:%M:%S %p",
"g": "%m/%d/%Y %-I:%M %p",
"G": "%m/%d/%Y %-I:%M:%S %p",
"M": "%B %-d",
"m": "%B %-d",
"O": "%Y-%m-%dT%H:%M:%S",
"u": "%Y-%M-%D %H:%M:%S%z",
"U": "%A, %B %D, %Y %H:%M:%S%z",
"T": "%-I:%M:%S %p",
"t": "%-I:%M",
"Y": "%a %Y",
}
class Tokenizer(Tokenizer):
IDENTIFIERS = ['"', ("[", "]")]
@ -166,6 +210,7 @@ class TSQL(Dialect):
"SQL_VARIANT": TokenType.VARIANT,
"NVARCHAR(MAX)": TokenType.TEXT,
"VARCHAR(MAX)": TokenType.TEXT,
"TOP": TokenType.TOP,
}
class Parser(Parser):
@ -173,8 +218,8 @@ class TSQL(Dialect):
**Parser.FUNCTIONS,
"CHARINDEX": exp.StrPosition.from_arg_list,
"ISNULL": exp.Coalesce.from_arg_list,
"DATEADD": parse_date_delta(exp.DateAdd),
"DATEDIFF": parse_date_delta(exp.DateDiff),
"DATEADD": parse_date_delta(exp.DateAdd, unit_mapping=DATE_DELTA_INTERVAL),
"DATEDIFF": parse_date_delta(exp.DateDiff, unit_mapping=DATE_DELTA_INTERVAL),
"DATENAME": tsql_format_time_lambda(exp.TimeToStr, full_format_mapping=True),
"DATEPART": tsql_format_time_lambda(exp.TimeToStr),
"GETDATE": exp.CurrentDate.from_arg_list,
@ -182,6 +227,7 @@ class TSQL(Dialect):
"LEN": exp.Length.from_arg_list,
"REPLICATE": exp.Repeat.from_arg_list,
"JSON_VALUE": exp.JSONExtractScalar.from_arg_list,
"FORMAT": parse_format,
}
VAR_LENGTH_DATATYPES = {
@ -194,7 +240,7 @@ class TSQL(Dialect):
def _parse_convert(self, strict):
to = self._parse_types()
self._match(TokenType.COMMA)
this = self._parse_field()
this = self._parse_column()
# Retrieve length of datatype and override to default if not specified
if list_get(to.expressions, 0) is None and to.this in self.VAR_LENGTH_DATATYPES:
@ -238,8 +284,10 @@ class TSQL(Dialect):
TRANSFORMS = {
**Generator.TRANSFORMS,
exp.DateAdd: lambda self, e: generate_date_delta(self, e),
exp.DateDiff: lambda self, e: generate_date_delta(self, e),
exp.DateAdd: generate_date_delta_with_unit_sql,
exp.DateDiff: generate_date_delta_with_unit_sql,
exp.CurrentDate: rename_func("GETDATE"),
exp.If: rename_func("IIF"),
exp.NumberToStr: generate_format_sql,
exp.TimeToStr: generate_format_sql,
}