1
0
Fork 0

Merging upstream version 9.0.1.

Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
Daniel Baumann 2025-02-13 14:48:46 +01:00
parent ebb36a5fc5
commit 4483b8ff47
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
87 changed files with 7994 additions and 421 deletions

View file

@@ -78,6 +78,16 @@ def _create_sql(self, expression):
class BigQuery(Dialect):
unnest_column_only = True
time_mapping = {
"%M": "%-M",
"%d": "%-d",
"%m": "%-m",
"%y": "%-y",
"%H": "%-H",
"%I": "%-I",
"%S": "%-S",
"%j": "%-j",
}
class Tokenizer(Tokenizer):
QUOTES = [
@@ -113,6 +123,7 @@ class BigQuery(Dialect):
"DATETIME_SUB": _date_add(exp.DatetimeSub),
"TIME_SUB": _date_add(exp.TimeSub),
"TIMESTAMP_SUB": _date_add(exp.TimestampSub),
"PARSE_TIMESTAMP": lambda args: exp.StrToTime(this=list_get(args, 1), format=list_get(args, 0)),
}
NO_PAREN_FUNCTIONS = {
@@ -137,6 +148,7 @@ class BigQuery(Dialect):
exp.DatetimeSub: _date_add_sql("DATETIME", "SUB"),
exp.DateDiff: lambda self, e: f"DATE_DIFF({self.sql(e, 'this')}, {self.sql(e, 'expression')}, {self.sql(e.args.get('unit', 'DAY'))})",
exp.ILike: no_ilike_sql,
exp.StrToTime: lambda self, e: f"PARSE_TIMESTAMP({self.format_time(e)}, {self.sql(e, 'this')})",
exp.TimeAdd: _date_add_sql("TIME", "ADD"),
exp.TimeSub: _date_add_sql("TIME", "SUB"),
exp.TimestampAdd: _date_add_sql("TIMESTAMP", "ADD"),

View file

@@ -2,7 +2,7 @@ from enum import Enum
from sqlglot import exp
from sqlglot.generator import Generator
from sqlglot.helper import list_get
from sqlglot.helper import flatten, list_get
from sqlglot.parser import Parser
from sqlglot.time import format_time
from sqlglot.tokens import Tokenizer
@@ -67,6 +67,11 @@ class _Dialect(type):
klass.generator_class.TRANSFORMS[
exp.HexString
] = lambda self, e: f"{hs_start}{int(self.sql(e, 'this')):X}{hs_end}"
if klass.tokenizer_class._BYTE_STRINGS and exp.ByteString not in klass.generator_class.TRANSFORMS:
be_start, be_end = list(klass.tokenizer_class._BYTE_STRINGS.items())[0]
klass.generator_class.TRANSFORMS[
exp.ByteString
] = lambda self, e: f"{be_start}{self.sql(e, 'this')}{be_end}"
return klass
@@ -176,11 +181,7 @@ class Dialect(metaclass=_Dialect):
def rename_func(name):
def _rename(self, expression):
args = (
expression.expressions
if isinstance(expression, exp.Func) and expression.is_var_len_args
else expression.args.values()
)
args = flatten(expression.args.values())
return f"{name}({self.format_args(*args)})"
return _rename

View file

@@ -121,6 +121,9 @@ class Hive(Dialect):
"ss": "%S",
"s": "%-S",
"S": "%f",
"a": "%p",
"DD": "%j",
"D": "%-j",
}
date_format = "'yyyy-MM-dd'"
@@ -200,6 +203,7 @@ class Hive(Dialect):
exp.AnonymousProperty: _property_sql,
exp.ApproxDistinct: approx_count_distinct_sql,
exp.ArrayAgg: rename_func("COLLECT_LIST"),
exp.ArrayConcat: rename_func("CONCAT"),
exp.ArraySize: rename_func("SIZE"),
exp.ArraySort: _array_sort,
exp.With: no_recursive_cte_sql,

View file

@@ -97,6 +97,8 @@ class MySQL(Dialect):
"%s": "%S",
"%S": "%S",
"%u": "%W",
"%k": "%-H",
"%l": "%-I",
}
class Tokenizer(Tokenizer):
@@ -145,6 +147,9 @@ class MySQL(Dialect):
"_TIS620": TokenType.INTRODUCER,
"_UCS2": TokenType.INTRODUCER,
"_UJIS": TokenType.INTRODUCER,
# https://dev.mysql.com/doc/refman/8.0/en/string-literals.html
"N": TokenType.INTRODUCER,
"n": TokenType.INTRODUCER,
"_UTF8": TokenType.INTRODUCER,
"_UTF16": TokenType.INTRODUCER,
"_UTF16LE": TokenType.INTRODUCER,

View file

@@ -80,17 +80,12 @@ class Oracle(Dialect):
sep="",
)
def alias_sql(self, expression):
if isinstance(expression.this, exp.Table):
to_sql = self.sql(expression, "alias")
# oracle does not allow "AS" between table and alias
to_sql = f" {to_sql}" if to_sql else ""
return f"{self.sql(expression, 'this')}{to_sql}"
return super().alias_sql(expression)
def offset_sql(self, expression):
return f"{super().offset_sql(expression)} ROWS"
def table_sql(self, expression):
return super().table_sql(expression, sep=" ")
class Tokenizer(Tokenizer):
KEYWORDS = {
**Tokenizer.KEYWORDS,

View file

@@ -163,6 +163,7 @@ class Postgres(Dialect):
class Tokenizer(Tokenizer):
BIT_STRINGS = [("b'", "'"), ("B'", "'")]
HEX_STRINGS = [("x'", "'"), ("X'", "'")]
BYTE_STRINGS = [("e'", "'"), ("E'", "'")]
KEYWORDS = {
**Tokenizer.KEYWORDS,
"ALWAYS": TokenType.ALWAYS,
@@ -176,6 +177,11 @@ class Postgres(Dialect):
"SMALLSERIAL": TokenType.SMALLSERIAL,
"UUID": TokenType.UUID,
}
QUOTES = ["'", "$$"]
SINGLE_TOKENS = {
**Tokenizer.SINGLE_TOKENS,
"$": TokenType.PARAMETER,
}
class Parser(Parser):
STRICT_CAST = False

View file

@@ -172,6 +172,7 @@ class Presto(Dialect):
**transforms.UNALIAS_GROUP,
exp.ApproxDistinct: _approx_distinct_sql,
exp.Array: lambda self, e: f"ARRAY[{self.expressions(e, flat=True)}]",
exp.ArrayConcat: rename_func("CONCAT"),
exp.ArrayContains: rename_func("CONTAINS"),
exp.ArraySize: rename_func("CARDINALITY"),
exp.BitwiseAnd: lambda self, e: f"BITWISE_AND({self.sql(e, 'this')}, {self.sql(e, 'expression')})",

View file

@@ -69,6 +69,35 @@ def _unix_to_time(self, expression):
raise ValueError("Improper scale for timestamp")
# https://docs.snowflake.com/en/sql-reference/functions/date_part.html
# https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts
def _parse_date_part(self):
    """Parse a Snowflake DATE_PART(<part>, <expr>) call.

    Ordinary parts become an EXTRACT expression; the EPOCH* parts are
    lowered to a TimeToUnix over a TIMESTAMP cast, multiplied by the
    appropriate sub-second scale factor when one applies.
    """
    part = self._parse_var() or self._parse_type()
    self._match(TokenType.COMMA)
    value = self._parse_bitwise()

    part_name = part.name.upper()
    if not part_name.startswith("EPOCH"):
        # Plain date part, e.g. DATE_PART(month, x) -> EXTRACT(month FROM x).
        return self.expression(exp.Extract, this=part, expression=value)

    # Sub-second EPOCH variants scale the epoch-seconds result.
    scale = None
    for prefix, factor in (
        ("EPOCH_MILLISECOND", 10**3),
        ("EPOCH_MICROSECOND", 10**6),
        ("EPOCH_NANOSECOND", 10**9),
    ):
        if part_name.startswith(prefix):
            scale = factor
            break

    timestamp = self.expression(
        exp.Cast, this=value, to=exp.DataType.build("TIMESTAMP")
    )
    unix = self.expression(exp.TimeToUnix, this=timestamp)

    if scale:
        unix = exp.Mul(this=unix, expression=exp.Literal.number(scale))

    return unix
class Snowflake(Dialect):
null_ordering = "nulls_are_large"
time_format = "'yyyy-mm-dd hh24:mi:ss'"
@@ -115,7 +144,7 @@ class Snowflake(Dialect):
FUNCTION_PARSERS = {
**Parser.FUNCTION_PARSERS,
"DATE_PART": lambda self: self._parse_extract(),
"DATE_PART": _parse_date_part,
}
FUNC_TOKENS = {
@@ -161,9 +190,11 @@ class Snowflake(Dialect):
class Generator(Generator):
TRANSFORMS = {
**Generator.TRANSFORMS,
exp.ArrayConcat: rename_func("ARRAY_CAT"),
exp.If: rename_func("IFF"),
exp.StrToTime: lambda self, e: f"TO_TIMESTAMP({self.sql(e, 'this')}, {self.format_time(e)})",
exp.UnixToTime: _unix_to_time,
exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})",
exp.Array: inline_array_sql,
exp.StrPosition: rename_func("POSITION"),
exp.Parameter: lambda self, e: f"${self.sql(e, 'this')}",

View file

@@ -1,9 +1,5 @@
from sqlglot import exp
from sqlglot.dialects.dialect import (
create_with_partitions_sql,
no_ilike_sql,
rename_func,
)
from sqlglot.dialects.dialect import create_with_partitions_sql, rename_func
from sqlglot.dialects.hive import Hive
from sqlglot.helper import list_get
from sqlglot.parser import Parser
@@ -98,13 +94,14 @@ class Spark(Hive):
}
TRANSFORMS = {
**{k: v for k, v in Hive.Generator.TRANSFORMS.items() if k not in {exp.ArraySort}},
**{k: v for k, v in Hive.Generator.TRANSFORMS.items() if k not in {exp.ArraySort, exp.ILike}},
exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
exp.FileFormatProperty: lambda self, e: f"USING {e.text('value').upper()}",
exp.ArraySum: lambda self, e: f"AGGREGATE({self.sql(e, 'this')}, 0, (acc, x) -> acc + x, acc -> acc)",
exp.BitwiseLeftShift: rename_func("SHIFTLEFT"),
exp.BitwiseRightShift: rename_func("SHIFTRIGHT"),
exp.DateTrunc: rename_func("TRUNC"),
exp.Hint: lambda self, e: f" /*+ {self.expressions(e).strip()} */",
exp.ILike: no_ilike_sql,
exp.StrToDate: _str_to_date,
exp.StrToTime: lambda self, e: f"TO_TIMESTAMP({self.sql(e, 'this')}, {self.format_time(e)})",
exp.UnixToTime: _unix_to_time,
@@ -112,6 +109,8 @@ class Spark(Hive):
exp.Map: _map_sql,
exp.Reduce: rename_func("AGGREGATE"),
exp.StructKwarg: lambda self, e: f"{self.sql(e, 'this')}: {self.sql(e, 'expression')}",
exp.TimestampTrunc: lambda self, e: f"DATE_TRUNC({self.sql(e, 'unit')}, {self.sql(e, 'this')})",
exp.VariancePop: rename_func("VAR_POP"),
}
WRAP_DERIVED_VALUES = False

View file

@@ -32,6 +32,11 @@ class TSQL(Dialect):
}
class Parser(Parser):
FUNCTIONS = {
**Parser.FUNCTIONS,
"CHARINDEX": exp.StrPosition.from_arg_list,
}
def _parse_convert(self):
to = self._parse_types()
self._match(TokenType.COMMA)