
Adding upstream version 18.2.0.

Signed-off-by: Daniel Baumann <daniel@debian.org>
Daniel Baumann 2025-02-13 20:56:33 +01:00
parent 9de781a59b
commit ab14e550ff
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
124 changed files with 60313 additions and 50346 deletions

@@ -50,7 +50,7 @@ TIME_DIFF_FACTOR = {
 DIFF_MONTH_SWITCH = ("YEAR", "QUARTER", "MONTH")
-def _add_date_sql(self: generator.Generator, expression: exp.DateAdd | exp.DateSub) -> str:
+def _add_date_sql(self: Hive.Generator, expression: exp.DateAdd | exp.DateSub) -> str:
     unit = expression.text("unit").upper()
     func, multiplier = DATE_DELTA_INTERVAL.get(unit, ("DATE_ADD", 1))
@@ -69,7 +69,7 @@ def _add_date_sql(self: generator.Generator, expression: exp.DateAdd | exp.DateS
     return self.func(func, expression.this, modified_increment)
-def _date_diff_sql(self: generator.Generator, expression: exp.DateDiff) -> str:
+def _date_diff_sql(self: Hive.Generator, expression: exp.DateDiff) -> str:
     unit = expression.text("unit").upper()
     factor = TIME_DIFF_FACTOR.get(unit)
@@ -87,7 +87,7 @@ def _date_diff_sql(self: generator.Generator, expression: exp.DateDiff) -> str:
     return f"{diff_sql}{multiplier_sql}"
-def _json_format_sql(self: generator.Generator, expression: exp.JSONFormat) -> str:
+def _json_format_sql(self: Hive.Generator, expression: exp.JSONFormat) -> str:
     this = expression.this
     if isinstance(this, exp.Cast) and this.is_type("json") and this.this.is_string:
         # Since FROM_JSON requires a nested type, we always wrap the json string with
@@ -103,21 +103,21 @@ def _json_format_sql(self: generator.Generator, expression: exp.JSONFormat) -> s
     return self.func("TO_JSON", this, expression.args.get("options"))
-def _array_sort_sql(self: generator.Generator, expression: exp.ArraySort) -> str:
+def _array_sort_sql(self: Hive.Generator, expression: exp.ArraySort) -> str:
     if expression.expression:
         self.unsupported("Hive SORT_ARRAY does not support a comparator")
     return f"SORT_ARRAY({self.sql(expression, 'this')})"
-def _property_sql(self: generator.Generator, expression: exp.Property) -> str:
+def _property_sql(self: Hive.Generator, expression: exp.Property) -> str:
     return f"'{expression.name}'={self.sql(expression, 'value')}"
-def _str_to_unix_sql(self: generator.Generator, expression: exp.StrToUnix) -> str:
+def _str_to_unix_sql(self: Hive.Generator, expression: exp.StrToUnix) -> str:
     return self.func("UNIX_TIMESTAMP", expression.this, time_format("hive")(self, expression))
-def _str_to_date_sql(self: generator.Generator, expression: exp.StrToDate) -> str:
+def _str_to_date_sql(self: Hive.Generator, expression: exp.StrToDate) -> str:
     this = self.sql(expression, "this")
     time_format = self.format_time(expression)
     if time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT):
@@ -125,7 +125,7 @@ def _str_to_date_sql(self: generator.Generator, expression: exp.StrToDate) -> st
     return f"CAST({this} AS DATE)"
-def _str_to_time_sql(self: generator.Generator, expression: exp.StrToTime) -> str:
+def _str_to_time_sql(self: Hive.Generator, expression: exp.StrToTime) -> str:
     this = self.sql(expression, "this")
     time_format = self.format_time(expression)
     if time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT):
@@ -133,13 +133,13 @@ def _str_to_time_sql(self: generator.Generator, expression: exp.StrToTime) -> st
     return f"CAST({this} AS TIMESTAMP)"
-def _time_to_str(self: generator.Generator, expression: exp.TimeToStr) -> str:
+def _time_to_str(self: Hive.Generator, expression: exp.TimeToStr) -> str:
     this = self.sql(expression, "this")
     time_format = self.format_time(expression)
     return f"DATE_FORMAT({this}, {time_format})"
-def _to_date_sql(self: generator.Generator, expression: exp.TsOrDsToDate) -> str:
+def _to_date_sql(self: Hive.Generator, expression: exp.TsOrDsToDate) -> str:
     this = self.sql(expression, "this")
     time_format = self.format_time(expression)
     if time_format and time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT):
@@ -206,6 +206,8 @@ class Hive(Dialect):
             "MSCK REPAIR": TokenType.COMMAND,
             "REFRESH": TokenType.COMMAND,
             "WITH SERDEPROPERTIES": TokenType.SERDE_PROPERTIES,
+            "TIMESTAMP AS OF": TokenType.TIMESTAMP_SNAPSHOT,
+            "VERSION AS OF": TokenType.VERSION_SNAPSHOT,
         }
         NUMERIC_LITERALS = {
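The two new keywords map Spark/Delta-style time-travel clauses onto the TIMESTAMP_SNAPSHOT and VERSION_SNAPSHOT token types. A minimal sketch of what that enables, assuming sqlglot 18.2.0 is installed and that the base parser attaches such a clause to the table as an exp.Version node:

    # Sketch, not part of the change: parse a VERSION AS OF clause with the hive dialect.
    from sqlglot import exp, parse_one

    tree = parse_one("SELECT * FROM tbl VERSION AS OF 123", read="hive")
    version = tree.find(exp.Version)
    # If parsing works as assumed, this prints the time-travel clause back out.
    print(version.sql(dialect="hive") if version else "no time-travel clause parsed")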
@@ -220,6 +222,7 @@ class Hive(Dialect):
     class Parser(parser.Parser):
         LOG_DEFAULTS_TO_LN = True
         STRICT_CAST = False
+        SUPPORTS_USER_DEFINED_TYPES = False
         FUNCTIONS = {
             **parser.Parser.FUNCTIONS,
@@ -257,6 +260,11 @@ class Hive(Dialect):
             ),
             "SIZE": exp.ArraySize.from_arg_list,
             "SPLIT": exp.RegexpSplit.from_arg_list,
+            "STR_TO_MAP": lambda args: exp.StrToMap(
+                this=seq_get(args, 0),
+                pair_delim=seq_get(args, 1) or exp.Literal.string(","),
+                key_value_delim=seq_get(args, 2) or exp.Literal.string(":"),
+            ),
             "TO_DATE": format_time_lambda(exp.TsOrDsToDate, "hive"),
             "TO_JSON": exp.JSONFormat.from_arg_list,
             "UNBASE64": exp.FromBase64.from_arg_list,
@@ -313,7 +321,7 @@ class Hive(Dialect):
             )
         def _parse_types(
-            self, check_func: bool = False, schema: bool = False
+            self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
         ) -> t.Optional[exp.Expression]:
             """
             Spark (and most likely Hive) treats casts to CHAR(length) and VARCHAR(length) as casts to
@@ -333,7 +341,9 @@ class Hive(Dialect):
             Reference: https://spark.apache.org/docs/latest/sql-ref-datatypes.html
             """
-            this = super()._parse_types(check_func=check_func, schema=schema)
+            this = super()._parse_types(
+                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
+            )
             if this and not schema:
                 return this.transform(
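Per the docstring above, casts to CHAR(length) and VARCHAR(length) outside a schema definition are treated as casts to STRING. A hedged illustration of that behavior, assuming sqlglot 18.2.0:

    # Sketch: a VARCHAR(20) cast should come back as a STRING cast outside schema definitions.
    import sqlglot

    print(sqlglot.transpile("SELECT CAST(x AS VARCHAR(20)) FROM t", read="hive", write="hive")[0])
    # expected: SELECT CAST(x AS STRING) FROM t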
@@ -345,6 +355,16 @@ class Hive(Dialect):
             return this
+        def _parse_partition_and_order(
+            self,
+        ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
+            return (
+                self._parse_csv(self._parse_conjunction)
+                if self._match_set({TokenType.PARTITION_BY, TokenType.DISTRIBUTE_BY})
+                else [],
+                super()._parse_order(skip_order_token=self._match(TokenType.SORT_BY)),
+            )
     class Generator(generator.Generator):
         LIMIT_FETCH = "LIMIT"
         TABLESAMPLE_WITH_METHOD = False
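The _parse_partition_and_order override appears to let window specifications use Hive's DISTRIBUTE BY / SORT BY in place of PARTITION BY / ORDER BY. A rough sketch under that assumption, with sqlglot 18.2.0:

    # Sketch: DISTRIBUTE BY / SORT BY inside OVER (...) parsed with the hive dialect.
    import sqlglot

    sql = "SELECT ROW_NUMBER() OVER (DISTRIBUTE BY x SORT BY y) FROM t"
    print(sqlglot.transpile(sql, read="hive", write="duckdb")[0])
    # expected: the clause is emitted as PARTITION BY x ORDER BY y for DuckDB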
@@ -354,6 +374,7 @@ class Hive(Dialect):
         QUERY_HINTS = False
         INDEX_ON = "ON TABLE"
         EXTRACT_ALLOWS_QUOTES = False
+        NVL2_SUPPORTED = False
         TYPE_MAPPING = {
             **generator.Generator.TYPE_MAPPING,
@@ -376,6 +397,7 @@ class Hive(Dialect):
                 ]
             ),
             exp.Property: _property_sql,
+            exp.AnyValue: rename_func("FIRST"),
             exp.ApproxDistinct: approx_count_distinct_sql,
             exp.ArrayConcat: rename_func("CONCAT"),
             exp.ArrayJoin: lambda self, e: self.func("CONCAT_WS", e.expression, e.this),
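Mapping exp.AnyValue to FIRST means ANY_VALUE from other dialects is rewritten to Hive's FIRST aggregate on generation. A quick sketch, assuming sqlglot 18.2.0:

    # Sketch: ANY_VALUE parsed with the default dialect, generated as Hive SQL.
    import sqlglot

    print(sqlglot.transpile("SELECT ANY_VALUE(x) FROM t", write="hive")[0])
    # expected: SELECT FIRST(x) FROM t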
@@ -402,6 +424,9 @@ class Hive(Dialect):
             exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
             exp.Min: min_or_least,
             exp.MonthsBetween: lambda self, e: self.func("MONTHS_BETWEEN", e.this, e.expression),
+            exp.NotNullColumnConstraint: lambda self, e: ""
+            if e.args.get("allow_null")
+            else "NOT NULL",
             exp.VarMap: var_map_sql,
             exp.Create: create_with_partitions_sql,
             exp.Quantile: rename_func("PERCENTILE"),
@@ -472,7 +497,7 @@ class Hive(Dialect):
             elif expression.this in exp.DataType.TEMPORAL_TYPES:
                 expression = exp.DataType.build(expression.this)
             elif expression.is_type("float"):
-                size_expression = expression.find(exp.DataTypeSize)
+                size_expression = expression.find(exp.DataTypeParam)
                 if size_expression:
                     size = int(size_expression.name)
                     expression = (
@@ -480,3 +505,7 @@ class Hive(Dialect):
                     )
             return super().datatype_sql(expression)
+        def version_sql(self, expression: exp.Version) -> str:
+            sql = super().version_sql(expression)
+            return sql.replace("FOR ", "", 1)
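The version_sql override strips the leading "FOR " that the base generator emits, so time-travel clauses come back in the bare Hive/Spark form. A final sketch, assuming sqlglot 18.2.0:

    # Sketch: a TIMESTAMP AS OF clause should round-trip without a "FOR" prefix.
    import sqlglot

    sql = "SELECT * FROM tbl TIMESTAMP AS OF '2023-01-01 00:00:00'"
    print(sqlglot.transpile(sql, read="hive", write="hive")[0])
    # expected: SELECT * FROM tbl TIMESTAMP AS OF '2023-01-01 00:00:00'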