2025-02-13 06:15:54 +01:00
|
|
|
from sqlglot import exp
|
2025-02-13 14:47:39 +01:00
|
|
|
from sqlglot.dialects.dialect import create_with_partitions_sql, rename_func
|
2025-02-13 14:44:19 +01:00
|
|
|
from sqlglot.dialects.hive import Hive
|
2025-02-13 06:15:54 +01:00
|
|
|
from sqlglot.helper import list_get
|
2025-02-13 14:44:19 +01:00
|
|
|
from sqlglot.parser import Parser
|
2025-02-13 06:15:54 +01:00
|
|
|
|
|
|
|
|
|
|
|
def _create_sql(self, e):
|
|
|
|
kind = e.args.get("kind")
|
|
|
|
temporary = e.args.get("temporary")
|
|
|
|
|
|
|
|
if kind.upper() == "TABLE" and temporary is True:
|
|
|
|
return f"CREATE TEMPORARY VIEW {self.sql(e, 'this')} AS {self.sql(e, 'expression')}"
|
2025-02-13 14:37:25 +01:00
|
|
|
return create_with_partitions_sql(self, e)
|
2025-02-13 06:15:54 +01:00
|
|
|
|
|
|
|
|
|
|
|
def _map_sql(self, expression):
|
|
|
|
keys = self.sql(expression.args["keys"])
|
|
|
|
values = self.sql(expression.args["values"])
|
|
|
|
return f"MAP_FROM_ARRAYS({keys}, {values})"
|
|
|
|
|
|
|
|
|
|
|
|
def _str_to_date(self, expression):
    """Render exp.StrToDate as Spark's TO_DATE.

    The explicit format argument is emitted only when it differs from
    Hive's default date format, which TO_DATE already assumes.
    """
    value = self.sql(expression, "this")
    fmt = self.format_time(expression)
    if fmt != Hive.date_format:
        return f"TO_DATE({value}, {fmt})"
    return f"TO_DATE({value})"
|
|
|
|
|
|
|
|
|
|
|
|
def _unix_to_time(self, expression):
|
|
|
|
scale = expression.args.get("scale")
|
|
|
|
timestamp = self.sql(expression, "this")
|
|
|
|
if scale is None:
|
|
|
|
return f"FROM_UNIXTIME({timestamp})"
|
|
|
|
if scale == exp.UnixToTime.SECONDS:
|
|
|
|
return f"TIMESTAMP_SECONDS({timestamp})"
|
|
|
|
if scale == exp.UnixToTime.MILLIS:
|
|
|
|
return f"TIMESTAMP_MILLIS({timestamp})"
|
|
|
|
if scale == exp.UnixToTime.MICROS:
|
|
|
|
return f"TIMESTAMP_MICROS({timestamp})"
|
|
|
|
|
|
|
|
raise ValueError("Improper scale for timestamp")
|
|
|
|
|
|
|
|
|
|
|
|
class Spark(Hive):
    """Spark SQL dialect.

    Derives from Hive and overrides only the pieces where Spark's syntax,
    type names, or function names diverge.
    """

    class Parser(Hive.Parser):
        # Spark-specific function-name -> expression builders, layered on
        # top of Hive's (later keys override earlier ones on merge).
        FUNCTIONS = {
            **Hive.Parser.FUNCTIONS,
            "MAP_FROM_ARRAYS": exp.Map.from_arg_list,
            "TO_UNIX_TIMESTAMP": exp.StrToUnix.from_arg_list,
            # LEFT(s, n) is parsed as SUBSTRING(s, 1, n).
            "LEFT": lambda args: exp.Substring(
                this=list_get(args, 0),
                start=exp.Literal.number(1),
                length=list_get(args, 1),
            ),
            "SHIFTLEFT": lambda args: exp.BitwiseLeftShift(
                this=list_get(args, 0),
                expression=list_get(args, 1),
            ),
            "SHIFTRIGHT": lambda args: exp.BitwiseRightShift(
                this=list_get(args, 0),
                expression=list_get(args, 1),
            ),
            # RIGHT(s, n) is parsed as SUBSTRING(s, LENGTH(s) - (n + 1), n).
            # NOTE(review): RIGHT normally needs start = LENGTH(s) - n + 1;
            # the Sub/Add combination here yields LENGTH(s) - n - 1 — verify.
            "RIGHT": lambda args: exp.Substring(
                this=list_get(args, 0),
                start=exp.Sub(
                    this=exp.Length(this=list_get(args, 0)),
                    expression=exp.Add(this=list_get(args, 1), expression=exp.Literal.number(1)),
                ),
                length=list_get(args, 1),
            ),
            "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list,
            "IIF": exp.If.from_arg_list,
        }

        # Join-hint pseudo-functions. Note: `Parser` here resolves to the
        # module-level sqlglot.parser.Parser import, not Hive.Parser.
        FUNCTION_PARSERS = {
            **Parser.FUNCTION_PARSERS,
            "BROADCAST": lambda self: self._parse_join_hint("BROADCAST"),
            "BROADCASTJOIN": lambda self: self._parse_join_hint("BROADCASTJOIN"),
            "MAPJOIN": lambda self: self._parse_join_hint("MAPJOIN"),
            "MERGE": lambda self: self._parse_join_hint("MERGE"),
            "SHUFFLEMERGE": lambda self: self._parse_join_hint("SHUFFLEMERGE"),
            "MERGEJOIN": lambda self: self._parse_join_hint("MERGEJOIN"),
            "SHUFFLE_HASH": lambda self: self._parse_join_hint("SHUFFLE_HASH"),
            "SHUFFLE_REPLICATE_NL": lambda self: self._parse_join_hint("SHUFFLE_REPLICATE_NL"),
        }

    class Generator(Hive.Generator):
        # Spark's names for the integer types.
        TYPE_MAPPING = {
            **Hive.Generator.TYPE_MAPPING,
            exp.DataType.Type.TINYINT: "BYTE",
            exp.DataType.Type.SMALLINT: "SHORT",
            exp.DataType.Type.BIGINT: "LONG",
        }

        TRANSFORMS = {
            # Start from Hive's transforms, dropping the ones Spark handles
            # natively (ArraySort, ILike fall back to default generation).
            **{k: v for k, v in Hive.Generator.TRANSFORMS.items() if k not in {exp.ArraySort, exp.ILike}},
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.FileFormatProperty: lambda self, e: f"USING {e.text('value').upper()}",
            # Array sum via Spark's higher-order AGGREGATE function.
            exp.ArraySum: lambda self, e: f"AGGREGATE({self.sql(e, 'this')}, 0, (acc, x) -> acc + x, acc -> acc)",
            exp.BitwiseLeftShift: rename_func("SHIFTLEFT"),
            exp.BitwiseRightShift: rename_func("SHIFTRIGHT"),
            exp.DateTrunc: rename_func("TRUNC"),
            # Hints are emitted as /*+ ... */ optimizer comments.
            exp.Hint: lambda self, e: f" /*+ {self.expressions(e).strip()} */",
            exp.StrToDate: _str_to_date,
            exp.StrToTime: lambda self, e: f"TO_TIMESTAMP({self.sql(e, 'this')}, {self.format_time(e)})",
            exp.UnixToTime: _unix_to_time,
            exp.Create: _create_sql,
            exp.Map: _map_sql,
            exp.Reduce: rename_func("AGGREGATE"),
            exp.StructKwarg: lambda self, e: f"{self.sql(e, 'this')}: {self.sql(e, 'expression')}",
            exp.TimestampTrunc: lambda self, e: f"DATE_TRUNC({self.sql(e, 'unit')}, {self.sql(e, 'this')})",
            exp.VariancePop: rename_func("VAR_POP"),
            exp.DateFromParts: rename_func("MAKE_DATE"),
        }

        # Do not wrap derived-table VALUES clauses in extra parentheses.
        WRAP_DERIVED_VALUES = False

    class Tokenizer(Hive.Tokenizer):
        # Hex literals are delimited as X'...'.
        HEX_STRINGS = [("X'", "'")]
|