1
0
Fork 0

Merging upstream version 25.5.1.

Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
Daniel Baumann 2025-02-13 21:41:14 +01:00
parent 298e7a8147
commit 029b9c2c73
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
136 changed files with 80990 additions and 72541 deletions

View file

@ -173,6 +173,35 @@ def _unix_to_time_sql(self: Presto.Generator, expression: exp.UnixToTime) -> str
return f"FROM_UNIXTIME(CAST({timestamp} AS DOUBLE) / POW(10, {scale}))"
def _jsonextract_sql(self: Presto.Generator, expression: exp.JSONExtract) -> str:
    """Generate the SQL for a JSON extraction.

    By default this emits a ``JSON_EXTRACT(...)`` call. When the expression is
    flagged as a VARIANT extract (its ``variant_extract`` arg is truthy) and the
    dialect setting ``variant_extract_is_json_extract`` is falsy, the JSONPath is
    rewritten to dot notation (ROW access) instead, e.g. ``col.x.y``.

    Args:
        self: The generator used to render sub-expressions and report
            unsupported constructs.
        expression: The JSON extraction node to render.

    Returns:
        The generated SQL string.
    """
    is_json_extract = self.dialect.settings.get("variant_extract_is_json_extract", True)

    # Generate JSON_EXTRACT unless the user has configured that a Snowflake / Databricks
    # VARIANT extract (e.g. col:x.y) should map to dot notation (i.e. ROW access) in Presto/Trino
    if not expression.args.get("variant_extract") or is_json_extract:
        return self.func(
            "JSON_EXTRACT", expression.this, expression.expression, *expression.expressions
        )

    this = self.sql(expression, "this")

    # Convert the JSONPath extraction `JSON_EXTRACT(col, '$.x.y')` to a ROW access col.x.y
    # NOTE(review): the first path element is skipped; presumably it is the JSONPath root ($).
    segments = []
    for path_key in expression.expression.expressions[1:]:
        if not isinstance(path_key, exp.JSONPathKey):
            # Cannot transpile subscripts, wildcards etc to dot notation
            self.unsupported(f"Cannot transpile JSONPath segment '{path_key}' to ROW access")
            continue

        key = path_key.this
        if not exp.SAFE_IDENTIFIER_RE.match(key):
            # Quote keys that are not safe bare identifiers. Embedded double quotes are
            # doubled so that the resulting quoted identifier stays well-formed.
            escaped_key = key.replace('"', '""')
            key = f'"{escaped_key}"'

        segments.append(f".{key}")

    return this + "".join(segments)
def _to_int(expression: exp.Expression) -> exp.Expression:
if not expression.type:
from sqlglot.optimizer.annotate_types import annotate_types
@ -227,7 +256,7 @@ class Presto(Dialect):
"TDIGEST": TokenType.TDIGEST,
"HYPERLOGLOG": TokenType.HLLSKETCH,
}
KEYWORDS.pop("/*+")
KEYWORDS.pop("QUALIFY")
class Parser(parser.Parser):
@ -305,6 +334,7 @@ class Presto(Dialect):
MULTI_ARG_DISTINCT = False
SUPPORTS_TO_NUMBER = False
HEX_FUNC = "TO_HEX"
PARSE_JSON_NAME = "JSON_PARSE"
PROPERTIES_LOCATION = {
**generator.Generator.PROPERTIES_LOCATION,
@ -389,7 +419,7 @@ class Presto(Dialect):
exp.If: if_sql(),
exp.ILike: no_ilike_sql,
exp.Initcap: _initcap_sql,
exp.ParseJSON: rename_func("JSON_PARSE"),
exp.JSONExtract: _jsonextract_sql,
exp.Last: _first_last_sql,
exp.LastValue: _first_last_sql,
exp.LastDay: lambda self, e: self.func("LAST_DAY_OF_MONTH", e.this),
@ -448,9 +478,6 @@ class Presto(Dialect):
[transforms.remove_within_group_for_percentiles]
),
exp.Xor: bool_xor_sql,
exp.MD5: lambda self, e: self.func(
"LOWER", self.func("TO_HEX", self.func("MD5", self.sql(e, "this")))
),
exp.MD5Digest: rename_func("MD5"),
exp.SHA: rename_func("SHA1"),
exp.SHA2: sha256_sql,
@ -517,6 +544,19 @@ class Presto(Dialect):
"with",
}
def md5_sql(self, expression: exp.MD5) -> str:
    """Render an MD5 expression as ``LOWER(TO_HEX(MD5(...)))``.

    If the operand has no type yet, it is annotated first. Operands whose type
    is one of ``exp.DataType.TEXT_TYPES`` are wrapped in an ``Encode`` node
    with charset ``'utf-8'`` before being hashed.

    Args:
        expression: The MD5 node to render.

    Returns:
        The generated SQL string.
    """
    operand = expression.this

    # The operand's type decides whether it has to be encoded before hashing,
    # so infer it on demand when it has not been annotated yet.
    if not operand.type:
        from sqlglot.optimizer.annotate_types import annotate_types

        operand = annotate_types(operand)

    is_text = operand.is_type(*exp.DataType.TEXT_TYPES)
    if is_text:
        # NOTE(review): presumably needed because the target MD5 function takes
        # binary input rather than text - confirm against the engine docs.
        operand = exp.Encode(this=operand, charset=exp.Literal.string("utf-8"))

    digest = self.func("MD5", self.sql(operand))
    hex_digest = self.func("TO_HEX", digest)
    return self.func("LOWER", hex_digest)
def strtounix_sql(self, expression: exp.StrToUnix) -> str:
# Since `TO_UNIXTIME` requires a `TIMESTAMP`, we need to parse the argument into one.
# To do this, we first try to `DATE_PARSE` it, but since this can fail when there's a