Merging upstream version 20.3.0.

Signed-off-by: Daniel Baumann <daniel@debian.org>
2025-02-13 21:17:51 +01:00 · 2025-02-13 21:17:51 +01:00 · 4d9376ba93
commit 4d9376ba93
parent 2945bcc4f7
132 changed files with 55125 additions and 51576 deletions
--- a/sqlglot/dialects/presto.py
+++ b/sqlglot/dialects/presto.py
@ -186,6 +186,27 @@ def _unix_to_time_sql(self: Presto.Generator, expression: exp.UnixToTime) -> str
    return ""


+def _to_int(expression: exp.Expression) -> exp.Expression:
+    """Cast `expression` to BIGINT unless it is already integer-typed.
+
+    When the node carries no type, `annotate_types` is run on it first. If the
+    node is still untyped afterwards, or its type is one of
+    `exp.DataType.INTEGER_TYPES`, it is returned unchanged.
+    """
+    if not expression.type:
+        # Imported at call time rather than at module level.
+        from sqlglot.optimizer.annotate_types import annotate_types
+
+        annotate_types(expression)
+
+    inferred = expression.type
+    if not inferred or inferred.this in exp.DataType.INTEGER_TYPES:
+        return expression
+
+    return exp.cast(expression, to=exp.DataType.Type.BIGINT)
+
+
+def _parse_to_char(args: t.List) -> exp.TimeToStr:
+    """Parse TO_CHAR arguments into a `TimeToStr` node using the Teradata format mapping.
+
+    A literal format argument (second positional argument) is uppercased in
+    place before the node is built.
+    """
+    format_arg = seq_get(args, 1)
+    if isinstance(format_arg, exp.Literal):
+        # Uppercased so that it matches Teradata's format mapping keys
+        format_arg.set("this", format_arg.this.upper())
+
+    # "teradata" is deliberate here, because the time formats are different in Presto.
+    # See https://prestodb.io/docs/current/functions/teradata.html?highlight=to_char#to_char
+    build_time_to_str = format_time_lambda(exp.TimeToStr, "teradata")
+    return build_time_to_str(args)
+
+
 class Presto(Dialect):
    INDEX_OFFSET = 1
    NULL_ORDERING = "nulls_are_last"
@ -201,6 +222,12 @@ class Presto(Dialect):
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    class Tokenizer(tokens.Tokenizer):
+        UNICODE_STRINGS = [
+            (prefix + q, q)
+            for q in t.cast(t.List[str], tokens.Tokenizer.QUOTES)
+            for prefix in ("U&", "u&")
+        ]
+
        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "START": TokenType.BEGIN,
@ -253,8 +280,9 @@ class Presto(Dialect):
            "STRPOS": lambda args: exp.StrPosition(
                this=seq_get(args, 0), substr=seq_get(args, 1), instance=seq_get(args, 2)
            ),
-            "TO_UNIXTIME": exp.TimeToUnix.from_arg_list,
+            "TO_CHAR": _parse_to_char,
            "TO_HEX": exp.Hex.from_arg_list,
+            "TO_UNIXTIME": exp.TimeToUnix.from_arg_list,
            "TO_UTF8": lambda args: exp.Encode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
@ -315,7 +343,12 @@ class Presto(Dialect):
            exp.Cast: transforms.preprocess([transforms.epoch_cast_to_ts]),
            exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
            exp.DateAdd: lambda self, e: self.func(
-                "DATE_ADD", exp.Literal.string(e.text("unit") or "day"), e.expression, e.this
+                "DATE_ADD",
+                exp.Literal.string(e.text("unit") or "day"),
+                _to_int(
+                    e.expression,
+                ),
+                e.this,
            ),
            exp.DateDiff: lambda self, e: self.func(
                "DATE_DIFF", exp.Literal.string(e.text("unit") or "day"), e.expression, e.this
@ -325,7 +358,7 @@ class Presto(Dialect):
            exp.DateSub: lambda self, e: self.func(
                "DATE_ADD",
                exp.Literal.string(e.text("unit") or "day"),
-                e.expression * -1,
+                _to_int(e.expression * -1),
                e.this,
            ),
            exp.Decode: lambda self, e: encode_decode_sql(self, e, "FROM_UTF8"),
@ -354,6 +387,7 @@ class Presto(Dialect):
            exp.Right: right_to_substring_sql,
            exp.SafeDivide: no_safe_divide_sql,
            exp.Schema: _schema_sql,
+            exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_qualify,
@ -377,6 +411,7 @@ class Presto(Dialect):
            exp.TimeStrToUnix: lambda self, e: f"TO_UNIXTIME(DATE_PARSE({self.sql(e, 'this')}, {Presto.TIME_FORMAT}))",
            exp.TimeToStr: lambda self, e: f"DATE_FORMAT({self.sql(e, 'this')}, {self.format_time(e)})",
            exp.TimeToUnix: rename_func("TO_UNIXTIME"),
+            exp.ToChar: lambda self, e: f"DATE_FORMAT({self.sql(e, 'this')}, {self.format_time(e)})",
            exp.TryCast: transforms.preprocess([transforms.epoch_cast_to_ts]),
            exp.TsOrDiToDi: lambda self, e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS VARCHAR), '-', ''), 1, 8) AS INT)",
            exp.TsOrDsAdd: _ts_or_ds_add_sql,