Adding upstream version 25.20.1.

Signed-off-by: Daniel Baumann <daniel@debian.org>
2025-02-13 21:53:56 +01:00 · 2025-02-13 21:53:56 +01:00 · 0b78a18345
commit 0b78a18345
parent b35dbeb6b6
119 changed files with 78094 additions and 71498 deletions
--- a/tests/test_optimizer.py
+++ b/tests/test_optimizer.py
@@ -1345,3 +1345,26 @@ FROM READ_CSV('tests/fixtures/optimizer/tpc-h/nation.csv.gz', 'delimiter', '|')
        self.assertEqual(4, normalization_distance(gen_expr(2), max_=100))
        self.assertEqual(18, normalization_distance(gen_expr(3), max_=100))
        self.assertEqual(110, normalization_distance(gen_expr(10), max_=100))
+
+    def test_custom_annotators(self):
+        # In Spark hierarchy, SUBSTRING result type is dependent on input expr type
+        for dialect in ("spark2", "spark", "databricks"):
+            for expr, sql_type in (
+                ("col", "STRING"),
+                ("col", "BINARY"),
+                ("'str_literal'", "STRING"),
+                ("CAST('str_literal' AS BINARY)", "BINARY"),
+            ):
+                with self.subTest(
+                    f"Testing {dialect}'s SUBSTRING() result type for {(expr, sql_type)}"
+                ):
+                    ast = parse_one(f"SELECT substring({expr}, 2, 3) AS x FROM tbl", read=dialect)
+
+                    subst_type = (
+                        optimizer.optimize(ast, schema={"tbl": {"col": sql_type}}, dialect=dialect)
+                        .expressions[0]
+                        .type
+                    )
+                    expected = exp.DataType.build(sql_type).sql(dialect)
+
+                    self.assertEqual(expected, subst_type.sql(dialect))