
Adding upstream version 26.9.0.

Signed-off-by: Daniel Baumann <daniel@debian.org>
Daniel Baumann committed 2025-03-09 08:41:47 +01:00
commit 412e82cbc6 (parent 4c394df415)
Signed by: daniel (GPG key ID: FBB4F0E80A80222F)
43 changed files with 27039 additions and 26675 deletions

tests/dialects/test_bigquery.py

@@ -142,7 +142,6 @@ LANGUAGE js AS
         self.validate_identity("SELECT test.Unknown FROM test")
         self.validate_identity(r"SELECT '\n\r\a\v\f\t'")
         self.validate_identity("SELECT * FROM tbl FOR SYSTEM_TIME AS OF z")
-        self.validate_identity("STRING_AGG(DISTINCT a ORDER BY b DESC, c DESC LIMIT 10)")
         self.validate_identity("SELECT PARSE_TIMESTAMP('%c', 'Thu Dec 25 07:30:00 2008', 'UTC')")
         self.validate_identity("SELECT ANY_VALUE(fruit HAVING MAX sold) FROM fruits")
         self.validate_identity("SELECT ANY_VALUE(fruit HAVING MIN sold) FROM fruits")
@@ -150,10 +149,6 @@ LANGUAGE js AS
         self.validate_identity("SELECT CAST(CURRENT_DATE AS STRING FORMAT 'DAY') AS current_day")
         self.validate_identity("SAFE_CAST(encrypted_value AS STRING FORMAT 'BASE64')")
         self.validate_identity("CAST(encrypted_value AS STRING FORMAT 'BASE64')")
-        self.validate_identity("STRING_AGG(a)")
-        self.validate_identity("STRING_AGG(a, ' & ')")
-        self.validate_identity("STRING_AGG(DISTINCT a, ' & ')")
-        self.validate_identity("STRING_AGG(a, ' & ' ORDER BY LENGTH(a))")
         self.validate_identity("DATE(2016, 12, 25)")
         self.validate_identity("DATE(CAST('2016-12-25 23:59:59' AS DATETIME))")
         self.validate_identity("SELECT foo IN UNNEST(bar) AS bla")
@@ -1844,12 +1839,6 @@ WHERE
             "CREATE TEMPORARY FUNCTION string_length_0(strings ARRAY<STRING>) RETURNS FLOAT64 LANGUAGE js OPTIONS (library=['gs://ibis-testing-libraries/lodash.min.js']) AS '\\'use strict\\'; function string_length(strings) { return _.sum(_.map(strings, ((x) => x.length))); } return string_length(strings);'",
         )
 
-    def test_group_concat(self):
-        self.validate_all(
-            "SELECT a, GROUP_CONCAT(b) FROM table GROUP BY a",
-            write={"bigquery": "SELECT a, STRING_AGG(b) FROM table GROUP BY a"},
-        )
-
     def test_remove_precision_parameterized_types(self):
         self.validate_identity("CREATE TABLE test (a NUMERIC(10, 2))")
         self.validate_identity(
@@ -2347,3 +2336,19 @@ OPTIONS (
                 "snowflake": "SELECT TO_CHAR(CAST(CAST('2050-12-25 15:30:55+00' AS TIMESTAMPTZ) AS TIMESTAMP), 'mon-DD-yyyy')",
             },
         )
+
+    def test_string_agg(self):
+        self.validate_identity(
+            "SELECT a, GROUP_CONCAT(b) FROM table GROUP BY a",
+            "SELECT a, STRING_AGG(b, ',') FROM table GROUP BY a",
+        )
+        self.validate_identity("STRING_AGG(a, ' & ')")
+        self.validate_identity("STRING_AGG(DISTINCT a, ' & ')")
+        self.validate_identity("STRING_AGG(a, ' & ' ORDER BY LENGTH(a))")
+        self.validate_identity("STRING_AGG(a)", "STRING_AGG(a, ',')")
+        self.validate_identity(
+            "STRING_AGG(DISTINCT a ORDER BY b DESC, c DESC LIMIT 10)",
+            "STRING_AGG(DISTINCT a, ',' ORDER BY b DESC, c DESC LIMIT 10)",
+        )
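
The new test_string_agg cases above assert that BigQuery's STRING_AGG now always renders with an explicit delimiter, defaulting to ','. A minimal sketch of the same round-trip through sqlglot's public transpile API (the SQL and expected output are taken from the tests above; running it assumes sqlglot 26.9.0 is installed):

import sqlglot

# The BigQuery dialect parses GROUP_CONCAT and renders it as STRING_AGG,
# making the implicit ',' separator explicit.
sql = "SELECT a, GROUP_CONCAT(b) FROM table GROUP BY a"
print(sqlglot.transpile(sql, read="bigquery", write="bigquery")[0])
# per the test: SELECT a, STRING_AGG(b, ',') FROM table GROUP BY a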

tests/dialects/test_hive.py

@@ -173,6 +173,7 @@ class TestHive(Validator):
         self.validate_identity(
             """CREATE EXTERNAL TABLE `my_table` (`a7` ARRAY<DATE>) ROW FORMAT SERDE 'a' STORED AS INPUTFORMAT 'b' OUTPUTFORMAT 'c' LOCATION 'd' TBLPROPERTIES ('e'='f')"""
         )
+        self.validate_identity("CREATE EXTERNAL TABLE X (y INT) STORED BY 'x'")
         self.validate_identity("ALTER VIEW v1 AS SELECT x, UPPER(s) AS s FROM t2")
         self.validate_identity("ALTER VIEW v1 (c1, c2) AS SELECT x, UPPER(s) AS s FROM t2")
         self.validate_identity(

tests/dialects/test_mysql.py

@@ -83,6 +83,12 @@ class TestMySQL(Validator):
         self.validate_identity(
             "CREATE OR REPLACE VIEW my_view AS SELECT column1 AS `boo`, column2 AS `foo` FROM my_table WHERE column3 = 'some_value' UNION SELECT q.* FROM fruits_table, JSON_TABLE(Fruits, '$[*]' COLUMNS(id VARCHAR(255) PATH '$.$id', value VARCHAR(255) PATH '$.value')) AS q",
         )
+        self.validate_identity(
+            "CREATE TABLE test_table (id INT AUTO_INCREMENT, PRIMARY KEY (id) USING BTREE)"
+        )
+        self.validate_identity(
+            "CREATE TABLE test_table (id INT AUTO_INCREMENT, PRIMARY KEY (id) USING HASH)"
+        )
         self.validate_identity(
             "/*left*/ EXPLAIN SELECT /*hint*/ col FROM t1 /*right*/",
             "/* left */ DESCRIBE /* hint */ SELECT col FROM t1 /* right */",
@@ -340,6 +346,22 @@ class TestMySQL(Validator):
                 "tsql": "CHAR(10)",
             },
         )
+        self.validate_identity("CREATE TABLE t (foo VARBINARY(5))")
+        self.validate_all(
+            "CREATE TABLE t (foo BLOB)",
+            write={
+                "mysql": "CREATE TABLE t (foo BLOB)",
+                "oracle": "CREATE TABLE t (foo BLOB)",
+                "postgres": "CREATE TABLE t (foo BYTEA)",
+                "tsql": "CREATE TABLE t (foo VARBINARY)",
+                "sqlite": "CREATE TABLE t (foo BLOB)",
+                "duckdb": "CREATE TABLE t (foo VARBINARY)",
+                "hive": "CREATE TABLE t (foo BINARY)",
+                "bigquery": "CREATE TABLE t (foo BYTES)",
+                "redshift": "CREATE TABLE t (foo VARBYTE)",
+                "clickhouse": "CREATE TABLE t (foo Nullable(String))",
+            },
+        )
 
     def test_escape(self):
         self.validate_identity("""'"abc"'""")
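
A quick sketch of the new MySQL BLOB mapping via sqlglot.transpile, replaying the source DDL and a few of the expected targets from the validate_all case above (assumes sqlglot 26.9.0):

import sqlglot

# MySQL's BLOB column type is mapped to each target dialect's binary type.
ddl = "CREATE TABLE t (foo BLOB)"
for dialect in ("postgres", "bigquery", "redshift"):
    print(dialect, "->", sqlglot.transpile(ddl, read="mysql", write=dialect)[0])
# postgres -> CREATE TABLE t (foo BYTEA)
# bigquery -> CREATE TABLE t (foo BYTES)
# redshift -> CREATE TABLE t (foo VARBYTE)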

tests/dialects/test_oracle.py

@@ -319,6 +319,7 @@ class TestOracle(Validator):
                 "tsql": "SELECT * FROM t ORDER BY (SELECT NULL) OFFSET 0 ROWS FETCH FIRST 10 ROWS ONLY",
             },
         )
+        self.validate_identity("CREATE OR REPLACE FORCE VIEW foo1.foo2")
 
     def test_join_marker(self):
         self.validate_identity("SELECT e1.x, e2.x FROM e e1, e e2 WHERE e1.y (+) = e2.y")

tests/dialects/test_postgres.py

@@ -345,6 +345,28 @@ class TestPostgres(Validator):
             "CAST(x AS INT8)",
             "CAST(x AS BIGINT)",
         )
+        self.validate_identity(
+            """
+            WITH
+            json_data AS (SELECT '{"field_id": [1, 2, 3]}'::JSON AS data),
+            field_ids AS (SELECT 'field_id' AS field_id)
+            SELECT
+            JSON_ARRAY_ELEMENTS(json_data.data -> field_ids.field_id) AS element
+            FROM json_data, field_ids
+            """,
+            """WITH json_data AS (
+  SELECT
+    CAST('{"field_id": [1, 2, 3]}' AS JSON) AS data
+), field_ids AS (
+  SELECT
+    'field_id' AS field_id
+)
+SELECT
+  JSON_ARRAY_ELEMENTS(JSON_EXTRACT_PATH(json_data.data, field_ids.field_id)) AS element
+FROM json_data, field_ids""",
+            pretty=True,
+        )
         self.validate_all(
             "SELECT ARRAY[]::INT[] AS foo",
@@ -1368,3 +1390,25 @@ CROSS JOIN JSON_ARRAY_ELEMENTS(CAST(JSON_EXTRACT_PATH(tbox, 'boxes') AS JSON)) A
         self.validate_identity(
             "WITH RECURSIVE search_graph(id, link, data, depth) AS (SELECT g.id, g.link, g.data, 1 FROM graph AS g UNION ALL SELECT g.id, g.link, g.data, sg.depth + 1 FROM graph AS g, search_graph AS sg WHERE g.id = sg.link) CYCLE id SET is_cycle USING path SELECT * FROM search_graph"
         )
+
+    def test_json_extract(self):
+        for arrow_op in ("->", "->>"):
+            with self.subTest(f"Ensure {arrow_op} operator roundtrips int values as subscripts"):
+                self.validate_all(
+                    f"SELECT foo {arrow_op} 1",
+                    write={
+                        "postgres": f"SELECT foo {arrow_op} 1",
+                        "duckdb": f"SELECT foo {arrow_op} '$[1]'",
+                    },
+                )
+
+            with self.subTest(
+                f"Ensure {arrow_op} operator roundtrips string values that represent integers as keys"
+            ):
+                self.validate_all(
+                    f"SELECT foo {arrow_op} '12'",
+                    write={
+                        "postgres": f"SELECT foo {arrow_op} '12'",
+                        "clickhouse": "SELECT JSONExtractString(foo, '12')",
+                    },
+                )
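
A minimal sketch of what the int-subscript case asserts, via sqlglot.transpile (SQL and expected output taken from the test above; assumes sqlglot 26.9.0):

import sqlglot

# Postgres JSON arrow operators with an integer subscript are rewritten for
# DuckDB, which addresses array elements with a JSONPath string instead.
print(sqlglot.transpile("SELECT foo -> 1", read="postgres", write="duckdb")[0])
# per the test: SELECT foo -> '$[1]'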

tests/dialects/test_redshift.py

@@ -42,8 +42,8 @@ class TestRedshift(Validator):
                 "duckdb": "STRING_AGG(sellerid, ', ')",
             },
             write={
-                # GROUP_CONCAT and STRING_AGG are aliases in DuckDB
-                "duckdb": "GROUP_CONCAT(sellerid, ', ')",
+                # GROUP_CONCAT, LISTAGG and STRING_AGG are aliases in DuckDB
+                "duckdb": "LISTAGG(sellerid, ', ')",
                 "redshift": "LISTAGG(sellerid, ', ')",
             },
         )
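
A short sketch of the changed expectation, assuming (the hunk does not show it) that the expression under test is Redshift's LISTAGG(sellerid, ', '); requires sqlglot 26.9.0:

import sqlglot

# DuckDB output now prefers LISTAGG over the equivalent GROUP_CONCAT alias.
print(sqlglot.transpile("LISTAGG(sellerid, ', ')", read="redshift", write="duckdb")[0])
# per the test: LISTAGG(sellerid, ', ')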

tests/dialects/test_snowflake.py

@@ -52,7 +52,6 @@ class TestSnowflake(Validator):
         self.validate_identity("SELECT OBJECT_CONSTRUCT()")
         self.validate_identity("SELECT DAYOFMONTH(CURRENT_TIMESTAMP())")
         self.validate_identity("SELECT DAYOFYEAR(CURRENT_TIMESTAMP())")
-        self.validate_identity("LISTAGG(data['some_field'], ',')")
         self.validate_identity("WEEKOFYEAR(tstamp)")
         self.validate_identity("SELECT QUARTER(CURRENT_TIMESTAMP())")
         self.validate_identity("SELECT SUM(amount) FROM mytable GROUP BY ALL")
@@ -107,6 +106,10 @@ class TestSnowflake(Validator):
         self.validate_identity(
             """SELECT TO_TIMESTAMP('2025-01-16T14:45:30.123+0500', 'yyyy-mm-DD"T"hh24:mi:ss.ff3TZHTZM')"""
         )
+        self.validate_identity(
+            "SELECT 1 put",
+            "SELECT 1 AS put",
+        )
         self.validate_identity(
             "WITH t (SELECT 1 AS c) SELECT c FROM t",
             "WITH t AS (SELECT 1 AS c) SELECT c FROM t",
@@ -296,6 +299,27 @@
             "SELECT * RENAME (a AS b), c AS d FROM xxx",
         )
+        # Support for optional trailing commas after tables in from clause
+        self.validate_identity(
+            "SELECT * FROM xxx, yyy, zzz,",
+            "SELECT * FROM xxx, yyy, zzz",
+        )
+        self.validate_identity(
+            "SELECT * FROM xxx, yyy, zzz, WHERE foo = bar",
+            "SELECT * FROM xxx, yyy, zzz WHERE foo = bar",
+        )
+        self.validate_identity(
+            "SELECT * FROM xxx, yyy, zzz",
+            "SELECT * FROM xxx, yyy, zzz",
+        )
+        self.validate_all(
+            "CREATE TABLE test_table (id NUMERIC NOT NULL AUTOINCREMENT)",
+            write={
+                "duckdb": "CREATE TABLE test_table (id DECIMAL(38, 0) NOT NULL)",
+                "snowflake": "CREATE TABLE test_table (id DECIMAL(38, 0) NOT NULL AUTOINCREMENT)",
+            },
+        )
         self.validate_all(
             "SELECT TO_TIMESTAMP('2025-01-16 14:45:30.123', 'yyyy-mm-DD hh24:mi:ss.ff6')",
             write={
@@ -852,13 +876,6 @@ class TestSnowflake(Validator):
                 "snowflake": "CASE WHEN x = a OR (x IS NULL AND a IS NULL) THEN b WHEN x = c OR (x IS NULL AND c IS NULL) THEN d ELSE e END",
             },
         )
-        self.validate_all(
-            "SELECT LISTAGG(col1, ', ') WITHIN GROUP (ORDER BY col2) FROM t",
-            write={
-                "duckdb": "SELECT GROUP_CONCAT(col1, ', ' ORDER BY col2) FROM t",
-                "snowflake": "SELECT LISTAGG(col1, ', ') WITHIN GROUP (ORDER BY col2) FROM t",
-            },
-        )
         self.validate_all(
             "SELECT APPROX_PERCENTILE(a, 0.5) FROM t",
             read={
@@ -2369,6 +2386,43 @@ SINGLE = TRUE""",
             """COPY INTO 's3://example/contacts.csv' FROM "db"."tbl" STORAGE_INTEGRATION = "PROD_S3_SIDETRADE_INTEGRATION" FILE_FORMAT = (FORMAT_NAME="my_csv_format" TYPE=CSV COMPRESSION=NONE NULL_IF=('') FIELD_OPTIONALLY_ENCLOSED_BY='"') MATCH_BY_COLUMN_NAME = CASE_SENSITIVE OVERWRITE = TRUE SINGLE = TRUE INCLUDE_METADATA = ("col1" = "METADATA$START_SCAN_TIME")""",
         )
+
+    def test_put_to_stage(self):
+        # PUT with file path and stage ref containing spaces (wrapped in single quotes)
+        ast = parse_one("PUT 'file://my file.txt' '@s1/my folder'", read="snowflake")
+        self.assertIsInstance(ast, exp.Put)
+        self.assertEqual(ast.this, exp.Literal(this="file://my file.txt", is_string=True))
+        self.assertEqual(ast.args["target"], exp.Var(this="@s1/my folder"))
+
+        # expression with additional properties
+        ast = parse_one(
+            "PUT 'file:///tmp/my.txt' @stage1/folder PARALLEL = 1 AUTO_COMPRESS=false source_compression=gzip OVERWRITE=TRUE",
+            read="snowflake",
+        )
+        self.assertIsInstance(ast, exp.Put)
+        self.assertEqual(ast.this, exp.Literal(this="file:///tmp/my.txt", is_string=True))
+        self.assertEqual(ast.args["target"], exp.Var(this="@stage1/folder"))
+        properties = ast.args.get("properties")
+        props_dict = {prop.this.this: prop.args["value"].this for prop in properties.expressions}
+        self.assertEqual(
+            props_dict,
+            {
+                "PARALLEL": "1",
+                "AUTO_COMPRESS": False,
+                "source_compression": "gzip",
+                "OVERWRITE": True,
+            },
+        )
+
+        # validate identity for different args and properties
+        self.validate_identity("PUT 'file:///dir/tmp.csv' @s1/test")
+
+        # the unquoted URI variant is not fully supported yet
+        self.validate_identity("PUT file:///dir/tmp.csv @%table", check_command_warning=True)
+        self.validate_identity(
+            "PUT file:///dir/tmp.csv @s1/test PARALLEL=1 AUTO_COMPRESS=FALSE source_compression=gzip OVERWRITE=TRUE",
+            check_command_warning=True,
+        )
 
     def test_querying_semi_structured_data(self):
         self.validate_identity("SELECT $1")
         self.validate_identity("SELECT $1.elem")
@@ -2450,3 +2504,20 @@ SINGLE = TRUE""",
                 "trino": "SELECT 1 ORDER BY 1 OFFSET 0",
             },
         )
+
+    def test_listagg(self):
+        self.validate_identity("LISTAGG(data['some_field'], ',')")
+
+        for distinct in ("", "DISTINCT "):
+            self.validate_all(
+                f"SELECT LISTAGG({distinct}col, '|SEPARATOR|') WITHIN GROUP (ORDER BY col2) FROM t",
+                read={
+                    "trino": f"SELECT LISTAGG({distinct}col, '|SEPARATOR|') WITHIN GROUP (ORDER BY col2) FROM t",
+                    "duckdb": f"SELECT LISTAGG({distinct}col, '|SEPARATOR|' ORDER BY col2) FROM t",
+                },
+                write={
+                    "snowflake": f"SELECT LISTAGG({distinct}col, '|SEPARATOR|') WITHIN GROUP (ORDER BY col2) FROM t",
+                    "trino": f"SELECT LISTAGG({distinct}col, '|SEPARATOR|') WITHIN GROUP (ORDER BY col2) FROM t",
+                    "duckdb": f"SELECT LISTAGG({distinct}col, '|SEPARATOR|' ORDER BY col2) FROM t",
+                },
+            )
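
The new PUT support can be exercised directly with sqlglot's parse API; this sketch just replays one of the identities from test_put_to_stage above (assumes sqlglot 26.9.0):

from sqlglot import exp, parse_one

# A quoted file URI now parses into a structured exp.Put node rather than
# falling back to a generic command, and it round-trips unchanged.
ast = parse_one("PUT 'file:///dir/tmp.csv' @s1/test", read="snowflake")
assert isinstance(ast, exp.Put)
print(ast.sql(dialect="snowflake"))
# per the test: PUT 'file:///dir/tmp.csv' @s1/test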

tests/fixtures/identity.sql

@@ -888,4 +888,4 @@ SELECT 1 LIMIT 1
 CAST(x AS INT128)
 CAST(x AS UINT128)
 CAST(x AS UINT256)
-SELECT export
+SELECT export

tests/fixtures/partial.sql

@@ -2,7 +2,6 @@ SELECT a FROM
 SELECT a FROM x WHERE
 SELECT a +
 a *
-SELECT a FROM x,
 SELECT a FROM x GROUP BY
 WITH a AS (SELECT 1), b AS (SELECT 2)
 SELECT FROM x