
Merging upstream version 26.26.0.

Signed-off-by: Daniel Baumann <daniel@debian.org>
Daniel Baumann 2025-06-11 08:06:17 +02:00
parent 768f936511
commit 1ac9fca060
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
62 changed files with 938 additions and 453 deletions


@@ -462,6 +462,16 @@ class TestDialect(Validator):
"CAST('127.0.0.1/32' AS INET)",
read={"postgres": "INET '127.0.0.1/32'"},
)
self.assertIsNotNone(
self.validate_identity("CREATE TABLE foo (bar INT AS (foo))").find(
exp.ComputedColumnConstraint
)
)
self.assertIsNotNone(
self.validate_identity(
"CREATE TABLE foo (t1 INT, t2 INT, bar INT AS (t1 * t2 * 2))"
).find(exp.ComputedColumnConstraint)
)
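A minimal sketch of what these new assertions exercise, using sqlglot's public parse_one/exp API (parsed with the default dialect, as validate_identity does above):

from sqlglot import exp, parse_one

# A generated column should round-trip and surface a
# ComputedColumnConstraint node somewhere in the parsed tree.
ast = parse_one("CREATE TABLE foo (bar INT AS (foo))")
assert ast.find(exp.ComputedColumnConstraint) is not None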
def test_ddl(self):
self.validate_all(
@@ -1304,6 +1314,32 @@ class TestDialect(Validator):
},
)
self.validate_all(
"ARRAY_INTERSECT(x, y)",
read={
"hive": "ARRAY_INTERSECT(x, y)",
"spark2": "ARRAY_INTERSECT(x, y)",
"spark": "ARRAY_INTERSECT(x, y)",
"databricks": "ARRAY_INTERSECT(x, y)",
"presto": "ARRAY_INTERSECT(x, y)",
"trino": "ARRAY_INTERSECT(x, y)",
"snowflake": "ARRAY_INTERSECTION(x, y)",
"starrocks": "ARRAY_INTERSECT(x, y)",
},
write={
"hive": "ARRAY_INTERSECT(x, y)",
"spark2": "ARRAY_INTERSECT(x, y)",
"spark": "ARRAY_INTERSECT(x, y)",
"databricks": "ARRAY_INTERSECT(x, y)",
"presto": "ARRAY_INTERSECT(x, y)",
"trino": "ARRAY_INTERSECT(x, y)",
"snowflake": "ARRAY_INTERSECTION(x, y)",
"starrocks": "ARRAY_INTERSECT(x, y)",
},
)
self.validate_identity("SELECT ARRAY_INTERSECT(x, y, z)")
def test_order_by(self):
self.validate_identity(
"SELECT c FROM t ORDER BY a, b,",
@@ -3528,3 +3564,81 @@ FROM subquery2""",
f"FROM x |> SELECT x1, x2 |> WHERE x1 > 0 |> WHERE x2 > 0 |> ORDER BY x1, x2 |> {option}",
f"SELECT x1, x2 FROM (SELECT * FROM x) WHERE x1 > 0 AND x2 > 0 ORDER BY x1, x2 {option}",
)
self.validate_identity(
"FROM x |> AGGREGATE SUM(x1), MAX(x2), MIN(x3)",
"SELECT SUM(x1), MAX(x2), MIN(x3) FROM (SELECT * FROM x)",
)
self.validate_identity(
"FROM x |> AGGREGATE SUM(x1) AS s_x1 |> SELECT s_x1",
"SELECT s_x1 FROM (SELECT SUM(x1) AS s_x1 FROM (SELECT * FROM x))",
)
self.validate_identity(
"FROM x |> AGGREGATE SUM(x1), MAX(x2), MIN(x3) GROUP BY x4, x5",
"SELECT SUM(x1), MAX(x2), MIN(x3), x4, x5 FROM (SELECT * FROM x) GROUP BY x4, x5",
)
self.validate_identity(
"FROM x |> AGGREGATE SUM(x1), MAX(x2), MIN(x3) GROUP BY x4 AS a_x4, x5 AS a_x5",
"SELECT SUM(x1), MAX(x2), MIN(x3), x4 AS a_x4, x5 AS a_x5 FROM (SELECT * FROM x) GROUP BY a_x4, a_x5",
)
self.validate_identity(
"FROM x |> AGGREGATE SUM(x1) as s_x1 GROUP BY x1 |> SELECT s_x1, x1 as ss_x1",
"SELECT s_x1, x1 AS ss_x1 FROM (SELECT SUM(x1) AS s_x1, x1 FROM (SELECT * FROM x) GROUP BY x1)",
)
self.validate_identity(
"FROM x |> AGGREGATE SUM(x1) GROUP", "SELECT SUM(x1) AS GROUP FROM (SELECT * FROM x)"
)
for order_option in ("ASC", "DESC", "ASC NULLS LAST", "DESC NULLS FIRST"):
with self.subTest(f"Testing pipe syntax AGGREGATE for order option: {order_option}"):
self.validate_all(
f"SELECT SUM(x1) AS x_s FROM (SELECT * FROM x) ORDER BY x_s {order_option}",
read={
"bigquery": f"FROM x |> AGGREGATE SUM(x1) AS x_s {order_option}",
},
)
self.validate_all(
f"SELECT SUM(x1) AS x_s, x1 AS g_x1 FROM (SELECT * FROM x) GROUP BY g_x1 ORDER BY x_s {order_option}",
read={
"bigquery": f"FROM x |> AGGREGATE SUM(x1) AS x_s {order_option} GROUP BY x1 AS g_x1",
},
)
with self.subTest(
f"Testing pipe syntax AGGREGATE with GROUP AND ORDER BY for order option: {order_option}"
):
self.validate_all(
f"SELECT g_x1, x_s FROM (SELECT SUM(x1) AS x_s, x1 AS g_x1 FROM (SELECT * FROM x) GROUP BY g_x1 ORDER BY g_x1 {order_option})",
read={
"bigquery": f"FROM x |> AGGREGATE SUM(x1) AS x_s GROUP AND ORDER BY x1 AS g_x1 {order_option} |> SELECT g_x1, x_s",
},
)
for op_operator in (
"UNION ALL",
"UNION DISTINCT",
"INTERSECT DISTINCT",
"EXCEPT DISTINCT",
):
with self.subTest(f"Testing pipe syntax SET OPERATORS: {op_operator}"):
self.validate_all(
f"FROM x|> {op_operator} (SELECT y1 FROM y), (SELECT z1 FROM z)",
write={
"bigquery": f"SELECT * FROM x {op_operator} (SELECT y1 FROM y) {op_operator} (SELECT z1 FROM z)",
},
)
for op_prefix in ("LEFT OUTER", "FULL OUTER"):
for op_operator in (
"UNION ALL",
"UNION DISTINCT",
"INTERSECT DISTINCT",
"EXCEPT DISTINCT",
):
for suffix_operator in ("BY NAME", "CORRESPONDING"):
with self.subTest(
f"Testing pipe syntax SET OPERATORS: {op_prefix} {op_operator} {suffix_operator}"
):
self.validate_all(
f"FROM x|> SELECT x1, x2 FROM x |> {op_prefix} {op_operator} {suffix_operator} (SELECT y1, y2 FROM y), (SELECT z1, z2 FROM z)",
write={
"bigquery": f"SELECT x1, x2 FROM (SELECT * FROM x) {op_prefix} {op_operator} BY NAME (SELECT y1, y2 FROM y) {op_prefix} {op_operator} BY NAME (SELECT z1, z2 FROM z)",
},
)
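A short sketch of what these cases pin down: BigQuery pipe syntax parses with read="bigquery" and is generated back as nested standard SQL, so identity validation yields the rewritten form:

import sqlglot

sql = "FROM x |> AGGREGATE SUM(x1) AS s_x1 |> SELECT s_x1"
print(sqlglot.transpile(sql, read="bigquery")[0])
# SELECT s_x1 FROM (SELECT SUM(x1) AS s_x1 FROM (SELECT * FROM x))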


@@ -9,13 +9,6 @@ class TestDuckDB(Validator):
dialect = "duckdb"
def test_duckdb(self):
self.validate_identity("SELECT * FROM my_ducklake.demo AT (VERSION => 2)")
self.validate_identity("SELECT UUIDV7()")
self.validate_identity("SELECT TRY(LOG(0))")
self.validate_identity("x::timestamp", "CAST(x AS TIMESTAMP)")
self.validate_identity("x::timestamp without time zone", "CAST(x AS TIMESTAMP)")
self.validate_identity("x::timestamp with time zone", "CAST(x AS TIMESTAMPTZ)")
with self.assertRaises(ParseError):
parse_one("1 //", read="duckdb")
@@ -36,6 +29,20 @@ class TestDuckDB(Validator):
"STRUCT(k TEXT, v STRUCT(v_str TEXT, v_int INT, v_int_arr INT[]))[]",
)
self.validate_all(
"SELECT FIRST_VALUE(c IGNORE NULLS) OVER (PARTITION BY gb ORDER BY ob) FROM t",
write={
"duckdb": "SELECT FIRST_VALUE(c IGNORE NULLS) OVER (PARTITION BY gb ORDER BY ob) FROM t",
"sqlite": UnsupportedError,
},
)
self.validate_all(
"SELECT FIRST_VALUE(c RESPECT NULLS) OVER (PARTITION BY gb ORDER BY ob) FROM t",
write={
"duckdb": "SELECT FIRST_VALUE(c RESPECT NULLS) OVER (PARTITION BY gb ORDER BY ob) FROM t",
"sqlite": "SELECT FIRST_VALUE(c) OVER (PARTITION BY gb ORDER BY ob NULLS LAST) FROM t",
},
)
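A sketch of the two behaviors fixed here: SQLite cannot express IGNORE NULLS (hence UnsupportedError), while RESPECT NULLS is its default and is simply dropped, with NULLS LAST appended to preserve the ordering semantics:

import sqlglot

sql = "SELECT FIRST_VALUE(c RESPECT NULLS) OVER (PARTITION BY gb ORDER BY ob) FROM t"
print(sqlglot.transpile(sql, read="duckdb", write="sqlite")[0])
# SELECT FIRST_VALUE(c) OVER (PARTITION BY gb ORDER BY ob NULLS LAST) FROM t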
self.validate_all(
"CAST(x AS UUID)",
write={
@@ -264,6 +271,12 @@ class TestDuckDB(Validator):
parse_one("a // b", read="duckdb").assert_is(exp.IntDiv).sql(dialect="duckdb"), "a // b"
)
self.validate_identity("SELECT * FROM my_ducklake.demo AT (VERSION => 2)")
self.validate_identity("SELECT UUIDV7()")
self.validate_identity("SELECT TRY(LOG(0))")
self.validate_identity("x::timestamp", "CAST(x AS TIMESTAMP)")
self.validate_identity("x::timestamp without time zone", "CAST(x AS TIMESTAMP)")
self.validate_identity("x::timestamp with time zone", "CAST(x AS TIMESTAMPTZ)")
self.validate_identity("CAST(x AS FOO)")
self.validate_identity("SELECT UNNEST([1, 2])").selects[0].assert_is(exp.UDTF)
self.validate_identity("'red' IN flags").args["field"].assert_is(exp.Column)


@@ -22,6 +22,7 @@ class TestPostgres(Validator):
expected_sql = "ARRAY[\n x" + (",\n x" * 27) + "\n]"
self.validate_identity(sql, expected_sql, pretty=True)
self.validate_identity("SELECT ST_DISTANCE(gg1, gg2, FALSE) AS sphere_dist")
self.validate_identity("SHA384(x)")
self.validate_identity("1.x", "1. AS x")
self.validate_identity("|/ x", "SQRT(x)")
@@ -908,6 +909,18 @@ FROM json_data, field_ids""",
},
)
# Postgres introduced ANY_VALUE in version 16
self.validate_all(
"SELECT ANY_VALUE(1) AS col",
write={
"postgres": "SELECT ANY_VALUE(1) AS col",
"postgres, version=16": "SELECT ANY_VALUE(1) AS col",
"postgres, version=17.5": "SELECT ANY_VALUE(1) AS col",
"postgres, version=15": "SELECT MAX(1) AS col",
"postgres, version=13.9": "SELECT MAX(1) AS col",
},
)
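The write keys above use a versioned dialect spec ("postgres, version=NN"); a sketch of the same check from user code, assuming that spec is accepted anywhere a dialect name is:

import sqlglot

sql = "SELECT ANY_VALUE(1) AS col"
# Postgres 16+ keeps ANY_VALUE; earlier versions fall back to MAX.
print(sqlglot.transpile(sql, read="postgres", write="postgres, version=15")[0])
# SELECT MAX(1) AS col
print(sqlglot.transpile(sql, read="postgres", write="postgres, version=16")[0])
# SELECT ANY_VALUE(1) AS col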
def test_ddl(self):
# Checks that user-defined types are parsed into DataType instead of Identifier
self.parse_one("CREATE TABLE t (a udt)").this.expressions[0].args["kind"].assert_is(


@@ -778,7 +778,7 @@ class TestPresto(Validator):
"hive": "FIRST(x)",
"mysql": "ANY_VALUE(x)",
"oracle": "ANY_VALUE(x)",
"postgres": "MAX(x)",
"postgres": "ANY_VALUE(x)",
"presto": "ARBITRARY(x)",
"redshift": "ANY_VALUE(x)",
"snowflake": "ANY_VALUE(x)",


@@ -318,6 +318,13 @@ class TestSnowflake(Validator):
"SELECT * FROM xxx, yyy, zzz",
)
self.validate_all(
"SELECT ARRAY_INTERSECTION([1, 2], [2, 3])",
write={
"starrocks": "SELECT ARRAY_INTERSECT([1, 2], [2, 3])",
},
)
self.validate_all(
"CREATE TABLE test_table (id NUMERIC NOT NULL AUTOINCREMENT)",
write={
@@ -1079,6 +1086,22 @@ class TestSnowflake(Validator):
},
)
self.validate_all(
"SELECT ST_MAKEPOINT(10, 20)",
write={
"snowflake": "SELECT ST_MAKEPOINT(10, 20)",
"starrocks": "SELECT ST_POINT(10, 20)",
},
)
self.validate_all(
"SELECT ST_DISTANCE(a, b)",
write={
"snowflake": "SELECT ST_DISTANCE(a, b)",
"starrocks": "SELECT ST_DISTANCE_SPHERE(ST_X(a), ST_Y(a), ST_X(b), ST_Y(b))",
},
)
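A usage sketch of the new Snowflake-to-StarRocks geo mappings; the expected outputs mirror the write tables above:

import sqlglot

# The point constructor renames; ST_DISTANCE is rewritten onto
# ST_DISTANCE_SPHERE over the points' coordinates.
print(sqlglot.transpile("SELECT ST_MAKEPOINT(10, 20)", read="snowflake", write="starrocks")[0])
# SELECT ST_POINT(10, 20)
print(sqlglot.transpile("SELECT ST_DISTANCE(a, b)", read="snowflake", write="starrocks")[0])
# SELECT ST_DISTANCE_SPHERE(ST_X(a), ST_Y(a), ST_X(b), ST_Y(b))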
def test_null_treatment(self):
self.validate_all(
r"SELECT FIRST_VALUE(TABLE1.COLUMN1) OVER (PARTITION BY RANDOM_COLUMN1, RANDOM_COLUMN2 ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS MY_ALIAS FROM TABLE1",
@@ -1687,12 +1710,6 @@ class TestSnowflake(Validator):
},
)
-self.assertIsNotNone(
-self.validate_identity("CREATE TABLE foo (bar INT AS (foo))").find(
-exp.TransformColumnConstraint
-)
-)
def test_user_defined_functions(self):
self.validate_all(
"CREATE FUNCTION a(x DATE, y BIGINT) RETURNS ARRAY LANGUAGE JAVASCRIPT AS $$ SELECT 1 $$",
@@ -2013,6 +2030,26 @@ FROM persons AS p, LATERAL FLATTEN(input => p.c, path => 'contact') AS _flattene
"spark": "DESCRIBE db.table",
},
)
self.validate_all(
"ENDSWITH('abc', 'c')",
read={
"bigquery": "ENDS_WITH('abc', 'c')",
"clickhouse": "endsWith('abc', 'c')",
"databricks": "ENDSWITH('abc', 'c')",
"duckdb": "ENDS_WITH('abc', 'c')",
"presto": "ENDS_WITH('abc', 'c')",
"spark": "ENDSWITH('abc', 'c')",
},
write={
"bigquery": "ENDS_WITH('abc', 'c')",
"clickhouse": "endsWith('abc', 'c')",
"databricks": "ENDSWITH('abc', 'c')",
"duckdb": "ENDS_WITH('abc', 'c')",
"presto": "ENDS_WITH('abc', 'c')",
"snowflake": "ENDSWITH('abc', 'c')",
"spark": "ENDSWITH('abc', 'c')",
},
)
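As a sketch, any of the spellings in the read table normalizes to the target dialect's own spelling on output:

import sqlglot

print(sqlglot.transpile("ENDSWITH('abc', 'c')", read="snowflake", write="clickhouse")[0])
# endsWith('abc', 'c')
print(sqlglot.transpile("ENDS_WITH('abc', 'c')", read="bigquery", write="snowflake")[0])
# ENDSWITH('abc', 'c')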
def test_parse_like_any(self):
like = parse_one("a LIKE ANY fun('foo')", read="snowflake")


@@ -113,6 +113,7 @@ class TestSQLite(Validator):
'CREATE TABLE "foo t" ("foo t id" TEXT NOT NULL, PRIMARY KEY ("foo t id"))',
'CREATE TABLE "foo t" ("foo t id" TEXT NOT NULL PRIMARY KEY)',
)
self.validate_identity("REPLACE INTO foo (x, y) VALUES (1, 2)", check_command_warning=True)
def test_strftime(self):
self.validate_identity("SELECT STRFTIME('%Y/%m/%d', 'now')")


@@ -9,6 +9,9 @@ class TestStarrocks(Validator):
self.validate_identity("SELECT ARRAY_JOIN([1, 3, 5, NULL], '_', 'NULL')")
self.validate_identity("SELECT ARRAY_JOIN([1, 3, 5, NULL], '_')")
self.validate_identity("ALTER TABLE a SWAP WITH b")
self.validate_identity("SELECT ARRAY_AGG(a) FROM x")
self.validate_identity("SELECT ST_POINT(10, 20)")
self.validate_identity("SELECT ST_DISTANCE_SPHERE(10.1, 20.2, 30.3, 40.4)")
def test_ddl(self):
ddl_sqls = [


@@ -669,6 +669,8 @@ class TestExpressions(unittest.TestCase):
self.assertIsInstance(parse_one("ARRAY_AGG(a)"), exp.ArrayAgg)
self.assertIsInstance(parse_one("ARRAY_CONTAINS(a, 'a')"), exp.ArrayContains)
self.assertIsInstance(parse_one("ARRAY_SIZE(a)"), exp.ArraySize)
self.assertIsInstance(parse_one("ARRAY_INTERSECTION([1, 2], [2, 3])"), exp.ArrayIntersect)
self.assertIsInstance(parse_one("ARRAY_INTERSECT([1, 2], [2, 3])"), exp.ArrayIntersect)
self.assertIsInstance(parse_one("AVG(a)"), exp.Avg)
self.assertIsInstance(parse_one("BEGIN DEFERRED TRANSACTION"), exp.Transaction)
self.assertIsInstance(parse_one("CEIL(a)"), exp.Ceil)
@@ -710,6 +712,8 @@ class TestExpressions(unittest.TestCase):
self.assertIsInstance(parse_one("ROUND(a)"), exp.Round)
self.assertIsInstance(parse_one("ROUND(a, 2)"), exp.Round)
self.assertIsInstance(parse_one("SPLIT(a, 'test')"), exp.Split)
self.assertIsInstance(parse_one("ST_POINT(10, 20)"), exp.StPoint)
self.assertIsInstance(parse_one("ST_DISTANCE(a, b)"), exp.StDistance)
self.assertIsInstance(parse_one("STR_POSITION(a, 'test')"), exp.StrPosition)
self.assertIsInstance(parse_one("STR_TO_UNIX(a, 'format')"), exp.StrToUnix)
self.assertIsInstance(parse_one("STRUCT_EXTRACT(a, 'test')"), exp.StructExtract)
@@ -752,6 +756,9 @@ class TestExpressions(unittest.TestCase):
self.assertIsInstance(parse_one("ADD_MONTHS(a, b)"), exp.AddMonths)
def test_column(self):
column = exp.column(exp.Star(), table="t")
self.assertEqual(column.sql(), "t.*")
column = parse_one("a.b.c.d")
self.assertEqual(column.catalog, "a")
self.assertEqual(column.db, "b")
@@ -987,15 +994,20 @@ FROM foo""",
self.assertEqual(table_only.name, "table_name")
self.assertIsNone(table_only.args.get("db"))
self.assertIsNone(table_only.args.get("catalog"))
db_and_table = exp.to_table("db.table_name")
self.assertEqual(db_and_table.name, "table_name")
self.assertEqual(db_and_table.args.get("db"), exp.to_identifier("db"))
self.assertIsNone(db_and_table.args.get("catalog"))
catalog_db_and_table = exp.to_table("catalog.db.table_name")
self.assertEqual(catalog_db_and_table.name, "table_name")
self.assertEqual(catalog_db_and_table.args.get("db"), exp.to_identifier("db"))
self.assertEqual(catalog_db_and_table.args.get("catalog"), exp.to_identifier("catalog"))
table_only_unsafe_identifier = exp.to_table("3e")
self.assertEqual(table_only_unsafe_identifier.sql(), '"3e"')
def test_to_column(self):
column_only = exp.to_column("column_name")
self.assertEqual(column_only.name, "column_name")


@@ -201,3 +201,10 @@ x"""
self.assertEqual(len(partial_tokens), 1)
self.assertEqual(partial_tokens[0].token_type, TokenType.VAR)
self.assertEqual(partial_tokens[0].text, "foo")
def test_token_repr(self):
# Ensures both the Python and the Rust tokenizer produce a human-friendly representation
self.assertEqual(
repr(Tokenizer().tokenize("foo")),
"[<Token token_type: TokenType.VAR, text: foo, line: 1, col: 3, start: 0, end: 2, comments: []>]",
)