
Merging upstream version 25.34.0.

Signed-off-by: Daniel Baumann <daniel@debian.org>

Commit: 589d8e33dc (parent: f19abab1f0)
Author: Daniel Baumann, 2025-02-13 21:58:22 +01:00
Signed by: daniel (GPG key ID: FBB4F0E80A80222F)

66 changed files with 53358 additions and 52814 deletions


@@ -200,24 +200,7 @@ LANGUAGE js AS
         self.validate_identity("CAST(x AS NVARCHAR)", "CAST(x AS STRING)")
         self.validate_identity("CAST(x AS TIMESTAMPTZ)", "CAST(x AS TIMESTAMP)")
         self.validate_identity("CAST(x AS RECORD)", "CAST(x AS STRUCT)")
-        self.validate_all(
-            "EDIT_DISTANCE(col1, col2, max_distance => 3)",
-            write={
-                "bigquery": "EDIT_DISTANCE(col1, col2, max_distance => 3)",
-                "clickhouse": UnsupportedError,
-                "databricks": UnsupportedError,
-                "drill": UnsupportedError,
-                "duckdb": UnsupportedError,
-                "hive": UnsupportedError,
-                "postgres": "LEVENSHTEIN_LESS_EQUAL(col1, col2, 3)",
-                "presto": UnsupportedError,
-                "snowflake": "EDITDISTANCE(col1, col2, 3)",
-                "spark": UnsupportedError,
-                "spark2": UnsupportedError,
-                "sqlite": UnsupportedError,
-            },
-        )
         self.validate_identity("SELECT * FROM x WHERE x.y >= (SELECT MAX(a) FROM b-c) - 20")
         self.validate_identity(
             "MERGE INTO dataset.NewArrivals USING (SELECT * FROM UNNEST([('microwave', 10, 'warehouse #1'), ('dryer', 30, 'warehouse #1'), ('oven', 20, 'warehouse #2')])) ON FALSE WHEN NOT MATCHED THEN INSERT ROW WHEN NOT MATCHED BY SOURCE THEN DELETE"
         )
@@ -332,6 +315,23 @@ LANGUAGE js AS
             "SELECT CAST(1 AS INT64)",
         )
+        self.validate_all(
+            "EDIT_DISTANCE(col1, col2, max_distance => 3)",
+            write={
+                "bigquery": "EDIT_DISTANCE(col1, col2, max_distance => 3)",
+                "clickhouse": UnsupportedError,
+                "databricks": UnsupportedError,
+                "drill": UnsupportedError,
+                "duckdb": UnsupportedError,
+                "hive": UnsupportedError,
+                "postgres": "LEVENSHTEIN_LESS_EQUAL(col1, col2, 3)",
+                "presto": UnsupportedError,
+                "snowflake": "EDITDISTANCE(col1, col2, 3)",
+                "spark": UnsupportedError,
+                "spark2": UnsupportedError,
+                "sqlite": UnsupportedError,
+            },
+        )
         self.validate_all(
             "EDIT_DISTANCE(a, b)",
             write={
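
For reference, a minimal sketch of what the relocated assertion checks, using sqlglot's public transpile API (expected strings taken from the write mapping above; the standalone usage itself is an illustration, not part of this diff):

    import sqlglot

    # BigQuery's named max_distance argument becomes a positional third
    # argument in dialects that bound the edit distance.
    print(sqlglot.transpile(
        "EDIT_DISTANCE(col1, col2, max_distance => 3)", read="bigquery", write="postgres"
    ))  # ['LEVENSHTEIN_LESS_EQUAL(col1, col2, 3)']
    print(sqlglot.transpile(
        "EDIT_DISTANCE(col1, col2, max_distance => 3)", read="bigquery", write="snowflake"
    ))  # ['EDITDISTANCE(col1, col2, 3)']
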
@@ -1608,11 +1608,11 @@ WHERE
         )
         self.validate_identity(
-            "CONTAINS_SUBSTRING(a, b, json_scope => 'JSON_KEYS_AND_VALUES')"
+            "CONTAINS_SUBSTR(a, b, json_scope => 'JSON_KEYS_AND_VALUES')"
         ).assert_is(exp.Anonymous)
         self.validate_all(
-            """CONTAINS_SUBSTRING(a, b)""",
+            """CONTAINS_SUBSTR(a, b)""",
             read={
                 "": "CONTAINS(a, b)",
                 "spark": "CONTAINS(a, b)",
@@ -1628,7 +1628,7 @@ WHERE
                 "snowflake": "CONTAINS(LOWER(a), LOWER(b))",
                 "duckdb": "CONTAINS(LOWER(a), LOWER(b))",
                 "oracle": "CONTAINS(LOWER(a), LOWER(b))",
-                "bigquery": "CONTAINS_SUBSTRING(a, b)",
+                "bigquery": "CONTAINS_SUBSTR(a, b)",
             },
         )
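
A sketch of the behaviour under test: BigQuery's CONTAINS_SUBSTR is case-insensitive, so dialects with a case-sensitive CONTAINS wrap both arguments in LOWER (output taken from the write map above; usage is illustrative):

    import sqlglot

    # BigQuery -> Snowflake: case-insensitivity is preserved by lowering both sides.
    print(sqlglot.transpile("CONTAINS_SUBSTR(a, b)", read="bigquery", write="snowflake"))
    # ['CONTAINS(LOWER(a), LOWER(b))']
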
@@ -2131,6 +2131,16 @@ OPTIONS (
                 },
             )
+            self.validate_all(
+                f"SELECT SUM(f1) OVER (ORDER BY f2 {sort_order}) FROM t",
+                read={
+                    "": f"SELECT SUM(f1) OVER (ORDER BY f2 {sort_order} {null_order}) FROM t",
+                },
+                write={
+                    "bigquery": f"SELECT SUM(f1) OVER (ORDER BY f2 {sort_order}) FROM t",
+                },
+            )

     def test_json_extract(self):
         self.validate_all(
             """SELECT JSON_QUERY('{"class": {"students": []}}', '$.class')""",


@@ -33,7 +33,8 @@ class TestDatabricks(Validator):
             "CREATE TABLE IF NOT EXISTS db.table (a TIMESTAMP, b BOOLEAN GENERATED ALWAYS AS (NOT a IS NULL)) USING DELTA"
         )
         self.validate_identity(
-            "SELECT DATE_FORMAT(CAST(FROM_UTC_TIMESTAMP(CAST(foo AS TIMESTAMP), 'America/Los_Angeles') AS TIMESTAMP), 'yyyy-MM-dd HH:mm:ss') AS foo FROM t"
+            "SELECT DATE_FORMAT(CAST(FROM_UTC_TIMESTAMP(foo, 'America/Los_Angeles') AS TIMESTAMP), 'yyyy-MM-dd HH:mm:ss') AS foo FROM t",
+            "SELECT DATE_FORMAT(CAST(FROM_UTC_TIMESTAMP(CAST(foo AS TIMESTAMP), 'America/Los_Angeles') AS TIMESTAMP), 'yyyy-MM-dd HH:mm:ss') AS foo FROM t",
         )
         self.validate_identity(
             "SELECT * FROM sales UNPIVOT INCLUDE NULLS (sales FOR quarter IN (q1 AS `Jan-Mar`))"


@@ -79,7 +79,7 @@ class TestDuckDB(Validator):
         self.validate_all(
             "SELECT SUM(X) OVER (ORDER BY x)",
             write={
-                "bigquery": "SELECT SUM(X) OVER (ORDER BY x NULLS LAST)",
+                "bigquery": "SELECT SUM(X) OVER (ORDER BY x)",
                 "duckdb": "SELECT SUM(X) OVER (ORDER BY x)",
                 "mysql": "SELECT SUM(X) OVER (ORDER BY CASE WHEN x IS NULL THEN 1 ELSE 0 END, x)",
             },
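
The updated expectation reflects that BigQuery does not accept an explicit null-ordering modifier inside a window ORDER BY, so the NULLS LAST annotation is dropped; a sketch (illustrative usage, output per the map above):

    import sqlglot

    # DuckDB -> BigQuery: no NULLS LAST inside the window specification.
    print(sqlglot.transpile("SELECT SUM(X) OVER (ORDER BY x)", read="duckdb", write="bigquery"))
    # ['SELECT SUM(X) OVER (ORDER BY x)']
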


@@ -761,13 +761,13 @@ class TestHive(Validator):
             },
         )
         self.validate_all(
-            "SELECT TRUNC(CAST(ds AS TIMESTAMP), 'MONTH') AS mm FROM tbl WHERE ds BETWEEN '2023-10-01' AND '2024-02-29'",
+            "SELECT TRUNC(CAST(ds AS TIMESTAMP), 'MONTH')",
             read={
-                "hive": "SELECT TRUNC(CAST(ds AS TIMESTAMP), 'MONTH') AS mm FROM tbl WHERE ds BETWEEN '2023-10-01' AND '2024-02-29'",
-                "presto": "SELECT DATE_TRUNC('MONTH', CAST(ds AS TIMESTAMP)) AS mm FROM tbl WHERE ds BETWEEN '2023-10-01' AND '2024-02-29'",
+                "hive": "SELECT TRUNC(CAST(ds AS TIMESTAMP), 'MONTH')",
+                "presto": "SELECT DATE_TRUNC('MONTH', CAST(ds AS TIMESTAMP))",
             },
             write={
-                "presto": "SELECT DATE_TRUNC('MONTH', TRY_CAST(ds AS TIMESTAMP)) AS mm FROM tbl WHERE ds BETWEEN '2023-10-01' AND '2024-02-29'",
+                "presto": "SELECT DATE_TRUNC('MONTH', TRY_CAST(ds AS TIMESTAMP))",
             },
         )
         self.validate_all(
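
A sketch of the Hive -> Presto mapping asserted above (illustrative standalone usage):

    import sqlglot

    # Hive's TRUNC(ts, 'MONTH') maps to Presto's DATE_TRUNC, with CAST relaxed to TRY_CAST.
    print(sqlglot.transpile("SELECT TRUNC(CAST(ds AS TIMESTAMP), 'MONTH')", read="hive", write="presto"))
    # ["SELECT DATE_TRUNC('MONTH', TRY_CAST(ds AS TIMESTAMP))"]
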


@@ -1273,22 +1273,27 @@ COMMENT='客户账户表'"""
         )

     def test_timestamp_trunc(self):
-        for dialect in ("postgres", "snowflake", "duckdb", "spark", "databricks"):
+        hive_dialects = ("spark", "databricks")
+        for dialect in ("postgres", "snowflake", "duckdb", *hive_dialects):
             for unit in (
                 "MILLISECOND",
                 "SECOND",
                 "DAY",
                 "MONTH",
                 "YEAR",
             ):
                 with self.subTest(f"MySQL -> {dialect} Timestamp Trunc with unit {unit}: "):
+                    cast = (
+                        "TIMESTAMP('2001-02-16 20:38:40')"
+                        if dialect in hive_dialects
+                        else "CAST('2001-02-16 20:38:40' AS DATETIME)"
+                    )
                     self.validate_all(
-                        f"DATE_ADD('0000-01-01 00:00:00', INTERVAL (TIMESTAMPDIFF({unit}, '0000-01-01 00:00:00', CAST('2001-02-16 20:38:40' AS DATETIME))) {unit})",
+                        f"DATE_ADD('0000-01-01 00:00:00', INTERVAL (TIMESTAMPDIFF({unit}, '0000-01-01 00:00:00', {cast})) {unit})",
                         read={
                             dialect: f"DATE_TRUNC({unit}, TIMESTAMP '2001-02-16 20:38:40')",
                         },
                         write={
-                            "mysql": f"DATE_ADD('0000-01-01 00:00:00', INTERVAL (TIMESTAMPDIFF({unit}, '0000-01-01 00:00:00', CAST('2001-02-16 20:38:40' AS DATETIME))) {unit})",
+                            "mysql": f"DATE_ADD('0000-01-01 00:00:00', INTERVAL (TIMESTAMPDIFF({unit}, '0000-01-01 00:00:00', {cast})) {unit})",
                         },
                     )
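
MySQL has no DATE_TRUNC, so sqlglot emulates it by counting whole units since an epoch and adding them back; a sketch for one unit (illustrative usage, output per the template above):

    import sqlglot

    # Postgres DATE_TRUNC -> MySQL DATE_ADD/TIMESTAMPDIFF emulation.
    print(sqlglot.transpile(
        "DATE_TRUNC(MONTH, TIMESTAMP '2001-02-16 20:38:40')", read="postgres", write="mysql"
    )[0])
    # DATE_ADD('0000-01-01 00:00:00', INTERVAL (TIMESTAMPDIFF(MONTH, '0000-01-01 00:00:00', CAST('2001-02-16 20:38:40' AS DATETIME))) MONTH)
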


@@ -1047,6 +1047,7 @@ class TestPostgres(Validator):
         self.validate_identity("CREATE TABLE tbl (col INT UNIQUE NULLS NOT DISTINCT DEFAULT 9.99)")
         self.validate_identity("CREATE TABLE tbl (col UUID UNIQUE DEFAULT GEN_RANDOM_UUID())")
         self.validate_identity("CREATE TABLE tbl (col UUID, UNIQUE NULLS NOT DISTINCT (col))")
+        self.validate_identity("CREATE TABLE tbl (col_a INT GENERATED ALWAYS AS (1 + 2) STORED)")
         self.validate_identity("CREATE INDEX CONCURRENTLY ix_table_id ON tbl USING btree(id)")
         self.validate_identity(


@@ -354,7 +354,7 @@ class TestPresto(Validator):
             },
         )
         self.validate_all(
-            "((DAY_OF_WEEK(CAST(TRY_CAST('2012-08-08 01:00:00' AS TIMESTAMP) AS DATE)) % 7) + 1)",
+            "((DAY_OF_WEEK(CAST(CAST(TRY_CAST('2012-08-08 01:00:00' AS TIMESTAMP WITH TIME ZONE) AS TIMESTAMP) AS DATE)) % 7) + 1)",
             read={
                 "spark": "DAYOFWEEK(CAST('2012-08-08 01:00:00' AS TIMESTAMP))",
             },
@@ -406,7 +406,7 @@ class TestPresto(Validator):
             },
         )
         self.validate_all(
-            "SELECT AT_TIMEZONE(CAST('2012-10-31 00:00' AS TIMESTAMP), 'America/Sao_Paulo')",
+            "SELECT AT_TIMEZONE(CAST(CAST('2012-10-31 00:00' AS TIMESTAMP WITH TIME ZONE) AS TIMESTAMP), 'America/Sao_Paulo')",
             read={
                 "spark": "SELECT FROM_UTC_TIMESTAMP(TIMESTAMP '2012-10-31 00:00', 'America/Sao_Paulo')",
             },
@@ -1071,6 +1071,18 @@ class TestPresto(Validator):
                 "databricks": "REGEXP_EXTRACT('abc', '(a)(b)(c)', 0)",
             },
         )
+        self.validate_all(
+            "CURRENT_USER",
+            read={
+                "presto": "CURRENT_USER",
+                "trino": "CURRENT_USER",
+                "snowflake": "CURRENT_USER()",  # although the ANSI standard is CURRENT_USER
+            },
+            write={
+                "trino": "CURRENT_USER",
+                "snowflake": "CURRENT_USER()",
+            },
+        )

     def test_encode_decode(self):
         self.validate_identity("FROM_UTF8(x, y)")
@@ -1191,7 +1203,8 @@ MATCH_RECOGNIZE (
   DEFINE
     B AS totalprice < PREV(totalprice),
     C AS totalprice > PREV(totalprice) AND totalprice <= A.totalprice,
-    D AS totalprice > PREV(totalprice)
+    D AS totalprice > PREV(totalprice),
+    E AS MAX(foo) >= NEXT(bar)
 )""",
             pretty=True,
         )
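
A sketch of the new CURRENT_USER round trip (illustrative usage; expectations from the read/write maps above):

    import sqlglot

    # Snowflake spells it as a function call; Trino uses the bare ANSI keyword.
    print(sqlglot.transpile("CURRENT_USER()", read="snowflake", write="trino"))
    # ['CURRENT_USER']
    print(sqlglot.transpile("CURRENT_USER", read="presto", write="snowflake"))
    # ['CURRENT_USER()']
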


@@ -124,6 +124,7 @@ WHERE
         self.validate_identity(
             "SELECT * FROM DATA AS DATA_L ASOF JOIN DATA AS DATA_R MATCH_CONDITION (DATA_L.VAL > DATA_R.VAL) ON DATA_L.ID = DATA_R.ID"
         )
+        self.validate_identity("TO_TIMESTAMP(col, fmt)")
         self.validate_identity(
             "CAST(x AS GEOGRAPHY)",
             "TO_GEOGRAPHY(x)",
@@ -670,6 +671,15 @@ WHERE
                 "snowflake": "SELECT a FROM test AS t QUALIFY ROW_NUMBER() OVER (PARTITION BY a ORDER BY Z) = 1",
             },
         )
+        self.validate_all(
+            "SELECT TO_TIMESTAMP(col, 'DD-MM-YYYY HH12:MI:SS') FROM t",
+            write={
+                "bigquery": "SELECT PARSE_TIMESTAMP('%d-%m-%Y %I:%M:%S', col) FROM t",
+                "duckdb": "SELECT STRPTIME(col, '%d-%m-%Y %I:%M:%S') FROM t",
+                "snowflake": "SELECT TO_TIMESTAMP(col, 'DD-mm-yyyy hh12:mi:ss') FROM t",
+                "spark": "SELECT TO_TIMESTAMP(col, 'dd-MM-yyyy hh:mm:ss') FROM t",
+            },
+        )
         self.validate_all(
             "SELECT TO_TIMESTAMP(1659981729)",
             write={
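
A sketch of the new two-argument TO_TIMESTAMP mapping, where Snowflake's format tokens are rewritten into each target's notation (illustrative usage; output from the write map above):

    import sqlglot

    # Snowflake format tokens (DD, MM, HH12, MI) become strptime-style codes in DuckDB.
    print(sqlglot.transpile(
        "SELECT TO_TIMESTAMP(col, 'DD-MM-YYYY HH12:MI:SS') FROM t", read="snowflake", write="duckdb"
    ))
    # ["SELECT STRPTIME(col, '%d-%m-%Y %I:%M:%S') FROM t"]
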


@@ -765,6 +765,16 @@ TBLPROPERTIES (
             },
         )
+        self.validate_all(
+            "SELECT CAST(col AS TIMESTAMP)",
+            write={
+                "spark2": "SELECT CAST(col AS TIMESTAMP)",
+                "spark": "SELECT CAST(col AS TIMESTAMP)",
+                "databricks": "SELECT TRY_CAST(col AS TIMESTAMP)",
+                "duckdb": "SELECT TRY_CAST(col AS TIMESTAMPTZ)",
+            },
+        )

     def test_bool_or(self):
         self.validate_all(
             "SELECT a, LOGICAL_OR(b) FROM table GROUP BY a",


@@ -2075,3 +2075,18 @@ FROM OPENJSON(@json) WITH (
                 "tsql": UnsupportedError,
             },
         )
+
+    def test_next_value_for(self):
+        self.validate_identity(
+            "SELECT NEXT VALUE FOR db.schema.sequence_name OVER (ORDER BY foo), col"
+        )
+        self.validate_all(
+            "SELECT NEXT VALUE FOR db.schema.sequence_name",
+            read={
+                "oracle": "SELECT NEXT VALUE FOR db.schema.sequence_name",
+                "tsql": "SELECT NEXT VALUE FOR db.schema.sequence_name",
+            },
+            write={
+                "oracle": "SELECT NEXT VALUE FOR db.schema.sequence_name",
+            },
+        )
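
A sketch of the sequence round trip added above (illustrative usage; expectations from the read/write maps):

    import sqlglot

    # NEXT VALUE FOR parses identically in T-SQL and Oracle and round-trips unchanged.
    print(sqlglot.transpile("SELECT NEXT VALUE FOR db.schema.sequence_name", read="tsql", write="oracle"))
    # ['SELECT NEXT VALUE FOR db.schema.sequence_name']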