1
0
Fork 0

Adding upstream version 25.29.0.

Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
Daniel Baumann 2025-02-13 21:56:11 +01:00
parent dfac4c492f
commit c61927f460
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
97 changed files with 64720 additions and 61752 deletions

View file

@ -9,7 +9,6 @@ from sqlglot import (
UnsupportedError,
exp,
parse,
parse_one,
transpile,
)
from sqlglot.helper import logger as helper_logger
@ -85,12 +84,21 @@ LANGUAGE js AS
"PARSE_TIMESTAMP('%Y-%m-%dT%H:%M:%E*S%z', x)",
)
table = parse_one("x-0._y.z", dialect="bigquery", into=exp.Table)
for prefix in ("c.db.", "db.", ""):
with self.subTest(f"Parsing {prefix}INFORMATION_SCHEMA.X into a Table"):
table = self.parse_one(f"`{prefix}INFORMATION_SCHEMA.X`", into=exp.Table)
this = table.this
self.assertIsInstance(this, exp.Identifier)
self.assertTrue(this.quoted)
self.assertEqual(this.name, "INFORMATION_SCHEMA.X")
table = self.parse_one("x-0._y.z", into=exp.Table)
self.assertEqual(table.catalog, "x-0")
self.assertEqual(table.db, "_y")
self.assertEqual(table.name, "z")
table = parse_one("x-0._y", dialect="bigquery", into=exp.Table)
table = self.parse_one("x-0._y", into=exp.Table)
self.assertEqual(table.db, "x-0")
self.assertEqual(table.name, "_y")
@ -165,6 +173,7 @@ LANGUAGE js AS
self.validate_identity("SELECT * FROM foo.bar.25ab c", "SELECT * FROM foo.bar.`25ab` AS c")
self.validate_identity("x <> ''")
self.validate_identity("DATE_TRUNC(col, WEEK(MONDAY))")
self.validate_identity("DATE_TRUNC(col, MONTH, 'UTC+8')")
self.validate_identity("SELECT b'abc'")
self.validate_identity("SELECT AS STRUCT 1 AS a, 2 AS b")
self.validate_identity("SELECT DISTINCT AS STRUCT 1 AS a, 2 AS b")
@ -182,7 +191,6 @@ LANGUAGE js AS
self.validate_identity("SELECT y + 1 FROM x GROUP BY y + 1 ORDER BY 1")
self.validate_identity("SELECT TIMESTAMP_SECONDS(2) AS t")
self.validate_identity("SELECT TIMESTAMP_MILLIS(2) AS t")
self.validate_identity("""SELECT JSON_EXTRACT_SCALAR('{"a": 5}', '$.a')""")
self.validate_identity("UPDATE x SET y = NULL")
self.validate_identity("LOG(n, b)")
self.validate_identity("SELECT COUNT(x RESPECT NULLS)")
@ -194,11 +202,11 @@ LANGUAGE js AS
self.validate_identity("CAST(x AS NVARCHAR)", "CAST(x AS STRING)")
self.validate_identity("CAST(x AS TIMESTAMPTZ)", "CAST(x AS TIMESTAMP)")
self.validate_identity("CAST(x AS RECORD)", "CAST(x AS STRUCT)")
self.validate_identity(
"MERGE INTO dataset.NewArrivals USING (SELECT * FROM UNNEST([('microwave', 10, 'warehouse #1'), ('dryer', 30, 'warehouse #1'), ('oven', 20, 'warehouse #2')])) ON FALSE WHEN NOT MATCHED THEN INSERT ROW WHEN NOT MATCHED BY SOURCE THEN DELETE"
self.validate_identity("EDIT_DISTANCE('a', 'a', max_distance => 2)").assert_is(
exp.Levenshtein
)
self.validate_identity(
"SELECT * FROM `SOME_PROJECT_ID.SOME_DATASET_ID.INFORMATION_SCHEMA.SOME_VIEW`"
"MERGE INTO dataset.NewArrivals USING (SELECT * FROM UNNEST([('microwave', 10, 'warehouse #1'), ('dryer', 30, 'warehouse #1'), ('oven', 20, 'warehouse #2')])) ON FALSE WHEN NOT MATCHED THEN INSERT ROW WHEN NOT MATCHED BY SOURCE THEN DELETE"
)
self.validate_identity(
"SELECT * FROM test QUALIFY a IS DISTINCT FROM b WINDOW c AS (PARTITION BY d)"
@ -228,10 +236,23 @@ LANGUAGE js AS
"SELECT LAST_VALUE(a IGNORE NULLS) OVER y FROM x WINDOW y AS (PARTITION BY CATEGORY)",
)
self.validate_identity(
"""SELECT JSON_EXTRACT_SCALAR('5')""", """SELECT JSON_EXTRACT_SCALAR('5', '$')"""
"CREATE OR REPLACE VIEW test (tenant_id OPTIONS (description='Test description on table creation')) AS SELECT 1 AS tenant_id, 1 AS customer_id",
)
self.validate_identity(
"CREATE OR REPLACE VIEW test (tenant_id OPTIONS (description='Test description on table creation')) AS SELECT 1 AS tenant_id, 1 AS customer_id",
"SELECT * FROM `proj.dataset.INFORMATION_SCHEMA.SOME_VIEW`",
"SELECT * FROM `proj.dataset.INFORMATION_SCHEMA.SOME_VIEW` AS `proj.dataset.INFORMATION_SCHEMA.SOME_VIEW`",
)
self.validate_identity(
"SELECT * FROM region_or_dataset.INFORMATION_SCHEMA.TABLES",
"SELECT * FROM region_or_dataset.`INFORMATION_SCHEMA.TABLES` AS TABLES",
)
self.validate_identity(
"SELECT * FROM region_or_dataset.INFORMATION_SCHEMA.TABLES AS some_name",
"SELECT * FROM region_or_dataset.`INFORMATION_SCHEMA.TABLES` AS some_name",
)
self.validate_identity(
"SELECT * FROM proj.region_or_dataset.INFORMATION_SCHEMA.TABLES",
"SELECT * FROM proj.region_or_dataset.`INFORMATION_SCHEMA.TABLES` AS TABLES",
)
self.validate_identity(
"CREATE VIEW `d.v` OPTIONS (expiration_timestamp=TIMESTAMP '2020-01-02T04:05:06.007Z') AS SELECT 1 AS c",
@ -302,6 +323,13 @@ LANGUAGE js AS
"SELECT CAST(1 AS INT64)",
)
self.validate_all(
"EDIT_DISTANCE(a, b)",
write={
"bigquery": "EDIT_DISTANCE(a, b)",
"duckdb": "LEVENSHTEIN(a, b)",
},
)
self.validate_all(
"SAFE_CAST(some_date AS DATE FORMAT 'DD MONTH YYYY')",
write={
@ -361,9 +389,18 @@ LANGUAGE js AS
write={
"bigquery": "TIMESTAMP(x)",
"duckdb": "CAST(x AS TIMESTAMPTZ)",
"snowflake": "CAST(x AS TIMESTAMPTZ)",
"presto": "CAST(x AS TIMESTAMP WITH TIME ZONE)",
},
)
self.validate_all(
"SELECT TIMESTAMP('2008-12-25 15:30:00', 'America/Los_Angeles')",
write={
"bigquery": "SELECT TIMESTAMP('2008-12-25 15:30:00', 'America/Los_Angeles')",
"duckdb": "SELECT CAST('2008-12-25 15:30:00' AS TIMESTAMP) AT TIME ZONE 'America/Los_Angeles'",
"snowflake": "SELECT CONVERT_TIMEZONE('America/Los_Angeles', CAST('2008-12-25 15:30:00' AS TIMESTAMP))",
},
)
self.validate_all(
"SELECT SUM(x IGNORE NULLS) AS x",
read={
@ -629,6 +666,7 @@ LANGUAGE js AS
write={
"bigquery": "SELECT DATETIME_DIFF('2023-01-01T00:00:00', '2023-01-01T05:00:00', MILLISECOND)",
"databricks": "SELECT TIMESTAMPDIFF(MILLISECOND, '2023-01-01T05:00:00', '2023-01-01T00:00:00')",
"snowflake": "SELECT TIMESTAMPDIFF(MILLISECOND, '2023-01-01T05:00:00', '2023-01-01T00:00:00')",
},
),
)
@ -639,6 +677,7 @@ LANGUAGE js AS
"bigquery": "SELECT DATETIME_ADD('2023-01-01T00:00:00', INTERVAL '1' MILLISECOND)",
"databricks": "SELECT TIMESTAMPADD(MILLISECOND, '1', '2023-01-01T00:00:00')",
"duckdb": "SELECT CAST('2023-01-01T00:00:00' AS DATETIME) + INTERVAL '1' MILLISECOND",
"snowflake": "SELECT TIMESTAMPADD(MILLISECOND, '1', '2023-01-01T00:00:00')",
},
),
)
@ -670,6 +709,7 @@ LANGUAGE js AS
"databricks": "SELECT DATE_ADD(MINUTE, '10', CAST('2008-12-25 15:30:00+00' AS TIMESTAMP))",
"mysql": "SELECT DATE_ADD(TIMESTAMP('2008-12-25 15:30:00+00'), INTERVAL '10' MINUTE)",
"spark": "SELECT DATE_ADD(MINUTE, '10', CAST('2008-12-25 15:30:00+00' AS TIMESTAMP))",
"snowflake": "SELECT TIMESTAMPADD(MINUTE, '10', CAST('2008-12-25 15:30:00+00' AS TIMESTAMPTZ))",
},
)
self.validate_all(
@ -677,6 +717,14 @@ LANGUAGE js AS
write={
"bigquery": "SELECT TIMESTAMP_SUB(CAST('2008-12-25 15:30:00+00' AS TIMESTAMP), INTERVAL '10' MINUTE)",
"mysql": "SELECT DATE_SUB(TIMESTAMP('2008-12-25 15:30:00+00'), INTERVAL '10' MINUTE)",
"snowflake": "SELECT TIMESTAMPADD(MINUTE, '10' * -1, CAST('2008-12-25 15:30:00+00' AS TIMESTAMPTZ))",
},
)
self.validate_all(
'SELECT TIMESTAMP_SUB(TIMESTAMP "2008-12-25 15:30:00+00", INTERVAL col MINUTE)',
write={
"bigquery": "SELECT TIMESTAMP_SUB(CAST('2008-12-25 15:30:00+00' AS TIMESTAMP), INTERVAL col MINUTE)",
"snowflake": "SELECT TIMESTAMPADD(MINUTE, col * -1, CAST('2008-12-25 15:30:00+00' AS TIMESTAMPTZ))",
},
)
self.validate_all(
@ -1113,7 +1161,8 @@ LANGUAGE js AS
write={
"bigquery": "CURRENT_TIME()",
"duckdb": "CURRENT_TIME",
"presto": "CURRENT_TIME()",
"presto": "CURRENT_TIME",
"trino": "CURRENT_TIME",
"hive": "CURRENT_TIME()",
"spark": "CURRENT_TIME()",
},
@ -1490,6 +1539,14 @@ WHERE
"duckdb": "SELECT CAST(STRPTIME('Thursday Dec 25 2008', '%A %b %-d %Y') AS DATE)",
},
)
self.validate_all(
"SELECT PARSE_DATE('%Y%m%d', '20081225')",
write={
"bigquery": "SELECT PARSE_DATE('%Y%m%d', '20081225')",
"duckdb": "SELECT CAST(STRPTIME('20081225', '%Y%m%d') AS DATE)",
"snowflake": "SELECT DATE('20081225', 'yyyymmDD')",
},
)
self.validate_all(
"SELECT ARRAY_TO_STRING(['cake', 'pie', NULL], '--') AS text",
write={
@ -1504,9 +1561,48 @@ WHERE
"duckdb": "SELECT ARRAY_TO_STRING(LIST_TRANSFORM(['cake', 'pie', NULL], x -> COALESCE(x, 'MISSING')), '--') AS text",
},
)
self.validate_all(
"STRING(a)",
write={
"bigquery": "STRING(a)",
"snowflake": "CAST(a AS VARCHAR)",
"duckdb": "CAST(a AS TEXT)",
},
)
self.validate_all(
"STRING('2008-12-25 15:30:00', 'America/New_York')",
write={
"bigquery": "STRING('2008-12-25 15:30:00', 'America/New_York')",
"snowflake": "CAST(CONVERT_TIMEZONE('UTC', 'America/New_York', '2008-12-25 15:30:00') AS VARCHAR)",
"duckdb": "CAST(CAST('2008-12-25 15:30:00' AS TIMESTAMP) AT TIME ZONE 'UTC' AT TIME ZONE 'America/New_York' AS TEXT)",
},
)
self.validate_identity("SELECT * FROM a-b c", "SELECT * FROM a-b AS c")
self.validate_all(
"SAFE_DIVIDE(x, y)",
write={
"bigquery": "SAFE_DIVIDE(x, y)",
"duckdb": "IF((y) <> 0, (x) / (y), NULL)",
"presto": "IF((y) <> 0, (x) / (y), NULL)",
"trino": "IF((y) <> 0, (x) / (y), NULL)",
"hive": "IF((y) <> 0, (x) / (y), NULL)",
"spark2": "IF((y) <> 0, (x) / (y), NULL)",
"spark": "IF((y) <> 0, (x) / (y), NULL)",
"databricks": "IF((y) <> 0, (x) / (y), NULL)",
"snowflake": "IFF((y) <> 0, (x) / (y), NULL)",
},
)
self.validate_all(
"""SELECT JSON_QUERY('{"class": {"students": []}}', '$.class')""",
write={
"bigquery": """SELECT JSON_QUERY('{"class": {"students": []}}', '$.class')""",
"duckdb": """SELECT '{"class": {"students": []}}' -> '$.class'""",
"snowflake": """SELECT GET_PATH(PARSE_JSON('{"class": {"students": []}}'), 'class')""",
},
)
def test_errors(self):
with self.assertRaises(TokenError):
transpile("'\\'", read="bigquery")
@ -2000,3 +2096,23 @@ OPTIONS (
"bigquery": f"SELECT color, ARRAY_AGG(id ORDER BY id {sort_order}) AS ids FROM colors GROUP BY 1",
},
)
def test_json_extract_scalar(self):
for func in ("JSON_EXTRACT_SCALAR", "JSON_VALUE"):
with self.subTest(f"Testing BigQuery's {func}"):
self.validate_all(
f"SELECT {func}('5')",
write={
"bigquery": f"SELECT {func}('5', '$')",
"duckdb": """SELECT '5' ->> '$'""",
},
)
self.validate_all(
f"""SELECT {func}('{{"name": "Jakob", "age": "6"}}', '$.age')""",
write={
"bigquery": f"""SELECT {func}('{{"name": "Jakob", "age": "6"}}', '$.age')""",
"duckdb": """SELECT '{"name": "Jakob", "age": "6"}' ->> '$.age'""",
"snowflake": """SELECT JSON_EXTRACT_PATH_TEXT('{"name": "Jakob", "age": "6"}', 'age')""",
},
)

View file

@ -1,4 +1,4 @@
from datetime import date
from datetime import date, datetime, timezone
from sqlglot import exp, parse_one
from sqlglot.dialects import ClickHouse
from sqlglot.expressions import convert
@ -88,6 +88,7 @@ class TestClickhouse(Validator):
self.validate_identity("CAST(x AS DATETIME)", "CAST(x AS DateTime)")
self.validate_identity("CAST(x AS TIMESTAMPTZ)", "CAST(x AS DateTime)")
self.validate_identity("CAST(x as MEDIUMINT)", "CAST(x AS Int32)")
self.validate_identity("CAST(x AS DECIMAL(38, 2))", "CAST(x AS Decimal(38, 2))")
self.validate_identity("SELECT arrayJoin([1, 2, 3] AS src) AS dst, 'Hello', src")
self.validate_identity("""SELECT JSONExtractString('{"x": {"y": 1}}', 'x', 'y')""")
self.validate_identity("SELECT * FROM table LIMIT 1 BY a, b")
@ -95,6 +96,9 @@ class TestClickhouse(Validator):
self.validate_identity("TRUNCATE TABLE t1 ON CLUSTER test_cluster")
self.validate_identity("TRUNCATE DATABASE db")
self.validate_identity("TRUNCATE DATABASE db ON CLUSTER test_cluster")
self.validate_identity(
"SELECT CAST(1730098800 AS DateTime64) AS DATETIME, 'test' AS interp ORDER BY DATETIME WITH FILL FROM toDateTime64(1730098800, 3) - INTERVAL '7' HOUR TO toDateTime64(1730185140, 3) - INTERVAL '7' HOUR STEP toIntervalSecond(900) INTERPOLATE (interp)"
)
self.validate_identity(
"SELECT number, COUNT() OVER (PARTITION BY number % 3) AS partition_count FROM numbers(10) WINDOW window_name AS (PARTITION BY number) QUALIFY partition_count = 4 ORDER BY number"
)
@ -149,6 +153,10 @@ class TestClickhouse(Validator):
self.validate_identity(
"CREATE TABLE t (foo String CODEC(LZ4HC(9), ZSTD, DELTA), size String ALIAS formatReadableSize(size_bytes), INDEX idx1 a TYPE bloom_filter(0.001) GRANULARITY 1, INDEX idx2 a TYPE set(100) GRANULARITY 2, INDEX idx3 a TYPE minmax GRANULARITY 3)"
)
self.validate_identity(
"SELECT (toUInt8('1') + toUInt8('2')) IS NOT NULL",
"SELECT NOT ((toUInt8('1') + toUInt8('2')) IS NULL)",
)
self.validate_identity(
"SELECT $1$foo$1$",
"SELECT 'foo'",
@ -424,8 +432,13 @@ class TestClickhouse(Validator):
)
self.validate_all(
"SELECT quantile(0.5)(a)",
read={"duckdb": "SELECT quantile(a, 0.5)"},
write={"clickhouse": "SELECT quantile(0.5)(a)"},
read={
"duckdb": "SELECT quantile(a, 0.5)",
"clickhouse": "SELECT median(a)",
},
write={
"clickhouse": "SELECT quantile(0.5)(a)",
},
)
self.validate_all(
"SELECT quantiles(0.5, 0.4)(a)",
@ -526,6 +539,10 @@ class TestClickhouse(Validator):
"SELECT * FROM ABC WHERE hasAny(COLUMNS('.*field') APPLY(toUInt64) APPLY(to), (SELECT groupUniqArray(toUInt64(field))))"
)
self.validate_identity("SELECT col apply", "SELECT col AS apply")
self.validate_identity(
"SELECT name FROM data WHERE (SELECT DISTINCT name FROM data) IS NOT NULL",
"SELECT name FROM data WHERE NOT ((SELECT DISTINCT name FROM data) IS NULL)",
)
def test_clickhouse_values(self):
values = exp.select("*").from_(
@ -645,6 +662,12 @@ class TestClickhouse(Validator):
write={"clickhouse": f"CAST(pow(2, 32) AS {data_type})"},
)
def test_geom_types(self):
data_types = ["Point", "Ring", "LineString", "MultiLineString", "Polygon", "MultiPolygon"]
for data_type in data_types:
with self.subTest(f"Casting to ClickHouse {data_type}"):
self.validate_identity(f"SELECT CAST(val AS {data_type})")
def test_ddl(self):
db_table_expr = exp.Table(this=None, db=exp.to_identifier("foo"), catalog=None)
create_with_cluster = exp.Create(
@ -678,6 +701,7 @@ class TestClickhouse(Validator):
"CREATE TABLE foo ENGINE=Memory AS (SELECT * FROM db.other_table) COMMENT 'foo'",
)
self.validate_identity("CREATE FUNCTION linear_equation AS (x, k, b) -> k * x + b")
self.validate_identity("CREATE MATERIALIZED VIEW a.b TO a.c (c Int32) AS SELECT * FROM a.d")
self.validate_identity("""CREATE TABLE ip_data (ip4 IPv4, ip6 IPv6) ENGINE=TinyLog()""")
self.validate_identity("""CREATE TABLE dates (dt1 Date32) ENGINE=TinyLog()""")
@ -701,6 +725,10 @@ class TestClickhouse(Validator):
self.validate_identity(
"CREATE TABLE foo (x UInt32) TTL time_column + INTERVAL '1' MONTH DELETE WHERE column = 'value'"
)
self.validate_identity(
"CREATE FUNCTION parity_str AS (n) -> IF(n % 2, 'odd', 'even')",
"CREATE FUNCTION parity_str AS n -> CASE WHEN n % 2 THEN 'odd' ELSE 'even' END",
)
self.validate_identity(
"CREATE TABLE a ENGINE=Memory AS SELECT 1 AS c COMMENT 'foo'",
"CREATE TABLE a ENGINE=Memory AS (SELECT 1 AS c) COMMENT 'foo'",
@ -1094,6 +1122,92 @@ LIFETIME(MIN 0 MAX 0)""",
convert(date(2020, 1, 1)).sql(dialect=self.dialect), "toDate('2020-01-01')"
)
# no fractional seconds
self.assertEqual(
convert(datetime(2020, 1, 1, 0, 0, 1)).sql(dialect=self.dialect),
"CAST('2020-01-01 00:00:01' AS DateTime64(6))",
)
self.assertEqual(
convert(datetime(2020, 1, 1, 0, 0, 1, tzinfo=timezone.utc)).sql(dialect=self.dialect),
"CAST('2020-01-01 00:00:01' AS DateTime64(6, 'UTC'))",
)
# with fractional seconds
self.assertEqual(
convert(datetime(2020, 1, 1, 0, 0, 1, 1)).sql(dialect=self.dialect),
"CAST('2020-01-01 00:00:01.000001' AS DateTime64(6))",
)
self.assertEqual(
convert(datetime(2020, 1, 1, 0, 0, 1, 1, tzinfo=timezone.utc)).sql(
dialect=self.dialect
),
"CAST('2020-01-01 00:00:01.000001' AS DateTime64(6, 'UTC'))",
)
def test_timestr_to_time(self):
# no fractional seconds
time_strings = [
"2020-01-01 00:00:01",
"2020-01-01 00:00:01+01:00",
" 2020-01-01 00:00:01-01:00 ",
"2020-01-01T00:00:01+01:00",
]
for time_string in time_strings:
with self.subTest(f"'{time_string}'"):
self.assertEqual(
exp.TimeStrToTime(this=exp.Literal.string(time_string)).sql(
dialect=self.dialect
),
f"CAST('{time_string}' AS DateTime64(6))",
)
time_strings_no_utc = ["2020-01-01 00:00:01" for i in range(4)]
for utc, no_utc in zip(time_strings, time_strings_no_utc):
with self.subTest(f"'{time_string}' with UTC timezone"):
self.assertEqual(
exp.TimeStrToTime(
this=exp.Literal.string(utc), zone=exp.Literal.string("UTC")
).sql(dialect=self.dialect),
f"CAST('{no_utc}' AS DateTime64(6, 'UTC'))",
)
# with fractional seconds
time_strings = [
"2020-01-01 00:00:01.001",
"2020-01-01 00:00:01.000001",
"2020-01-01 00:00:01.001+00:00",
"2020-01-01 00:00:01.000001-00:00",
"2020-01-01 00:00:01.0001",
"2020-01-01 00:00:01.1+00:00",
]
for time_string in time_strings:
with self.subTest(f"'{time_string}'"):
self.assertEqual(
exp.TimeStrToTime(this=exp.Literal.string(time_string[0])).sql(
dialect=self.dialect
),
f"CAST('{time_string[0]}' AS DateTime64(6))",
)
time_strings_no_utc = [
"2020-01-01 00:00:01.001000",
"2020-01-01 00:00:01.000001",
"2020-01-01 00:00:01.001000",
"2020-01-01 00:00:01.000001",
"2020-01-01 00:00:01.000100",
"2020-01-01 00:00:01.100000",
]
for utc, no_utc in zip(time_strings, time_strings_no_utc):
with self.subTest(f"'{time_string}' with UTC timezone"):
self.assertEqual(
exp.TimeStrToTime(
this=exp.Literal.string(utc), zone=exp.Literal.string("UTC")
).sql(dialect=self.dialect),
f"CAST('{no_utc}' AS DateTime64(6, 'UTC'))",
)
def test_grant(self):
self.validate_identity("GRANT SELECT(x, y) ON db.table TO john WITH GRANT OPTION")
self.validate_identity("GRANT INSERT(x, y) ON db.table TO john")

View file

@ -7,6 +7,7 @@ class TestDatabricks(Validator):
dialect = "databricks"
def test_databricks(self):
self.validate_identity("SELECT * FROM stream")
self.validate_identity("SELECT t.current_time FROM t")
self.validate_identity("ALTER TABLE labels ADD COLUMN label_score FLOAT")
self.validate_identity("DESCRIBE HISTORY a.b")
@ -116,6 +117,17 @@ class TestDatabricks(Validator):
},
)
self.validate_all(
"SELECT ANY(col) FROM VALUES (TRUE), (FALSE) AS tab(col)",
read={
"databricks": "SELECT ANY(col) FROM VALUES (TRUE), (FALSE) AS tab(col)",
"spark": "SELECT ANY(col) FROM VALUES (TRUE), (FALSE) AS tab(col)",
},
write={
"spark": "SELECT ANY(col) FROM VALUES (TRUE), (FALSE) AS tab(col)",
},
)
# https://docs.databricks.com/sql/language-manual/functions/colonsign.html
def test_json(self):
self.validate_identity("SELECT c1:price, c1:price.foo, c1:price.bar[1]")

View file

@ -526,7 +526,7 @@ class TestDialect(Validator):
write={
"": "SELECT NVL2(a, b, c)",
"bigquery": "SELECT CASE WHEN NOT a IS NULL THEN b ELSE c END",
"clickhouse": "SELECT CASE WHEN NOT a IS NULL THEN b ELSE c END",
"clickhouse": "SELECT CASE WHEN NOT (a IS NULL) THEN b ELSE c END",
"databricks": "SELECT NVL2(a, b, c)",
"doris": "SELECT CASE WHEN NOT a IS NULL THEN b ELSE c END",
"drill": "SELECT CASE WHEN NOT a IS NULL THEN b ELSE c END",
@ -552,7 +552,7 @@ class TestDialect(Validator):
write={
"": "SELECT NVL2(a, b)",
"bigquery": "SELECT CASE WHEN NOT a IS NULL THEN b END",
"clickhouse": "SELECT CASE WHEN NOT a IS NULL THEN b END",
"clickhouse": "SELECT CASE WHEN NOT (a IS NULL) THEN b END",
"databricks": "SELECT NVL2(a, b)",
"doris": "SELECT CASE WHEN NOT a IS NULL THEN b END",
"drill": "SELECT CASE WHEN NOT a IS NULL THEN b END",
@ -651,7 +651,7 @@ class TestDialect(Validator):
"snowflake": "CAST('2020-01-01' AS TIMESTAMP)",
"spark": "CAST('2020-01-01' AS TIMESTAMP)",
"trino": "CAST('2020-01-01' AS TIMESTAMP)",
"clickhouse": "CAST('2020-01-01' AS Nullable(DateTime))",
"clickhouse": "CAST('2020-01-01' AS DateTime64(6))",
"drill": "CAST('2020-01-01' AS TIMESTAMP)",
"hive": "CAST('2020-01-01' AS TIMESTAMP)",
"presto": "CAST('2020-01-01' AS TIMESTAMP)",
@ -688,7 +688,7 @@ class TestDialect(Validator):
"snowflake": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMPTZ)",
"spark": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMP)",
"trino": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMP WITH TIME ZONE)",
"clickhouse": "CAST('2020-01-01 12:13:14' AS Nullable(DateTime('America/Los_Angeles')))",
"clickhouse": "CAST('2020-01-01 12:13:14' AS DateTime64(6, 'America/Los_Angeles'))",
"drill": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMP)",
"hive": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMP)",
"presto": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMP WITH TIME ZONE)",
@ -709,7 +709,7 @@ class TestDialect(Validator):
"snowflake": "CAST(col AS TIMESTAMPTZ)",
"spark": "CAST(col AS TIMESTAMP)",
"trino": "CAST(col AS TIMESTAMP WITH TIME ZONE)",
"clickhouse": "CAST(col AS Nullable(DateTime('America/Los_Angeles')))",
"clickhouse": "CAST(col AS DateTime64(6, 'America/Los_Angeles'))",
"drill": "CAST(col AS TIMESTAMP)",
"hive": "CAST(col AS TIMESTAMP)",
"presto": "CAST(col AS TIMESTAMP WITH TIME ZONE)",
@ -2893,3 +2893,121 @@ FROM subquery2""",
"snowflake": "UUID_STRING()",
},
)
def test_escaped_identifier_delimiter(self):
for dialect in ("databricks", "hive", "mysql", "spark2", "spark"):
with self.subTest(f"Testing escaped backtick in identifier name for {dialect}"):
self.validate_all(
'SELECT 1 AS "x`"',
read={
dialect: "SELECT 1 AS `x```",
},
write={
dialect: "SELECT 1 AS `x```",
},
)
for dialect in (
"",
"clickhouse",
"duckdb",
"postgres",
"presto",
"trino",
"redshift",
"snowflake",
"sqlite",
):
with self.subTest(f"Testing escaped double-quote in identifier name for {dialect}"):
self.validate_all(
'SELECT 1 AS "x"""',
read={
dialect: 'SELECT 1 AS "x"""',
},
write={
dialect: 'SELECT 1 AS "x"""',
},
)
for dialect in ("clickhouse", "sqlite"):
with self.subTest(f"Testing escaped backtick in identifier name for {dialect}"):
self.validate_all(
'SELECT 1 AS "x`"',
read={
dialect: "SELECT 1 AS `x```",
},
write={
dialect: 'SELECT 1 AS "x`"',
},
)
self.validate_all(
'SELECT 1 AS "x`"',
read={
"clickhouse": "SELECT 1 AS `x\\``",
},
write={
"clickhouse": 'SELECT 1 AS "x`"',
},
)
for name in ('"x\\""', '`x"`'):
with self.subTest(f"Testing ClickHouse delimiter escaping: {name}"):
self.validate_all(
'SELECT 1 AS "x"""',
read={
"clickhouse": f"SELECT 1 AS {name}",
},
write={
"clickhouse": 'SELECT 1 AS "x"""',
},
)
for name in ("[[x]]]", '"[x]"'):
with self.subTest(f"Testing T-SQL delimiter escaping: {name}"):
self.validate_all(
'SELECT 1 AS "[x]"',
read={
"tsql": f"SELECT 1 AS {name}",
},
write={
"tsql": "SELECT 1 AS [[x]]]",
},
)
for name in ('[x"]', '"x"""'):
with self.subTest(f"Testing T-SQL delimiter escaping: {name}"):
self.validate_all(
'SELECT 1 AS "x"""',
read={
"tsql": f"SELECT 1 AS {name}",
},
write={
"tsql": 'SELECT 1 AS [x"]',
},
)
def test_median(self):
for suffix in (
"",
" OVER ()",
):
self.validate_all(
f"MEDIAN(x){suffix}",
read={
"snowflake": f"MEDIAN(x){suffix}",
"duckdb": f"MEDIAN(x){suffix}",
"spark": f"MEDIAN(x){suffix}",
"databricks": f"MEDIAN(x){suffix}",
"redshift": f"MEDIAN(x){suffix}",
"oracle": f"MEDIAN(x){suffix}",
},
write={
"snowflake": f"MEDIAN(x){suffix}",
"duckdb": f"MEDIAN(x){suffix}",
"spark": f"MEDIAN(x){suffix}",
"databricks": f"MEDIAN(x){suffix}",
"redshift": f"MEDIAN(x){suffix}",
"oracle": f"MEDIAN(x){suffix}",
"clickhouse": f"MEDIAN(x){suffix}",
"postgres": f"PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY x){suffix}",
},
)

View file

@ -619,12 +619,6 @@ class TestDuckDB(Validator):
"spark": "ARRAY_SUM(ARRAY(1, 2))",
},
)
self.validate_all(
"IF((y) <> 0, (x) / (y), NULL)",
read={
"bigquery": "SAFE_DIVIDE(x, y)",
},
)
self.validate_all(
"STRUCT_PACK(x := 1, y := '2')",
write={
@ -758,16 +752,9 @@ class TestDuckDB(Validator):
"snowflake": "SELECT PERCENTILE_DISC(q) WITHIN GROUP (ORDER BY x) FROM t",
},
)
self.validate_all(
"SELECT MEDIAN(x) FROM t",
write={
"duckdb": "SELECT QUANTILE_CONT(x, 0.5) FROM t",
"postgres": "SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY x) FROM t",
"snowflake": "SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY x) FROM t",
},
)
with self.assertRaises(UnsupportedError):
# bq has the position arg, but duckdb doesn't
transpile(
"SELECT REGEXP_EXTRACT(a, 'pattern', 1) from table",
read="bigquery",
@ -775,6 +762,36 @@ class TestDuckDB(Validator):
unsupported_level=ErrorLevel.IMMEDIATE,
)
self.validate_all(
"SELECT REGEXP_EXTRACT(a, 'pattern') FROM t",
read={
"duckdb": "SELECT REGEXP_EXTRACT(a, 'pattern') FROM t",
"bigquery": "SELECT REGEXP_EXTRACT(a, 'pattern') FROM t",
"snowflake": "SELECT REGEXP_SUBSTR(a, 'pattern') FROM t",
},
write={
"duckdb": "SELECT REGEXP_EXTRACT(a, 'pattern') FROM t",
"bigquery": "SELECT REGEXP_EXTRACT(a, 'pattern') FROM t",
"snowflake": "SELECT REGEXP_SUBSTR(a, 'pattern') FROM t",
},
)
self.validate_all(
"SELECT REGEXP_EXTRACT(a, 'pattern', 2, 'i') FROM t",
read={
"snowflake": "SELECT REGEXP_SUBSTR(a, 'pattern', 1, 1, 'i', 2) FROM t",
},
write={
"duckdb": "SELECT REGEXP_EXTRACT(a, 'pattern', 2, 'i') FROM t",
"snowflake": "SELECT REGEXP_SUBSTR(a, 'pattern', 1, 1, 'i', 2) FROM t",
},
)
self.validate_identity(
"SELECT REGEXP_EXTRACT(a, 'pattern', 0)",
"SELECT REGEXP_EXTRACT(a, 'pattern')",
)
self.validate_identity("SELECT REGEXP_EXTRACT(a, 'pattern', 0, 'i')")
self.validate_identity("SELECT REGEXP_EXTRACT(a, 'pattern', 1, 'i')")
self.validate_identity("SELECT ISNAN(x)")
self.validate_all(

View file

@ -1,5 +1,7 @@
from tests.dialects.test_dialect import Validator
from sqlglot import exp
class TestHive(Validator):
dialect = "hive"
@ -787,6 +789,23 @@ class TestHive(Validator):
},
)
self.validate_identity("EXISTS(col, x -> x % 2 = 0)").assert_is(exp.Exists)
self.validate_all(
"SELECT EXISTS(ARRAY(2, 3), x -> x % 2 = 0)",
read={
"hive": "SELECT EXISTS(ARRAY(2, 3), x -> x % 2 = 0)",
"spark2": "SELECT EXISTS(ARRAY(2, 3), x -> x % 2 = 0)",
"spark": "SELECT EXISTS(ARRAY(2, 3), x -> x % 2 = 0)",
"databricks": "SELECT EXISTS(ARRAY(2, 3), x -> x % 2 = 0)",
},
write={
"spark2": "SELECT EXISTS(ARRAY(2, 3), x -> x % 2 = 0)",
"spark": "SELECT EXISTS(ARRAY(2, 3), x -> x % 2 = 0)",
"databricks": "SELECT EXISTS(ARRAY(2, 3), x -> x % 2 = 0)",
},
)
def test_escapes(self) -> None:
self.validate_identity("'\n'", "'\\n'")
self.validate_identity("'\\n'")

View file

@ -388,7 +388,7 @@ class TestMySQL(Validator):
"sqlite": "SELECT x'CC'",
"starrocks": "SELECT x'CC'",
"tableau": "SELECT 204",
"teradata": "SELECT 204",
"teradata": "SELECT X'CC'",
"trino": "SELECT X'CC'",
"tsql": "SELECT 0xCC",
}
@ -409,7 +409,7 @@ class TestMySQL(Validator):
"sqlite": "SELECT x'0000CC'",
"starrocks": "SELECT x'0000CC'",
"tableau": "SELECT 204",
"teradata": "SELECT 204",
"teradata": "SELECT X'0000CC'",
"trino": "SELECT X'0000CC'",
"tsql": "SELECT 0x0000CC",
}

View file

@ -119,13 +119,6 @@ class TestOracle(Validator):
"tsql": UnsupportedError,
},
)
self.validate_all(
"TRUNC(SYSDATE, 'YEAR')",
write={
"clickhouse": "DATE_TRUNC('YEAR', CURRENT_TIMESTAMP())",
"oracle": "TRUNC(SYSDATE, 'YEAR')",
},
)
self.validate_all(
"SELECT * FROM test WHERE MOD(col1, 4) = 3",
read={
@ -632,3 +625,20 @@ WHERE
self.validate_identity("GRANT UPDATE, TRIGGER ON TABLE t TO anita, zhi")
self.validate_identity("GRANT EXECUTE ON PROCEDURE p TO george")
self.validate_identity("GRANT USAGE ON SEQUENCE order_id TO sales_role")
def test_datetrunc(self):
self.validate_all(
"TRUNC(SYSDATE, 'YEAR')",
write={
"clickhouse": "DATE_TRUNC('YEAR', CURRENT_TIMESTAMP())",
"oracle": "TRUNC(SYSDATE, 'YEAR')",
},
)
# Make sure units are not normalized e.g 'Q' -> 'QUARTER' and 'W' -> 'WEEK'
# https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ROUND-and-TRUNC-Date-Functions.html
for unit in (
"'Q'",
"'W'",
):
self.validate_identity(f"TRUNC(x, {unit})")

View file

@ -797,6 +797,24 @@ class TestPostgres(Validator):
self.validate_identity("SELECT OVERLAY(a PLACING b FROM 1 FOR 1)")
self.validate_identity("ARRAY[1, 2, 3] && ARRAY[1, 2]").assert_is(exp.ArrayOverlaps)
self.validate_all(
"""SELECT JSONB_EXISTS('{"a": [1,2,3]}', 'a')""",
write={
"postgres": """SELECT JSONB_EXISTS('{"a": [1,2,3]}', 'a')""",
"duckdb": """SELECT JSON_EXISTS('{"a": [1,2,3]}', '$.a')""",
},
)
self.validate_all(
"WITH t AS (SELECT ARRAY[1, 2, 3] AS col) SELECT * FROM t WHERE 1 <= ANY(col) AND 2 = ANY(col)",
write={
"postgres": "WITH t AS (SELECT ARRAY[1, 2, 3] AS col) SELECT * FROM t WHERE 1 <= ANY(col) AND 2 = ANY(col)",
"hive": "WITH t AS (SELECT ARRAY(1, 2, 3) AS col) SELECT * FROM t WHERE EXISTS(col, x -> 1 <= x) AND EXISTS(col, x -> 2 = x)",
"spark2": "WITH t AS (SELECT ARRAY(1, 2, 3) AS col) SELECT * FROM t WHERE EXISTS(col, x -> 1 <= x) AND EXISTS(col, x -> 2 = x)",
"spark": "WITH t AS (SELECT ARRAY(1, 2, 3) AS col) SELECT * FROM t WHERE EXISTS(col, x -> 1 <= x) AND EXISTS(col, x -> 2 = x)",
"databricks": "WITH t AS (SELECT ARRAY(1, 2, 3) AS col) SELECT * FROM t WHERE EXISTS(col, x -> 1 <= x) AND EXISTS(col, x -> 2 = x)",
},
)
def test_ddl(self):
# Checks that user-defined types are parsed into DataType instead of Identifier
self.parse_one("CREATE TABLE t (a udt)").this.expressions[0].args["kind"].assert_is(

View file

@ -414,13 +414,6 @@ class TestPresto(Validator):
"CAST(x AS TIMESTAMP)",
read={"mysql": "TIMESTAMP(x)"},
)
self.validate_all(
"TIMESTAMP(x, 'America/Los_Angeles')",
write={
"duckdb": "CAST(x AS TIMESTAMP) AT TIME ZONE 'America/Los_Angeles'",
"presto": "AT_TIMEZONE(CAST(x AS TIMESTAMP), 'America/Los_Angeles')",
},
)
# this case isn't really correct, but it's a fall back for mysql's version
self.validate_all(
"TIMESTAMP(x, '12:00:00')",

View file

@ -331,10 +331,15 @@ WHERE
"snowflake": "SELECT TIME_FROM_PARTS(12, 34, 56, 987654321)",
},
)
self.validate_identity(
"SELECT TIMESTAMPNTZFROMPARTS(2013, 4, 5, 12, 00, 00)",
"SELECT TIMESTAMP_FROM_PARTS(2013, 4, 5, 12, 00, 00)",
)
self.validate_all(
"SELECT TIMESTAMP_FROM_PARTS(2013, 4, 5, 12, 00, 00)",
read={
"duckdb": "SELECT MAKE_TIMESTAMP(2013, 4, 5, 12, 00, 00)",
"snowflake": "SELECT TIMESTAMP_NTZ_FROM_PARTS(2013, 4, 5, 12, 00, 00)",
},
write={
"duckdb": "SELECT MAKE_TIMESTAMP(2013, 4, 5, 12, 00, 00)",
@ -519,7 +524,6 @@ WHERE
self.validate_all(
f"SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY x){suffix}",
read={
"snowflake": f"SELECT MEDIAN(x){suffix}",
"postgres": f"SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY x){suffix}",
},
write={
@ -529,15 +533,6 @@ WHERE
"snowflake": f"SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY x){suffix}",
},
)
self.validate_all(
f"SELECT MEDIAN(x){suffix}",
write={
"": f"SELECT PERCENTILE_CONT(x, 0.5){suffix}",
"duckdb": f"SELECT QUANTILE_CONT(x, 0.5){suffix}",
"postgres": f"SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY x){suffix}",
"snowflake": f"SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY x){suffix}",
},
)
for func in (
"CORR",
"COVAR_POP",
@ -1768,7 +1763,6 @@ FROM persons AS p, LATERAL FLATTEN(input => p.c, path => 'contact') AS _flattene
"REGEXP_SUBSTR(subject, pattern)",
read={
"bigquery": "REGEXP_EXTRACT(subject, pattern)",
"snowflake": "REGEXP_EXTRACT(subject, pattern)",
},
write={
"bigquery": "REGEXP_EXTRACT(subject, pattern)",

View file

@ -314,6 +314,20 @@ TBLPROPERTIES (
"spark": "SELECT COLLECT_LIST(x) FILTER(WHERE x = 5) FROM (SELECT 1 UNION ALL SELECT NULL) AS t(x)",
},
)
self.validate_all(
"SELECT ARRAY_AGG(1)",
write={
"duckdb": "SELECT ARRAY_AGG(1)",
"spark": "SELECT COLLECT_LIST(1)",
},
)
self.validate_all(
"SELECT ARRAY_AGG(DISTINCT STRUCT('a'))",
write={
"duckdb": "SELECT ARRAY_AGG(DISTINCT {'col1': 'a'})",
"spark": "SELECT COLLECT_LIST(DISTINCT STRUCT('a' AS col1))",
},
)
self.validate_all(
"SELECT DATE_FORMAT(DATE '2020-01-01', 'EEEE') AS weekday",
write={
@ -875,3 +889,9 @@ TBLPROPERTIES (
"databricks": "SELECT * FROM db.table1 EXCEPT SELECT * FROM db.table2",
},
)
def test_string(self):
for dialect in ("hive", "spark2", "spark", "databricks"):
with self.subTest(f"Testing STRING() for {dialect}"):
query = parse_one("STRING(a)", dialect=dialect)
self.assertEqual(query.sql(dialect), "CAST(a AS STRING)")

View file

@ -222,3 +222,7 @@ class TestSQLite(Validator):
"mysql": "CREATE TABLE `x` (`Name` VARCHAR(200) NOT NULL)",
},
)
self.validate_identity(
"CREATE TABLE store (store_id INTEGER PRIMARY KEY AUTOINCREMENT, mgr_id INTEGER NOT NULL UNIQUE REFERENCES staff ON UPDATE CASCADE)"
)

View file

@ -32,6 +32,10 @@ class TestTeradata(Validator):
},
)
self.validate_identity("SELECT 0x1d", "SELECT X'1d'")
self.validate_identity("SELECT X'1D'", "SELECT X'1D'")
self.validate_identity("SELECT x'1d'", "SELECT X'1d'")
self.validate_identity(
"RENAME TABLE emp TO employee", check_command_warning=True
).assert_is(exp.Command)

View file

@ -9,9 +9,30 @@ class TestTrino(Validator):
self.validate_identity("JSON_QUERY(content, 'lax $.HY.*')")
self.validate_identity("JSON_QUERY(content, 'strict $.HY.*' WITH UNCONDITIONAL WRAPPER)")
self.validate_identity("JSON_QUERY(content, 'strict $.HY.*' WITHOUT CONDITIONAL WRAPPER)")
def test_listagg(self):
self.validate_identity(
"SELECT LISTAGG(DISTINCT col, ',') WITHIN GROUP (ORDER BY col ASC) FROM tbl"
)
self.validate_identity(
"SELECT LISTAGG(col, '; ' ON OVERFLOW ERROR) WITHIN GROUP (ORDER BY col ASC) FROM tbl"
)
self.validate_identity(
"SELECT LISTAGG(col, '; ' ON OVERFLOW TRUNCATE WITH COUNT) WITHIN GROUP (ORDER BY col ASC) FROM tbl"
)
self.validate_identity(
"SELECT LISTAGG(col, '; ' ON OVERFLOW TRUNCATE WITHOUT COUNT) WITHIN GROUP (ORDER BY col ASC) FROM tbl"
)
self.validate_identity(
"SELECT LISTAGG(col, '; ' ON OVERFLOW TRUNCATE '...' WITH COUNT) WITHIN GROUP (ORDER BY col ASC) FROM tbl"
)
self.validate_identity(
"SELECT LISTAGG(col, '; ' ON OVERFLOW TRUNCATE '...' WITHOUT COUNT) WITHIN GROUP (ORDER BY col ASC) FROM tbl"
)
self.validate_identity(
"SELECT LISTAGG(col) WITHIN GROUP (ORDER BY col DESC) FROM tbl",
"SELECT LISTAGG(col, ',') WITHIN GROUP (ORDER BY col DESC) FROM tbl",
)
def test_trim(self):
self.validate_identity("SELECT TRIM('!' FROM '!foo!')")

View file

@ -411,6 +411,7 @@ class TestTSQL(Validator):
},
)
self.validate_identity("HASHBYTES('MD2', 'x')")
self.validate_identity("LOG(n)")
self.validate_identity("LOG(n, b)")
self.validate_all(
@ -920,6 +921,12 @@ class TestTSQL(Validator):
"": "CREATE TABLE IF NOT EXISTS foo.bar.baz AS SELECT '2020' AS z FROM a.b.c",
},
)
self.validate_all(
"IF NOT EXISTS (SELECT * FROM information_schema.tables WHERE table_name = 'baz' AND table_schema = 'bar' AND table_catalog = 'foo') EXEC('WITH cte1 AS (SELECT 1 AS col_a), cte2 AS (SELECT 1 AS col_b) SELECT * INTO foo.bar.baz FROM (SELECT col_a FROM cte1 UNION ALL SELECT col_b FROM cte2) AS temp')",
read={
"": "CREATE TABLE IF NOT EXISTS foo.bar.baz AS WITH cte1 AS (SELECT 1 AS col_a), cte2 AS (SELECT 1 AS col_b) SELECT col_a FROM cte1 UNION ALL SELECT col_b FROM cte2"
},
)
self.validate_all(
"CREATE OR ALTER VIEW a.b AS SELECT 1",
read={
@ -1567,7 +1574,7 @@ WHERE
"SELECT DATEDIFF(DAY, CAST(a AS DATETIME2), CAST(b AS DATETIME2)) AS x FROM foo",
write={
"tsql": "SELECT DATEDIFF(DAY, CAST(a AS DATETIME2), CAST(b AS DATETIME2)) AS x FROM foo",
"clickhouse": "SELECT DATE_DIFF(DAY, CAST(a AS Nullable(DateTime)), CAST(b AS Nullable(DateTime))) AS x FROM foo",
"clickhouse": "SELECT DATE_DIFF(DAY, CAST(CAST(a AS Nullable(DateTime)) AS DateTime64(6)), CAST(CAST(b AS Nullable(DateTime)) AS DateTime64(6))) AS x FROM foo",
},
)

View file

@ -250,7 +250,6 @@ SELECT LEAD(a, 1) OVER (PARTITION BY a ORDER BY a) AS x
SELECT LEAD(a, 1, b) OVER (PARTITION BY a ORDER BY a) AS x
SELECT X((a, b) -> a + b, z -> z) AS x
SELECT X(a -> a + ("z" - 1))
SELECT EXISTS(ARRAY(2, 3), x -> x % 2 = 0)
SELECT test.* FROM test
SELECT a AS b FROM test
SELECT "a"."b" FROM "a"

View file

@ -307,3 +307,11 @@ ARRAY<STRING>;
# dialect: bigquery
SPLIT(tbl.bin_col, delim);
ARRAY<BINARY>;
# dialect: bigquery
STRING(json_expr);
STRING;
# dialect: bigquery
STRING(timestamp_expr, timezone);
STRING;

View file

@ -14,6 +14,26 @@ SELECT 1 FROM x.y.z AS z;
SELECT 1 FROM x.y.z AS z;
SELECT 1 FROM x.y.z AS z;
# title: only information schema
# dialect: bigquery
SELECT * FROM information_schema.tables;
SELECT * FROM c.db.`information_schema.tables` AS tables;
# title: information schema with db
# dialect: bigquery
SELECT * FROM y.information_schema.tables;
SELECT * FROM c.y.`information_schema.tables` AS tables;
# title: information schema with db, catalog
# dialect: bigquery
SELECT * FROM x.y.information_schema.tables;
SELECT * FROM x.y.`information_schema.tables` AS tables;
# title: information schema with db, catalog, alias
# dialect: bigquery
SELECT * FROM x.y.information_schema.tables AS z;
SELECT * FROM x.y.`information_schema.tables` AS z;
# title: redshift unnest syntax, z.a should be a column, not a table
# dialect: redshift
SELECT 1 FROM y.z AS z, z.a;

View file

@ -2,7 +2,6 @@ import unittest
from sqlglot import exp, parse_one
from sqlglot.diff import Insert, Move, Remove, Update, diff
from sqlglot.expressions import Join, to_table
def diff_delta_only(source, target, matchings=None, **kwargs):
@ -14,22 +13,24 @@ class TestDiff(unittest.TestCase):
self._validate_delta_only(
diff_delta_only(parse_one("SELECT a + b"), parse_one("SELECT a - b")),
[
Remove(parse_one("a + b")), # the Add node
Insert(parse_one("a - b")), # the Sub node
Remove(expression=parse_one("a + b")), # the Add node
Insert(expression=parse_one("a - b")), # the Sub node
Move(source=parse_one("a"), target=parse_one("a")), # the `a` Column node
Move(source=parse_one("b"), target=parse_one("b")), # the `b` Column node
],
)
self._validate_delta_only(
diff_delta_only(parse_one("SELECT a, b, c"), parse_one("SELECT a, c")),
[
Remove(parse_one("b")), # the Column node
Remove(expression=parse_one("b")), # the Column node
],
)
self._validate_delta_only(
diff_delta_only(parse_one("SELECT a, b"), parse_one("SELECT a, b, c")),
[
Insert(parse_one("c")), # the Column node
Insert(expression=parse_one("c")), # the Column node
],
)
@ -40,8 +41,8 @@ class TestDiff(unittest.TestCase):
),
[
Update(
to_table("table_one", quoted=False),
to_table("table_two", quoted=False),
source=exp.to_table("table_one", quoted=False),
target=exp.to_table("table_two", quoted=False),
), # the Table node
],
)
@ -53,8 +54,12 @@ class TestDiff(unittest.TestCase):
),
[
Update(
exp.Lambda(this=exp.to_identifier("a"), expressions=[exp.to_identifier("a")]),
exp.Lambda(this=exp.to_identifier("b"), expressions=[exp.to_identifier("b")]),
source=exp.Lambda(
this=exp.to_identifier("a"), expressions=[exp.to_identifier("a")]
),
target=exp.Lambda(
this=exp.to_identifier("b"), expressions=[exp.to_identifier("b")]
),
),
],
)
@ -65,8 +70,8 @@ class TestDiff(unittest.TestCase):
parse_one('SELECT a, b, "my.udf1"()'), parse_one('SELECT a, b, "my.udf2"()')
),
[
Insert(parse_one('"my.udf2"()')),
Remove(parse_one('"my.udf1"()')),
Insert(expression=parse_one('"my.udf2"()')),
Remove(expression=parse_one('"my.udf1"()')),
],
)
self._validate_delta_only(
@ -75,41 +80,73 @@ class TestDiff(unittest.TestCase):
parse_one('SELECT a, b, "my.udf"(x, y, w)'),
),
[
Insert(exp.column("w")),
Remove(exp.column("z")),
Insert(expression=exp.column("w")),
Remove(expression=exp.column("z")),
],
)
def test_node_position_changed(self):
expr_src = parse_one("SELECT a, b, c")
expr_tgt = parse_one("SELECT c, a, b")
self._validate_delta_only(
diff_delta_only(parse_one("SELECT a, b, c"), parse_one("SELECT c, a, b")),
diff_delta_only(expr_src, expr_tgt),
[
Move(parse_one("c")), # the Column node
Move(source=expr_src.selects[2], target=expr_tgt.selects[0]),
],
)
expr_src = parse_one("SELECT a + b")
expr_tgt = parse_one("SELECT b + a")
self._validate_delta_only(
diff_delta_only(parse_one("SELECT a + b"), parse_one("SELECT b + a")),
diff_delta_only(expr_src, expr_tgt),
[
Move(parse_one("a")), # the Column node
Move(source=expr_src.selects[0].left, target=expr_tgt.selects[0].right),
],
)
expr_src = parse_one("SELECT aaaa AND bbbb")
expr_tgt = parse_one("SELECT bbbb AND aaaa")
self._validate_delta_only(
diff_delta_only(parse_one("SELECT aaaa AND bbbb"), parse_one("SELECT bbbb AND aaaa")),
diff_delta_only(expr_src, expr_tgt),
[
Move(parse_one("aaaa")), # the Column node
Move(source=expr_src.selects[0].left, target=expr_tgt.selects[0].right),
],
)
expr_src = parse_one("SELECT aaaa OR bbbb OR cccc")
expr_tgt = parse_one("SELECT cccc OR bbbb OR aaaa")
self._validate_delta_only(
diff_delta_only(
parse_one("SELECT aaaa OR bbbb OR cccc"),
parse_one("SELECT cccc OR bbbb OR aaaa"),
),
diff_delta_only(expr_src, expr_tgt),
[
Move(parse_one("aaaa")), # the Column node
Move(parse_one("cccc")), # the Column node
Move(source=expr_src.selects[0].left.left, target=expr_tgt.selects[0].right),
Move(source=expr_src.selects[0].right, target=expr_tgt.selects[0].left.left),
],
)
expr_src = parse_one("SELECT a, b FROM t WHERE CONCAT('a', 'b') = 'ab'")
expr_tgt = parse_one("SELECT a FROM t WHERE CONCAT('a', 'b', b) = 'ab'")
self._validate_delta_only(
diff_delta_only(expr_src, expr_tgt),
[
Move(source=expr_src.selects[1], target=expr_tgt.find(exp.Concat).expressions[-1]),
],
)
expr_src = parse_one("SELECT a as a, b as b FROM t WHERE CONCAT('a', 'b') = 'ab'")
expr_tgt = parse_one("SELECT a as a FROM t WHERE CONCAT('a', 'b', b) = 'ab'")
b_alias = expr_src.selects[1]
self._validate_delta_only(
diff_delta_only(expr_src, expr_tgt),
[
Remove(expression=b_alias),
Move(source=b_alias.this, target=expr_tgt.find(exp.Concat).expressions[-1]),
],
)
@ -130,23 +167,30 @@ class TestDiff(unittest.TestCase):
self._validate_delta_only(
diff_delta_only(parse_one(expr_src), parse_one(expr_tgt)),
[
Remove(parse_one("LOWER(c) AS c")), # the Alias node
Remove(parse_one("LOWER(c)")), # the Lower node
Remove(parse_one("'filter'")), # the Literal node
Insert(parse_one("'different_filter'")), # the Literal node
Remove(expression=parse_one("LOWER(c) AS c")), # the Alias node
Remove(expression=parse_one("LOWER(c)")), # the Lower node
Remove(expression=parse_one("'filter'")), # the Literal node
Insert(expression=parse_one("'different_filter'")), # the Literal node
Move(source=parse_one("c"), target=parse_one("c")), # the new Column c
],
)
def test_join(self):
expr_src = "SELECT a, b FROM t1 LEFT JOIN t2 ON t1.key = t2.key"
expr_tgt = "SELECT a, b FROM t1 RIGHT JOIN t2 ON t1.key = t2.key"
expr_src = parse_one("SELECT a, b FROM t1 LEFT JOIN t2 ON t1.key = t2.key")
expr_tgt = parse_one("SELECT a, b FROM t1 RIGHT JOIN t2 ON t1.key = t2.key")
changes = diff_delta_only(parse_one(expr_src), parse_one(expr_tgt))
src_join = expr_src.find(exp.Join)
tgt_join = expr_tgt.find(exp.Join)
self.assertEqual(len(changes), 2)
self.assertTrue(isinstance(changes[0], Remove))
self.assertTrue(isinstance(changes[1], Insert))
self.assertTrue(all(isinstance(c.expression, Join) for c in changes))
self._validate_delta_only(
diff_delta_only(expr_src, expr_tgt),
[
Remove(expression=src_join),
Insert(expression=tgt_join),
Move(source=exp.to_table("t2"), target=exp.to_table("t2")),
Move(source=src_join.args["on"], target=tgt_join.args["on"]),
],
)
def test_window_functions(self):
expr_src = parse_one("SELECT ROW_NUMBER() OVER (PARTITION BY a ORDER BY b)")
@ -157,8 +201,8 @@ class TestDiff(unittest.TestCase):
self._validate_delta_only(
diff_delta_only(expr_src, expr_tgt),
[
Remove(parse_one("ROW_NUMBER()")),
Insert(parse_one("RANK()")),
Remove(expression=parse_one("ROW_NUMBER()")),
Insert(expression=parse_one("RANK()")),
Update(source=expr_src.selects[0], target=expr_tgt.selects[0]),
],
)
@ -178,20 +222,21 @@ class TestDiff(unittest.TestCase):
self._validate_delta_only(
diff_delta_only(expr_src, expr_tgt),
[
Remove(expr_src),
Insert(expr_tgt),
Insert(exp.Literal.number(2)),
Insert(exp.Literal.number(3)),
Insert(exp.Literal.number(4)),
Remove(expression=expr_src),
Insert(expression=expr_tgt),
Insert(expression=exp.Literal.number(2)),
Insert(expression=exp.Literal.number(3)),
Insert(expression=exp.Literal.number(4)),
Move(source=exp.Literal.number(1), target=exp.Literal.number(1)),
],
)
self._validate_delta_only(
diff_delta_only(expr_src, expr_tgt, matchings=[(expr_src, expr_tgt)]),
[
Insert(exp.Literal.number(2)),
Insert(exp.Literal.number(3)),
Insert(exp.Literal.number(4)),
Insert(expression=exp.Literal.number(2)),
Insert(expression=exp.Literal.number(3)),
Insert(expression=exp.Literal.number(4)),
],
)
@ -274,7 +319,7 @@ class TestDiff(unittest.TestCase):
source=expr_src.find(exp.Order).expressions[0],
target=expr_tgt.find(exp.Order).expressions[0],
),
Move(parse_one("a")),
Move(source=expr_src.selects[0], target=expr_tgt.selects[1]),
],
)

View file

@ -315,7 +315,7 @@ class TestOptimizer(unittest.TestCase):
),
dialect="bigquery",
).sql(),
'WITH "x" AS (SELECT "y"."a" AS "a" FROM "DB"."y" AS "y" CROSS JOIN "a"."b"."INFORMATION_SCHEMA"."COLUMNS" AS "COLUMNS") SELECT "x"."a" AS "a" FROM "x" AS "x"',
'WITH "x" AS (SELECT "y"."a" AS "a" FROM "DB"."y" AS "y" CROSS JOIN "a"."b"."INFORMATION_SCHEMA.COLUMNS" AS "columns") SELECT "x"."a" AS "a" FROM "x" AS "x"',
)
self.assertEqual(
@ -1337,6 +1337,47 @@ FROM READ_CSV('tests/fixtures/optimizer/tpc-h/nation.csv.gz', 'delimiter', '|')
self.assertEqual(union_by_name.selects[0].type.this, exp.DataType.Type.BIGINT)
self.assertEqual(union_by_name.selects[1].type.this, exp.DataType.Type.DOUBLE)
# Test chained UNIONs
sql = """
WITH t AS
(
SELECT NULL AS col
UNION
SELECT NULL AS col
UNION
SELECT 'a' AS col
UNION
SELECT NULL AS col
UNION
SELECT NULL AS col
)
SELECT col FROM t;
"""
self.assertEqual(optimizer.optimize(sql).selects[0].type.this, exp.DataType.Type.VARCHAR)
# Test UNIONs with nested subqueries
sql = """
WITH t AS
(
SELECT NULL AS col
UNION
(SELECT NULL AS col UNION ALL SELECT 'a' AS col)
)
SELECT col FROM t;
"""
self.assertEqual(optimizer.optimize(sql).selects[0].type.this, exp.DataType.Type.VARCHAR)
sql = """
WITH t AS
(
(SELECT NULL AS col UNION ALL SELECT 'a' AS col)
UNION
SELECT NULL AS col
)
SELECT col FROM t;
"""
self.assertEqual(optimizer.optimize(sql).selects[0].type.this, exp.DataType.Type.VARCHAR)
def test_recursive_cte(self):
query = parse_one(
"""