
Merging upstream version 25.16.1.

Signed-off-by: Daniel Baumann <daniel@debian.org>
Daniel Baumann 2025-02-13 21:52:32 +01:00
parent 7688e2bdf8
commit bad79d1f7c
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
110 changed files with 75353 additions and 68092 deletions

View file

@@ -6,6 +6,15 @@ class TestAthena(Validator):
maxDiff = None
def test_athena(self):
self.validate_identity(
"CREATE TABLE IF NOT EXISTS t (name STRING) LOCATION 's3://bucket/tmp/mytable/' TBLPROPERTIES ('table_type'='iceberg', 'FORMAT'='parquet')"
)
self.validate_identity(
"UNLOAD (SELECT name1, address1, comment1, key1 FROM table1) "
"TO 's3://amzn-s3-demo-bucket/ partitioned/' "
"WITH (format = 'TEXTFILE', partitioned_by = ARRAY['key1'])",
check_command_warning=True,
)
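# A minimal sketch of the fallback that check_command_warning exercises,
# assuming sqlglot's public parse_one API: statements the dialect does not
# model are parsed into a generic exp.Command node (and a warning is logged).
from sqlglot import exp, parse_one
stmt = parse_one("UNLOAD (SELECT 1) TO 's3://bucket/prefix/'", read="athena")
assert isinstance(stmt, exp.Command)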
self.validate_identity(
"""USING EXTERNAL FUNCTION some_function(input VARBINARY)
RETURNS VARCHAR
@@ -14,7 +23,3 @@ class TestAthena(Validator):
some_function(1)""",
check_command_warning=True,
)
self.validate_identity(
"CREATE TABLE IF NOT EXISTS t (name STRING) LOCATION 's3://bucket/tmp/mytable/' TBLPROPERTIES ('table_type'='iceberg', 'FORMAT'='parquet')"
)

View file

@@ -1,4 +1,6 @@
from unittest import mock
import datetime
import pytz
from sqlglot import (
ErrorLevel,
@@ -103,6 +105,7 @@ LANGUAGE js AS
select_with_quoted_udf = self.validate_identity("SELECT `p.d.UdF`(data) FROM `p.d.t`")
self.assertEqual(select_with_quoted_udf.selects[0].name, "p.d.UdF")
self.validate_identity("SELECT * FROM READ_CSV('bla.csv')")
self.validate_identity("CAST(x AS STRUCT<list ARRAY<INT64>>)")
self.validate_identity("assert.true(1 = 1)")
self.validate_identity("SELECT ARRAY_TO_STRING(list, '--') AS text")
@@ -446,7 +449,7 @@ LANGUAGE js AS
write={
"bigquery": "SELECT LAST_DAY(CAST('2008-11-25' AS DATE), MONTH)",
"duckdb": "SELECT LAST_DAY(CAST('2008-11-25' AS DATE))",
"clickhouse": "SELECT LAST_DAY(CAST('2008-11-25' AS DATE))",
"clickhouse": "SELECT LAST_DAY(CAST('2008-11-25' AS Nullable(DATE)))",
"mysql": "SELECT LAST_DAY(CAST('2008-11-25' AS DATE))",
"oracle": "SELECT LAST_DAY(CAST('2008-11-25' AS DATE))",
"postgres": "SELECT CAST(DATE_TRUNC('MONTH', CAST('2008-11-25' AS DATE)) + INTERVAL '1 MONTH' - INTERVAL '1 DAY' AS DATE)",
@@ -510,6 +513,20 @@ LANGUAGE js AS
"duckdb": "SELECT STRFTIME(CAST('2023-12-25' AS DATE), '%Y%m%d')",
},
)
self.validate_all(
"SELECT FORMAT_DATETIME('%Y%m%d %H:%M:%S', DATETIME '2023-12-25 15:30:00')",
write={
"bigquery": "SELECT FORMAT_DATETIME('%Y%m%d %H:%M:%S', CAST('2023-12-25 15:30:00' AS DATETIME))",
"duckdb": "SELECT STRFTIME(CAST('2023-12-25 15:30:00' AS TIMESTAMP), '%Y%m%d %H:%M:%S')",
},
)
self.validate_all(
"SELECT FORMAT_DATETIME('%x', '2023-12-25 15:30:00')",
write={
"bigquery": "SELECT FORMAT_DATETIME('%x', '2023-12-25 15:30:00')",
"duckdb": "SELECT STRFTIME(CAST('2023-12-25 15:30:00' AS TIMESTAMP), '%x')",
},
)
self.validate_all(
"SELECT COUNTIF(x)",
read={
@@ -636,6 +653,7 @@ LANGUAGE js AS
write={
"bigquery": "SELECT DATETIME_TRUNC('2023-01-01T01:01:01', HOUR)",
"databricks": "SELECT DATE_TRUNC('HOUR', '2023-01-01T01:01:01')",
"duckdb": "SELECT DATE_TRUNC('HOUR', CAST('2023-01-01T01:01:01' AS DATETIME))",
},
),
)
@@ -1209,10 +1227,9 @@ LANGUAGE js AS
"SELECT * FROM a WHERE b IN UNNEST([1, 2, 3])",
write={
"bigquery": "SELECT * FROM a WHERE b IN UNNEST([1, 2, 3])",
"mysql": "SELECT * FROM a WHERE b IN (SELECT UNNEST(ARRAY(1, 2, 3)))",
"presto": "SELECT * FROM a WHERE b IN (SELECT UNNEST(ARRAY[1, 2, 3]))",
"hive": "SELECT * FROM a WHERE b IN (SELECT UNNEST(ARRAY(1, 2, 3)))",
"spark": "SELECT * FROM a WHERE b IN (SELECT UNNEST(ARRAY(1, 2, 3)))",
"hive": "SELECT * FROM a WHERE b IN (SELECT EXPLODE(ARRAY(1, 2, 3)))",
"spark": "SELECT * FROM a WHERE b IN (SELECT EXPLODE(ARRAY(1, 2, 3)))",
},
)
self.validate_all(
@@ -1256,6 +1273,13 @@ LANGUAGE js AS
"starrocks": "DATE_DIFF('MINUTE', CAST('2010-07-07' AS DATE), CAST('2008-12-25' AS DATE))",
},
)
self.validate_all(
"DATE_DIFF('2021-01-01', '2020-01-01', DAY)",
write={
"bigquery": "DATE_DIFF('2021-01-01', '2020-01-01', DAY)",
"duckdb": "DATE_DIFF('DAY', CAST('2020-01-01' AS DATE), CAST('2021-01-01' AS DATE))",
},
)
self.validate_all(
"CURRENT_DATE('UTC')",
write={
@@ -1402,6 +1426,57 @@ WHERE
"": "SELECT LENGTH(foo)",
},
)
self.validate_all(
"SELECT TIME_DIFF('12:00:00', '12:30:00', MINUTE)",
write={
"duckdb": "SELECT DATE_DIFF('MINUTE', CAST('12:30:00' AS TIME), CAST('12:00:00' AS TIME))",
"bigquery": "SELECT TIME_DIFF('12:00:00', '12:30:00', MINUTE)",
},
)
self.validate_all(
"ARRAY_CONCAT([1, 2], [3, 4], [5, 6])",
write={
"bigquery": "ARRAY_CONCAT([1, 2], [3, 4], [5, 6])",
"duckdb": "ARRAY_CONCAT([1, 2], ARRAY_CONCAT([3, 4], [5, 6]))",
"postgres": "ARRAY_CAT(ARRAY[1, 2], ARRAY_CAT(ARRAY[3, 4], ARRAY[5, 6]))",
"redshift": "ARRAY_CONCAT(ARRAY(1, 2), ARRAY_CONCAT(ARRAY(3, 4), ARRAY(5, 6)))",
"snowflake": "ARRAY_CAT([1, 2], ARRAY_CAT([3, 4], [5, 6]))",
"hive": "CONCAT(ARRAY(1, 2), ARRAY(3, 4), ARRAY(5, 6))",
"spark2": "CONCAT(ARRAY(1, 2), ARRAY(3, 4), ARRAY(5, 6))",
"spark": "CONCAT(ARRAY(1, 2), ARRAY(3, 4), ARRAY(5, 6))",
"databricks": "CONCAT(ARRAY(1, 2), ARRAY(3, 4), ARRAY(5, 6))",
"presto": "CONCAT(ARRAY[1, 2], ARRAY[3, 4], ARRAY[5, 6])",
"trino": "CONCAT(ARRAY[1, 2], ARRAY[3, 4], ARRAY[5, 6])",
},
)
self.validate_all(
"SELECT GENERATE_DATE_ARRAY('2016-10-05', '2016-10-08')",
write={
"duckdb": "SELECT CAST(GENERATE_SERIES(CAST('2016-10-05' AS DATE), CAST('2016-10-08' AS DATE), INTERVAL 1 DAY) AS DATE[])",
"bigquery": "SELECT GENERATE_DATE_ARRAY('2016-10-05', '2016-10-08', INTERVAL 1 DAY)",
},
)
self.validate_all(
"SELECT GENERATE_DATE_ARRAY('2016-10-05', '2016-10-08', INTERVAL '1' MONTH)",
write={
"duckdb": "SELECT CAST(GENERATE_SERIES(CAST('2016-10-05' AS DATE), CAST('2016-10-08' AS DATE), INTERVAL '1' MONTH) AS DATE[])",
"bigquery": "SELECT GENERATE_DATE_ARRAY('2016-10-05', '2016-10-08', INTERVAL '1' MONTH)",
},
)
self.validate_all(
"SELECT GENERATE_TIMESTAMP_ARRAY('2016-10-05 00:00:00', '2016-10-07 00:00:00', INTERVAL '1' DAY)",
write={
"duckdb": "SELECT GENERATE_SERIES(CAST('2016-10-05 00:00:00' AS TIMESTAMP), CAST('2016-10-07 00:00:00' AS TIMESTAMP), INTERVAL '1' DAY)",
"bigquery": "SELECT GENERATE_TIMESTAMP_ARRAY('2016-10-05 00:00:00', '2016-10-07 00:00:00', INTERVAL '1' DAY)",
},
)
self.validate_all(
"SELECT PARSE_DATE('%A %b %e %Y', 'Thursday Dec 25 2008')",
write={
"bigquery": "SELECT PARSE_DATE('%A %b %e %Y', 'Thursday Dec 25 2008')",
"duckdb": "SELECT CAST(STRPTIME('Thursday Dec 25 2008', '%A %b %-d %Y') AS DATE)",
},
)
def test_errors(self):
with self.assertRaises(TokenError):
@@ -1794,14 +1869,14 @@ OPTIONS (
"SELECT * FROM UNNEST(ARRAY<STRUCT<x INT64>>[])",
write={
"bigquery": "SELECT * FROM UNNEST(CAST([] AS ARRAY<STRUCT<x INT64>>))",
"duckdb": "SELECT * FROM UNNEST(CAST([] AS STRUCT(x BIGINT)[]))",
"duckdb": "SELECT * FROM (SELECT UNNEST(CAST([] AS STRUCT(x BIGINT)[]), max_depth => 2))",
},
)
self.validate_all(
"SELECT * FROM UNNEST(ARRAY<STRUCT<device_id INT64, time DATETIME, signal INT64, state STRING>>[STRUCT(1, DATETIME '2023-11-01 09:34:01', 74, 'INACTIVE'),STRUCT(4, DATETIME '2023-11-01 09:38:01', 80, 'ACTIVE')])",
write={
"bigquery": "SELECT * FROM UNNEST(CAST([STRUCT(1, CAST('2023-11-01 09:34:01' AS DATETIME), 74, 'INACTIVE'), STRUCT(4, CAST('2023-11-01 09:38:01' AS DATETIME), 80, 'ACTIVE')] AS ARRAY<STRUCT<device_id INT64, time DATETIME, signal INT64, state STRING>>))",
"duckdb": "SELECT * FROM UNNEST(CAST([ROW(1, CAST('2023-11-01 09:34:01' AS TIMESTAMP), 74, 'INACTIVE'), ROW(4, CAST('2023-11-01 09:38:01' AS TIMESTAMP), 80, 'ACTIVE')] AS STRUCT(device_id BIGINT, time TIMESTAMP, signal BIGINT, state TEXT)[]))",
"duckdb": "SELECT * FROM (SELECT UNNEST(CAST([ROW(1, CAST('2023-11-01 09:34:01' AS TIMESTAMP), 74, 'INACTIVE'), ROW(4, CAST('2023-11-01 09:38:01' AS TIMESTAMP), 80, 'ACTIVE')] AS STRUCT(device_id BIGINT, time TIMESTAMP, signal BIGINT, state TEXT)[]), max_depth => 2))",
},
)
self.validate_all(
@@ -1811,3 +1886,51 @@ OPTIONS (
"duckdb": "SELECT CAST(ROW(1, ROW('c_str')) AS STRUCT(a BIGINT, b STRUCT(c TEXT)))",
},
)
def test_convert(self):
for value, expected in [
(datetime.datetime(2023, 1, 1), "CAST('2023-01-01 00:00:00' AS DATETIME)"),
(datetime.datetime(2023, 1, 1, 12, 13, 14), "CAST('2023-01-01 12:13:14' AS DATETIME)"),
(
datetime.datetime(2023, 1, 1, 12, 13, 14, tzinfo=datetime.timezone.utc),
"CAST('2023-01-01 12:13:14+00:00' AS TIMESTAMP)",
),
(
pytz.timezone("America/Los_Angeles").localize(
datetime.datetime(2023, 1, 1, 12, 13, 14)
),
"CAST('2023-01-01 12:13:14-08:00' AS TIMESTAMP)",
),
]:
with self.subTest(value):
self.assertEqual(exp.convert(value).sql(dialect=self.dialect), expected)
def test_unnest(self):
self.validate_all(
"SELECT name, laps FROM UNNEST([STRUCT('Rudisha' AS name, [23.4, 26.3, 26.4, 26.1] AS laps), STRUCT('Makhloufi' AS name, [24.5, 25.4, 26.6, 26.1] AS laps)])",
write={
"bigquery": "SELECT name, laps FROM UNNEST([STRUCT('Rudisha' AS name, [23.4, 26.3, 26.4, 26.1] AS laps), STRUCT('Makhloufi' AS name, [24.5, 25.4, 26.6, 26.1] AS laps)])",
"duckdb": "SELECT name, laps FROM (SELECT UNNEST([{'name': 'Rudisha', 'laps': [23.4, 26.3, 26.4, 26.1]}, {'name': 'Makhloufi', 'laps': [24.5, 25.4, 26.6, 26.1]}], max_depth => 2))",
},
)
self.validate_all(
"WITH Races AS (SELECT '800M' AS race) SELECT race, name, laps FROM Races AS r CROSS JOIN UNNEST([STRUCT('Rudisha' AS name, [23.4, 26.3, 26.4, 26.1] AS laps)])",
write={
"bigquery": "WITH Races AS (SELECT '800M' AS race) SELECT race, name, laps FROM Races AS r CROSS JOIN UNNEST([STRUCT('Rudisha' AS name, [23.4, 26.3, 26.4, 26.1] AS laps)])",
"duckdb": "WITH Races AS (SELECT '800M' AS race) SELECT race, name, laps FROM Races AS r CROSS JOIN (SELECT UNNEST([{'name': 'Rudisha', 'laps': [23.4, 26.3, 26.4, 26.1]}], max_depth => 2))",
},
)
self.validate_all(
"SELECT participant FROM UNNEST([STRUCT('Rudisha' AS name, [23.4, 26.3, 26.4, 26.1] AS laps)]) AS participant",
write={
"bigquery": "SELECT participant FROM UNNEST([STRUCT('Rudisha' AS name, [23.4, 26.3, 26.4, 26.1] AS laps)]) AS participant",
"duckdb": "SELECT participant FROM (SELECT UNNEST([{'name': 'Rudisha', 'laps': [23.4, 26.3, 26.4, 26.1]}], max_depth => 2)) AS participant",
},
)
self.validate_all(
"WITH Races AS (SELECT '800M' AS race) SELECT race, participant FROM Races AS r CROSS JOIN UNNEST([STRUCT('Rudisha' AS name, [23.4, 26.3, 26.4, 26.1] AS laps)]) AS participant",
write={
"bigquery": "WITH Races AS (SELECT '800M' AS race) SELECT race, participant FROM Races AS r CROSS JOIN UNNEST([STRUCT('Rudisha' AS name, [23.4, 26.3, 26.4, 26.1] AS laps)]) AS participant",
"duckdb": "WITH Races AS (SELECT '800M' AS race) SELECT race, participant FROM Races AS r CROSS JOIN (SELECT UNNEST([{'name': 'Rudisha', 'laps': [23.4, 26.3, 26.4, 26.1]}], max_depth => 2)) AS participant",
},
)

View file

@@ -1,4 +1,7 @@
from datetime import date
from sqlglot import exp, parse_one
from sqlglot.dialects import ClickHouse
from sqlglot.expressions import convert
from tests.dialects.test_dialect import Validator
from sqlglot.errors import ErrorLevel
@@ -7,27 +10,27 @@ class TestClickhouse(Validator):
dialect = "clickhouse"
def test_clickhouse(self):
self.validate_identity("SELECT toFloat(like)")
self.validate_identity("SELECT like")
for string_type_enum in ClickHouse.Generator.STRING_TYPE_MAPPING:
self.validate_identity(f"CAST(x AS {string_type_enum.value})", "CAST(x AS String)")
string_types = [
"BLOB",
"LONGBLOB",
"LONGTEXT",
"MEDIUMBLOB",
"MEDIUMTEXT",
"TINYBLOB",
"TINYTEXT",
"VARCHAR(255)",
]
# Arrays, maps and tuples can't be Nullable in ClickHouse
for non_nullable_type in ("ARRAY<INT>", "MAP<INT, INT>", "STRUCT(a: INT)"):
try_cast = parse_one(f"TRY_CAST(x AS {non_nullable_type})")
target_type = try_cast.to.sql("clickhouse")
self.assertEqual(try_cast.sql("clickhouse"), f"CAST(x AS {target_type})")
for string_type in string_types:
self.validate_identity(f"CAST(x AS {string_type})", "CAST(x AS String)")
for nullable_type in ("INT", "UINT", "BIGINT", "FLOAT", "DOUBLE", "TEXT", "DATE", "UUID"):
try_cast = parse_one(f"TRY_CAST(x AS {nullable_type})")
target_type = exp.DataType.build(nullable_type, dialect="clickhouse").sql("clickhouse")
self.assertEqual(try_cast.sql("clickhouse"), f"CAST(x AS Nullable({target_type}))")
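# A short sketch of the Nullable rules the loops above pin down, assuming
# only sqlglot's parse_one: TRY_CAST to a scalar type wraps the target in
# Nullable(...), while container targets are left unwrapped.
from sqlglot import parse_one
assert parse_one("TRY_CAST(x AS TEXT)").sql("clickhouse") == "CAST(x AS Nullable(String))"
assert parse_one("TRY_CAST(x AS ARRAY<INT>)").sql("clickhouse").startswith("CAST(x AS Array(")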
expr = parse_one("count(x)")
self.assertEqual(expr.sql(dialect="clickhouse"), "COUNT(x)")
self.assertIsNone(expr._meta)
self.validate_identity("@macro").assert_is(exp.Parameter).this.assert_is(exp.Var)
self.validate_identity("SELECT toFloat(like)")
self.validate_identity("SELECT like")
self.validate_identity("SELECT STR_TO_DATE(str, fmt, tz)")
self.validate_identity("SELECT STR_TO_DATE('05 12 2000', '%d %m %Y')")
self.validate_identity("SELECT EXTRACT(YEAR FROM toDateTime('2023-02-01'))")
@@ -40,7 +43,7 @@ class TestClickhouse(Validator):
self.validate_identity("SELECT * FROM (SELECT a FROM b SAMPLE 0.01)")
self.validate_identity("SELECT * FROM (SELECT a FROM b SAMPLE 1 / 10 OFFSET 1 / 2)")
self.validate_identity("SELECT sum(foo * bar) FROM bla SAMPLE 10000000")
self.validate_identity("CAST(x AS Nested(ID UInt32, Serial UInt32, EventTime DATETIME))")
self.validate_identity("CAST(x AS Nested(ID UInt32, Serial UInt32, EventTime DateTime))")
self.validate_identity("CAST(x AS Enum('hello' = 1, 'world' = 2))")
self.validate_identity("CAST(x AS Enum('hello', 'world'))")
self.validate_identity("CAST(x AS Enum('hello' = 1, 'world'))")
@@ -79,7 +82,8 @@ class TestClickhouse(Validator):
self.validate_identity("SELECT * FROM foo WHERE x GLOBAL IN (SELECT * FROM bar)")
self.validate_identity("position(haystack, needle)")
self.validate_identity("position(haystack, needle, position)")
self.validate_identity("CAST(x AS DATETIME)")
self.validate_identity("CAST(x AS DATETIME)", "CAST(x AS DateTime)")
self.validate_identity("CAST(x AS TIMESTAMPTZ)", "CAST(x AS DateTime)")
self.validate_identity("CAST(x as MEDIUMINT)", "CAST(x AS Int32)")
self.validate_identity("SELECT arrayJoin([1, 2, 3] AS src) AS dst, 'Hello', src")
self.validate_identity("""SELECT JSONExtractString('{"x": {"y": 1}}', 'x', 'y')""")
@@ -207,11 +211,16 @@ class TestClickhouse(Validator):
},
)
self.validate_all(
"SELECT CAST('2020-01-01' AS TIMESTAMP) + INTERVAL '500' MICROSECOND",
"SELECT CAST('2020-01-01' AS Nullable(DateTime)) + INTERVAL '500' MICROSECOND",
read={
"duckdb": "SELECT TIMESTAMP '2020-01-01' + INTERVAL '500 us'",
"postgres": "SELECT TIMESTAMP '2020-01-01' + INTERVAL '500 us'",
},
write={
"clickhouse": "SELECT CAST('2020-01-01' AS Nullable(DateTime)) + INTERVAL '500' MICROSECOND",
"duckdb": "SELECT CAST('2020-01-01' AS DATETIME) + INTERVAL '500' MICROSECOND",
"postgres": "SELECT CAST('2020-01-01' AS TIMESTAMP) + INTERVAL '500 MICROSECOND'",
},
)
self.validate_all(
"SELECT CURRENT_DATE()",
@@ -471,6 +480,47 @@ class TestClickhouse(Validator):
parse_one("Tuple(select Int64)", into=exp.DataType, read="clickhouse"), exp.DataType
)
self.validate_identity("INSERT INTO t (col1, col2) VALUES ('abcd', 1234)")
self.validate_all(
"INSERT INTO t (col1, col2) VALUES ('abcd', 1234)",
read={
# looks like the VALUES table function, but should be parsed as a VALUES block
"clickhouse": "INSERT INTO t (col1, col2) values('abcd', 1234)"
},
write={
"clickhouse": "INSERT INTO t (col1, col2) VALUES ('abcd', 1234)",
"postgres": "INSERT INTO t (col1, col2) VALUES ('abcd', 1234)",
},
)
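# A sketch of the normalization the hunk above adds, assuming sqlglot.transpile:
# a lowercase values(...) after INSERT resembles ClickHouse's VALUES table
# function, but it is parsed as an ordinary VALUES block and re-emitted as such.
import sqlglot
sql = "INSERT INTO t (col1, col2) values('abcd', 1234)"
print(sqlglot.transpile(sql, read="clickhouse", write="postgres")[0])
# INSERT INTO t (col1, col2) VALUES ('abcd', 1234)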
def test_clickhouse_values(self):
values = exp.select("*").from_(
exp.values([exp.tuple_(1, 2, 3)], alias="subq", columns=["a", "b", "c"])
)
self.assertEqual(
values.sql("clickhouse"),
"SELECT * FROM (SELECT 1 AS a, 2 AS b, 3 AS c) AS subq",
)
self.validate_identity("INSERT INTO t (col1, col2) VALUES ('abcd', 1234)")
self.validate_identity(
"INSERT INTO t (col1, col2) FORMAT Values('abcd', 1234)",
"INSERT INTO t (col1, col2) VALUES ('abcd', 1234)",
)
self.validate_all(
"SELECT col FROM (SELECT 1 AS col) AS _t",
read={
"duckdb": "SELECT col FROM (VALUES (1)) AS _t(col)",
},
)
self.validate_all(
"SELECT col1, col2 FROM (SELECT 1 AS col1, 2 AS col2 UNION ALL SELECT 3, 4) AS _t",
read={
"duckdb": "SELECT col1, col2 FROM (VALUES (1, 2), (3, 4)) AS _t(col1, col2)",
},
)
def test_cte(self):
self.validate_identity("WITH 'x' AS foo SELECT foo")
self.validate_identity("WITH ['c'] AS field_names SELECT field_names")
@@ -531,7 +581,7 @@ class TestClickhouse(Validator):
self.validate_all(
"SELECT {abc: UInt32}, {b: String}, {c: DateTime},{d: Map(String, Array(UInt8))}, {e: Tuple(UInt8, String)}",
write={
"clickhouse": "SELECT {abc: UInt32}, {b: String}, {c: DATETIME}, {d: Map(String, Array(UInt8))}, {e: Tuple(UInt8, String)}",
"clickhouse": "SELECT {abc: UInt32}, {b: String}, {c: DateTime}, {d: Map(String, Array(UInt8))}, {e: Tuple(UInt8, String)}",
"": "SELECT :abc, :b, :c, :d, :e",
},
)
@@ -562,14 +612,77 @@ class TestClickhouse(Validator):
)
def test_ddl(self):
db_table_expr = exp.Table(this=None, db=exp.to_identifier("foo"), catalog=None)
create_with_cluster = exp.Create(
this=db_table_expr,
kind="DATABASE",
properties=exp.Properties(expressions=[exp.OnCluster(this=exp.to_identifier("c"))]),
)
self.assertEqual(create_with_cluster.sql("clickhouse"), "CREATE DATABASE foo ON CLUSTER c")
ctas_with_comment = exp.Create(
this=exp.table_("foo"),
kind="TABLE",
expression=exp.select("*").from_("db.other_table"),
properties=exp.Properties(
expressions=[
exp.EngineProperty(this=exp.var("Memory")),
exp.SchemaCommentProperty(this=exp.Literal.string("foo")),
],
),
)
self.assertEqual(
ctas_with_comment.sql("clickhouse"),
"CREATE TABLE foo ENGINE=Memory AS (SELECT * FROM db.other_table) COMMENT 'foo'",
)
self.validate_identity("""CREATE TABLE ip_data (ip4 IPv4, ip6 IPv6) ENGINE=TinyLog()""")
self.validate_identity("""CREATE TABLE dates (dt1 Date32) ENGINE=TinyLog()""")
self.validate_identity("CREATE TABLE named_tuples (a Tuple(select String, i Int64))")
self.validate_identity("""CREATE TABLE t (a String) EMPTY AS SELECT * FROM dummy""")
self.validate_identity(
"CREATE TABLE t1 (a String EPHEMERAL, b String EPHEMERAL func(), c String MATERIALIZED func(), d String ALIAS func()) ENGINE=TinyLog()"
)
self.validate_identity(
"CREATE TABLE t (a String, b String, c UInt64, PROJECTION p1 (SELECT a, sum(c) GROUP BY a, b), PROJECTION p2 (SELECT b, sum(c) GROUP BY b)) ENGINE=MergeTree()"
)
self.validate_identity(
"""CREATE TABLE xyz (ts DateTime, data String) ENGINE=MergeTree() ORDER BY ts SETTINGS index_granularity = 8192 COMMENT '{"key": "value"}'"""
)
self.validate_identity(
"INSERT INTO FUNCTION s3('a', 'b', 'c', 'd', 'e') PARTITION BY CONCAT(s1, s2, s3, s4) SETTINGS set1 = 1, set2 = '2' SELECT * FROM some_table SETTINGS foo = 3"
)
self.validate_identity(
'CREATE TABLE data5 ("x" UInt32, "y" UInt32) ENGINE=MergeTree ORDER BY (round(y / 1000000000), cityHash64(x)) SAMPLE BY cityHash64(x)'
)
self.validate_identity(
"CREATE TABLE foo (x UInt32) TTL time_column + INTERVAL '1' MONTH DELETE WHERE column = 'value'"
)
self.validate_identity("CREATE TABLE named_tuples (a Tuple(select String, i Int64))")
self.validate_identity(
"CREATE TABLE a ENGINE=Memory AS SELECT 1 AS c COMMENT 'foo'",
"CREATE TABLE a ENGINE=Memory AS (SELECT 1 AS c) COMMENT 'foo'",
)
self.validate_all(
"CREATE DATABASE x",
read={
"duckdb": "CREATE SCHEMA x",
},
write={
"clickhouse": "CREATE DATABASE x",
"duckdb": "CREATE SCHEMA x",
},
)
self.validate_all(
"DROP DATABASE x",
read={
"duckdb": "DROP SCHEMA x",
},
write={
"clickhouse": "DROP DATABASE x",
"duckdb": "DROP SCHEMA x",
},
)
self.validate_all(
"""
CREATE TABLE example1 (
@@ -583,7 +696,7 @@ class TestClickhouse(Validator):
""",
write={
"clickhouse": """CREATE TABLE example1 (
timestamp DATETIME,
timestamp DateTime,
x UInt32 TTL now() + INTERVAL '1' MONTH,
y String TTL timestamp + INTERVAL '1' DAY,
z String
@@ -661,7 +774,7 @@ SETTINGS
""",
write={
"clickhouse": """CREATE TABLE example_table (
d DATETIME,
d DateTime,
a Int32
)
ENGINE=MergeTree
@@ -688,7 +801,7 @@ TTL
""",
write={
"clickhouse": """CREATE TABLE table_with_where (
d DATETIME,
d DateTime,
a Int32
)
ENGINE=MergeTree
@@ -716,7 +829,7 @@ WHERE
""",
write={
"clickhouse": """CREATE TABLE table_for_recompression (
d DATETIME,
d DateTime,
key UInt64,
value String
)
@@ -748,7 +861,7 @@ SETTINGS
""",
write={
"clickhouse": """CREATE TABLE table_for_aggregation (
d DATETIME,
d DateTime,
k1 Int32,
k2 Int32,
x Int32,
@@ -855,8 +968,6 @@ LIFETIME(MIN 0 MAX 0)""",
},
pretty=True,
)
self.validate_identity("""CREATE TABLE ip_data (ip4 IPv4, ip6 IPv6) ENGINE=TinyLog()""")
self.validate_identity("""CREATE TABLE dates (dt1 Date32) ENGINE=TinyLog()""")
self.validate_all(
"""
CREATE TABLE t (
@@ -873,12 +984,6 @@ LIFETIME(MIN 0 MAX 0)""",
},
pretty=True,
)
self.validate_identity(
"CREATE TABLE t1 (a String EPHEMERAL, b String EPHEMERAL func(), c String MATERIALIZED func(), d String ALIAS func()) ENGINE=TinyLog()"
)
self.validate_identity(
"CREATE TABLE t (a String, b String, c UInt64, PROJECTION p1 (SELECT a, sum(c) GROUP BY a, b), PROJECTION p2 (SELECT b, sum(c) GROUP BY b)) ENGINE=MergeTree()"
)
def test_agg_functions(self):
def extract_agg_func(query):
@@ -934,3 +1039,8 @@ LIFETIME(MIN 0 MAX 0)""",
f"SELECT {func_alias}(SECOND, 1, bar)",
f"SELECT {func_name}(SECOND, 1, bar)",
)
def test_convert(self):
self.assertEqual(
convert(date(2020, 1, 1)).sql(dialect=self.dialect), "toDate('2020-01-01')"
)

View file

@@ -256,3 +256,11 @@ class TestDatabricks(Validator):
"databricks": "WITH x AS (SELECT 1) SELECT * FROM x",
},
)
def test_streaming_tables(self):
self.validate_identity(
"CREATE STREAMING TABLE raw_data AS SELECT * FROM STREAM READ_FILES('abfss://container@storageAccount.dfs.core.windows.net/base/path')"
)
self.validate_identity(
"CREATE OR REFRESH STREAMING TABLE csv_data (id INT, ts TIMESTAMP, event STRING) AS SELECT * FROM STREAM READ_FILES('s3://bucket/path', format => 'csv', schema => 'id int, ts timestamp, event string')"
)

View file

@@ -160,7 +160,7 @@ class TestDialect(Validator):
"CAST(a AS TEXT)",
write={
"bigquery": "CAST(a AS STRING)",
"clickhouse": "CAST(a AS String)",
"clickhouse": "CAST(a AS Nullable(String))",
"drill": "CAST(a AS VARCHAR)",
"duckdb": "CAST(a AS TEXT)",
"materialize": "CAST(a AS TEXT)",
@@ -181,7 +181,7 @@ class TestDialect(Validator):
"CAST(a AS BINARY(4))",
write={
"bigquery": "CAST(a AS BYTES)",
"clickhouse": "CAST(a AS BINARY(4))",
"clickhouse": "CAST(a AS Nullable(BINARY(4)))",
"drill": "CAST(a AS VARBINARY(4))",
"duckdb": "CAST(a AS BLOB(4))",
"materialize": "CAST(a AS BYTEA(4))",
@@ -201,7 +201,7 @@ class TestDialect(Validator):
"CAST(a AS VARBINARY(4))",
write={
"bigquery": "CAST(a AS BYTES)",
"clickhouse": "CAST(a AS String)",
"clickhouse": "CAST(a AS Nullable(String))",
"duckdb": "CAST(a AS BLOB(4))",
"materialize": "CAST(a AS BYTEA(4))",
"mysql": "CAST(a AS VARBINARY(4))",
@@ -219,19 +219,19 @@ class TestDialect(Validator):
self.validate_all(
"CAST(MAP('a', '1') AS MAP(TEXT, TEXT))",
write={
"clickhouse": "CAST(map('a', '1') AS Map(String, String))",
"clickhouse": "CAST(map('a', '1') AS Map(String, Nullable(String)))",
},
)
self.validate_all(
"CAST(ARRAY(1, 2) AS ARRAY<TINYINT>)",
write={
"clickhouse": "CAST([1, 2] AS Array(Int8))",
"clickhouse": "CAST([1, 2] AS Array(Nullable(Int8)))",
},
)
self.validate_all(
"CAST((1, 2) AS STRUCT<a: TINYINT, b: SMALLINT, c: INT, d: BIGINT>)",
"CAST((1, 2, 3, 4) AS STRUCT<a: TINYINT, b: SMALLINT, c: INT, d: BIGINT>)",
write={
"clickhouse": "CAST((1, 2) AS Tuple(a Int8, b Int16, c Int32, d Int64))",
"clickhouse": "CAST((1, 2, 3, 4) AS Tuple(a Nullable(Int8), b Nullable(Int16), c Nullable(Int32), d Nullable(Int64)))",
},
)
self.validate_all(
@@ -327,20 +327,10 @@ class TestDialect(Validator):
"postgres": "CAST(a AS DOUBLE PRECISION)",
"redshift": "CAST(a AS DOUBLE PRECISION)",
},
write={
"duckdb": "CAST(a AS DOUBLE)",
"drill": "CAST(a AS DOUBLE)",
"postgres": "CAST(a AS DOUBLE PRECISION)",
"redshift": "CAST(a AS DOUBLE PRECISION)",
"doris": "CAST(a AS DOUBLE)",
},
)
self.validate_all(
"CAST(a AS DOUBLE)",
write={
"bigquery": "CAST(a AS FLOAT64)",
"clickhouse": "CAST(a AS Float64)",
"clickhouse": "CAST(a AS Nullable(Float64))",
"doris": "CAST(a AS DOUBLE)",
"drill": "CAST(a AS DOUBLE)",
"duckdb": "CAST(a AS DOUBLE)",
"materialize": "CAST(a AS DOUBLE PRECISION)",
@@ -592,7 +582,7 @@ class TestDialect(Validator):
"hive": "CAST(FROM_UNIXTIME(UNIX_TIMESTAMP(x, 'yyyy-MM-ddTHH:mm:ss')) AS TIMESTAMP)",
"presto": "DATE_PARSE(x, '%Y-%m-%dT%T')",
"drill": "TO_TIMESTAMP(x, 'yyyy-MM-dd''T''HH:mm:ss')",
"redshift": "TO_TIMESTAMP(x, 'YYYY-MM-DDTHH:MI:SS')",
"redshift": "TO_TIMESTAMP(x, 'YYYY-MM-DDTHH24:MI:SS')",
"spark": "TO_TIMESTAMP(x, 'yyyy-MM-ddTHH:mm:ss')",
},
)
@@ -647,14 +637,66 @@ class TestDialect(Validator):
self.validate_all(
"TIME_STR_TO_TIME('2020-01-01')",
write={
"drill": "CAST('2020-01-01' AS TIMESTAMP)",
"bigquery": "CAST('2020-01-01' AS DATETIME)",
"databricks": "CAST('2020-01-01' AS TIMESTAMP)",
"duckdb": "CAST('2020-01-01' AS TIMESTAMP)",
"tsql": "CAST('2020-01-01' AS DATETIME2)",
"mysql": "CAST('2020-01-01' AS DATETIME)",
"postgres": "CAST('2020-01-01' AS TIMESTAMP)",
"redshift": "CAST('2020-01-01' AS TIMESTAMP)",
"snowflake": "CAST('2020-01-01' AS TIMESTAMP)",
"spark": "CAST('2020-01-01' AS TIMESTAMP)",
"trino": "CAST('2020-01-01' AS TIMESTAMP)",
"clickhouse": "CAST('2020-01-01' AS Nullable(DateTime))",
"drill": "CAST('2020-01-01' AS TIMESTAMP)",
"hive": "CAST('2020-01-01' AS TIMESTAMP)",
"presto": "CAST('2020-01-01' AS TIMESTAMP)",
"sqlite": "'2020-01-01'",
"doris": "CAST('2020-01-01' AS DATETIME)",
},
)
self.validate_all(
"TIME_STR_TO_TIME('2020-01-01 12:13:14-08:00', 'America/Los_Angeles')",
write={
"bigquery": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMP)",
"databricks": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMP)",
"duckdb": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMPTZ)",
"tsql": "CAST('2020-01-01 12:13:14-08:00' AS DATETIMEOFFSET) AT TIME ZONE 'UTC'",
"mysql": "CAST('2020-01-01 12:13:14-08:00' AS DATETIME)",
"postgres": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMPTZ)",
"redshift": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMP WITH TIME ZONE)",
"snowflake": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMPTZ)",
"spark": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMP)",
"trino": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMP WITH TIME ZONE)",
"clickhouse": "CAST('2020-01-01 12:13:14' AS Nullable(DateTime('America/Los_Angeles')))",
"drill": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMP)",
"hive": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMP)",
"presto": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMP WITH TIME ZONE)",
"sqlite": "'2020-01-01 12:13:14-08:00'",
"doris": "CAST('2020-01-01 12:13:14-08:00' AS DATETIME)",
},
)
self.validate_all(
"TIME_STR_TO_TIME(col, 'America/Los_Angeles')",
write={
"bigquery": "CAST(col AS TIMESTAMP)",
"databricks": "CAST(col AS TIMESTAMP)",
"duckdb": "CAST(col AS TIMESTAMPTZ)",
"tsql": "CAST(col AS DATETIMEOFFSET) AT TIME ZONE 'UTC'",
"mysql": "CAST(col AS DATETIME)",
"postgres": "CAST(col AS TIMESTAMPTZ)",
"redshift": "CAST(col AS TIMESTAMP WITH TIME ZONE)",
"snowflake": "CAST(col AS TIMESTAMPTZ)",
"spark": "CAST(col AS TIMESTAMP)",
"trino": "CAST(col AS TIMESTAMP WITH TIME ZONE)",
"clickhouse": "CAST(col AS Nullable(DateTime('America/Los_Angeles')))",
"drill": "CAST(col AS TIMESTAMP)",
"hive": "CAST(col AS TIMESTAMP)",
"presto": "CAST(col AS TIMESTAMP WITH TIME ZONE)",
"sqlite": "col",
"doris": "CAST(col AS DATETIME)",
},
)
self.validate_all(
"TIME_STR_TO_UNIX('2020-01-01')",
write={
@@ -2115,6 +2157,17 @@ SELECT
},
)
# needs to preserve the target alias in the WHEN condition but not in the THEN clause
self.validate_all(
"""MERGE INTO foo AS target USING (SELECT a, b FROM tbl) AS src ON src.a = target.a
WHEN MATCHED AND target.a <> src.a THEN UPDATE SET target.b = 'FOO'
WHEN NOT MATCHED THEN INSERT (target.a, target.b) VALUES (src.a, src.b)""",
write={
"trino": """MERGE INTO foo AS target USING (SELECT a, b FROM tbl) AS src ON src.a = target.a WHEN MATCHED AND target.a <> src.a THEN UPDATE SET b = 'FOO' WHEN NOT MATCHED THEN INSERT (a, b) VALUES (src.a, src.b)""",
"postgres": """MERGE INTO foo AS target USING (SELECT a, b FROM tbl) AS src ON src.a = target.a WHEN MATCHED AND target.a <> src.a THEN UPDATE SET b = 'FOO' WHEN NOT MATCHED THEN INSERT (a, b) VALUES (src.a, src.b)""",
},
)
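# A sketch of the MERGE alias handling tested above, assuming sqlglot.transpile
# with the default read dialect: the target alias survives in the WHEN ... AND
# condition but is stripped from the UPDATE SET and INSERT column references.
import sqlglot
merge = (
"MERGE INTO foo AS target USING (SELECT a, b FROM tbl) AS src ON src.a = target.a "
"WHEN MATCHED AND target.a <> src.a THEN UPDATE SET target.b = 'FOO' "
"WHEN NOT MATCHED THEN INSERT (target.a, target.b) VALUES (src.a, src.b)"
)
print(sqlglot.transpile(merge, write="postgres")[0])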
def test_substring(self):
self.validate_all(
"SUBSTR('123456', 2, 3)",
@@ -2603,3 +2656,46 @@ FROM subquery2""",
"trino": f"SELECT {pad_func}('bar', 5, ' ')",
},
)
def test_generate_date_array(self):
self.validate_all(
"SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
write={
"bigquery": "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE), INTERVAL '1' WEEK))",
"databricks": "SELECT * FROM EXPLODE(SEQUENCE(CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE), INTERVAL '1' WEEK))",
"duckdb": "SELECT * FROM UNNEST(CAST(GENERATE_SERIES(CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE), (7 * INTERVAL '1' DAY)) AS DATE[]))",
"mysql": "WITH RECURSIVE _generated_dates(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATE_ADD(date_value, INTERVAL 1 WEEK) AS DATE) FROM _generated_dates WHERE CAST(DATE_ADD(date_value, INTERVAL 1 WEEK) AS DATE) <= CAST('2020-02-01' AS DATE)) SELECT * FROM (SELECT date_value FROM _generated_dates) AS _generated_dates",
"postgres": "SELECT * FROM (SELECT CAST(value AS DATE) FROM GENERATE_SERIES(CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE), INTERVAL '1 WEEK') AS value) AS _unnested_generate_series",
"presto": "SELECT * FROM UNNEST(SEQUENCE(CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE), (1 * INTERVAL '7' DAY)))",
"redshift": "WITH RECURSIVE _generated_dates(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATEADD(WEEK, 1, date_value) AS DATE) FROM _generated_dates WHERE CAST(DATEADD(WEEK, 1, date_value) AS DATE) <= CAST('2020-02-01' AS DATE)) SELECT * FROM (SELECT date_value FROM _generated_dates) AS _generated_dates",
"snowflake": "SELECT * FROM (SELECT DATEADD(WEEK, CAST(value AS INT), CAST('2020-01-01' AS DATE)) AS value FROM TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (DATEDIFF(WEEK, CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE)) + 1 - 1) + 1))) AS _u(seq, key, path, index, value, this))",
"spark": "SELECT * FROM EXPLODE(SEQUENCE(CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE), INTERVAL '1' WEEK))",
"trino": "SELECT * FROM UNNEST(SEQUENCE(CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE), (1 * INTERVAL '7' DAY)))",
"tsql": "WITH _generated_dates(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATEADD(WEEK, 1, date_value) AS DATE) FROM _generated_dates WHERE CAST(DATEADD(WEEK, 1, date_value) AS DATE) <= CAST('2020-02-01' AS DATE)) SELECT * FROM (SELECT date_value AS date_value FROM _generated_dates) AS _generated_dates",
},
)
self.validate_all(
"WITH dates AS (SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))) SELECT * FROM dates",
write={
"mysql": "WITH RECURSIVE _generated_dates(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATE_ADD(date_value, INTERVAL 1 WEEK) AS DATE) FROM _generated_dates WHERE CAST(DATE_ADD(date_value, INTERVAL 1 WEEK) AS DATE) <= CAST('2020-02-01' AS DATE)), dates AS (SELECT * FROM (SELECT date_value FROM _generated_dates) AS _generated_dates) SELECT * FROM dates",
"redshift": "WITH RECURSIVE _generated_dates(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATEADD(WEEK, 1, date_value) AS DATE) FROM _generated_dates WHERE CAST(DATEADD(WEEK, 1, date_value) AS DATE) <= CAST('2020-02-01' AS DATE)), dates AS (SELECT * FROM (SELECT date_value FROM _generated_dates) AS _generated_dates) SELECT * FROM dates",
"tsql": "WITH _generated_dates(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATEADD(WEEK, 1, date_value) AS DATE) FROM _generated_dates WHERE CAST(DATEADD(WEEK, 1, date_value) AS DATE) <= CAST('2020-02-01' AS DATE)), dates AS (SELECT * FROM (SELECT date_value AS date_value FROM _generated_dates) AS _generated_dates) SELECT * FROM dates",
},
)
self.validate_all(
"WITH dates1 AS (SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))), dates2 AS (SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-03-01', INTERVAL 1 MONTH))) SELECT * FROM dates1 CROSS JOIN dates2",
write={
"mysql": "WITH RECURSIVE _generated_dates(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATE_ADD(date_value, INTERVAL 1 WEEK) AS DATE) FROM _generated_dates WHERE CAST(DATE_ADD(date_value, INTERVAL 1 WEEK) AS DATE) <= CAST('2020-02-01' AS DATE)), _generated_dates_1(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATE_ADD(date_value, INTERVAL 1 MONTH) AS DATE) FROM _generated_dates_1 WHERE CAST(DATE_ADD(date_value, INTERVAL 1 MONTH) AS DATE) <= CAST('2020-03-01' AS DATE)), dates1 AS (SELECT * FROM (SELECT date_value FROM _generated_dates) AS _generated_dates), dates2 AS (SELECT * FROM (SELECT date_value FROM _generated_dates_1) AS _generated_dates_1) SELECT * FROM dates1 CROSS JOIN dates2",
"redshift": "WITH RECURSIVE _generated_dates(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATEADD(WEEK, 1, date_value) AS DATE) FROM _generated_dates WHERE CAST(DATEADD(WEEK, 1, date_value) AS DATE) <= CAST('2020-02-01' AS DATE)), _generated_dates_1(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATEADD(MONTH, 1, date_value) AS DATE) FROM _generated_dates_1 WHERE CAST(DATEADD(MONTH, 1, date_value) AS DATE) <= CAST('2020-03-01' AS DATE)), dates1 AS (SELECT * FROM (SELECT date_value FROM _generated_dates) AS _generated_dates), dates2 AS (SELECT * FROM (SELECT date_value FROM _generated_dates_1) AS _generated_dates_1) SELECT * FROM dates1 CROSS JOIN dates2",
"tsql": "WITH _generated_dates(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATEADD(WEEK, 1, date_value) AS DATE) FROM _generated_dates WHERE CAST(DATEADD(WEEK, 1, date_value) AS DATE) <= CAST('2020-02-01' AS DATE)), _generated_dates_1(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATEADD(MONTH, 1, date_value) AS DATE) FROM _generated_dates_1 WHERE CAST(DATEADD(MONTH, 1, date_value) AS DATE) <= CAST('2020-03-01' AS DATE)), dates1 AS (SELECT * FROM (SELECT date_value AS date_value FROM _generated_dates) AS _generated_dates), dates2 AS (SELECT * FROM (SELECT date_value AS date_value FROM _generated_dates_1) AS _generated_dates_1) SELECT * FROM dates1 CROSS JOIN dates2",
},
)
self.validate_all(
"SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK)) AS _q(date_week)",
write={
"mysql": "WITH RECURSIVE _generated_dates(date_week) AS (SELECT CAST('2020-01-01' AS DATE) AS date_week UNION ALL SELECT CAST(DATE_ADD(date_week, INTERVAL 1 WEEK) AS DATE) FROM _generated_dates WHERE CAST(DATE_ADD(date_week, INTERVAL 1 WEEK) AS DATE) <= CAST('2020-02-01' AS DATE)) SELECT * FROM (SELECT date_week FROM _generated_dates) AS _generated_dates",
"redshift": "WITH RECURSIVE _generated_dates(date_week) AS (SELECT CAST('2020-01-01' AS DATE) AS date_week UNION ALL SELECT CAST(DATEADD(WEEK, 1, date_week) AS DATE) FROM _generated_dates WHERE CAST(DATEADD(WEEK, 1, date_week) AS DATE) <= CAST('2020-02-01' AS DATE)) SELECT * FROM (SELECT date_week FROM _generated_dates) AS _generated_dates",
"snowflake": "SELECT * FROM (SELECT DATEADD(WEEK, CAST(date_week AS INT), CAST('2020-01-01' AS DATE)) AS date_week FROM TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (DATEDIFF(WEEK, CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE)) + 1 - 1) + 1))) AS _q(seq, key, path, index, date_week, this)) AS _q(date_week)",
"tsql": "WITH _generated_dates(date_week) AS (SELECT CAST('2020-01-01' AS DATE) AS date_week UNION ALL SELECT CAST(DATEADD(WEEK, 1, date_week) AS DATE) FROM _generated_dates WHERE CAST(DATEADD(WEEK, 1, date_week) AS DATE) <= CAST('2020-02-01' AS DATE)) SELECT * FROM (SELECT date_week AS date_week FROM _generated_dates) AS _generated_dates",
},
)

View file

@@ -8,8 +8,6 @@ class TestDuckDB(Validator):
dialect = "duckdb"
def test_duckdb(self):
self.validate_identity("x::int[3]", "CAST(x AS INT[3])")
with self.assertRaises(ParseError):
parse_one("1 //", read="duckdb")
@@ -34,31 +32,6 @@ class TestDuckDB(Validator):
"mysql": "SELECT `straight_join`",
},
)
self.validate_all(
"SELECT CAST('2020-01-01 12:05:01' AS TIMESTAMP)",
read={
"duckdb": "SELECT CAST('2020-01-01 12:05:01' AS TIMESTAMP)",
"snowflake": "SELECT CAST('2020-01-01 12:05:01' AS TIMESTAMPNTZ)",
},
)
self.validate_all(
"SELECT CAST('2020-01-01' AS DATE) + INTERVAL (day_offset) DAY FROM t",
read={
"duckdb": "SELECT CAST('2020-01-01' AS DATE) + INTERVAL (day_offset) DAY FROM t",
"mysql": "SELECT DATE '2020-01-01' + INTERVAL day_offset DAY FROM t",
},
)
self.validate_all(
"SELECT CAST('09:05:03' AS TIME) + INTERVAL 2 HOUR",
read={
"bigquery": "SELECT TIME_ADD(CAST('09:05:03' AS TIME), INTERVAL 2 HOUR)",
"snowflake": "SELECT TIMEADD(HOUR, 2, TO_TIME('09:05:03'))",
},
write={
"duckdb": "SELECT CAST('09:05:03' AS TIME) + INTERVAL '2' HOUR",
"snowflake": "SELECT CAST('09:05:03' AS TIME) + INTERVAL '2 HOUR'",
},
)
self.validate_all(
'STRUCT_PACK("a b" := 1)',
write={
@@ -112,7 +85,9 @@ class TestDuckDB(Validator):
self.validate_all(
"CREATE TEMPORARY FUNCTION f1(a, b) AS (a + b)",
read={"bigquery": "CREATE TEMP FUNCTION f1(a INT64, b INT64) AS (a + b)"},
read={
"bigquery": "CREATE TEMP FUNCTION f1(a INT64, b INT64) AS (a + b)",
},
)
self.validate_identity("SELECT 1 WHERE x > $1")
self.validate_identity("SELECT 1 WHERE x > $name")
@@ -128,13 +103,17 @@ class TestDuckDB(Validator):
)
self.validate_all(
"{'a': 1, 'b': '2'}", write={"presto": "CAST(ROW(1, '2') AS ROW(a INTEGER, b VARCHAR))"}
"{'a': 1, 'b': '2'}",
write={
"presto": "CAST(ROW(1, '2') AS ROW(a INTEGER, b VARCHAR))",
},
)
self.validate_all(
"struct_pack(a := 1, b := 2)",
write={"presto": "CAST(ROW(1, 2) AS ROW(a INTEGER, b INTEGER))"},
write={
"presto": "CAST(ROW(1, 2) AS ROW(a INTEGER, b INTEGER))",
},
)
self.validate_all(
"struct_pack(a := 1, b := x)",
write={
@@ -818,6 +797,9 @@ class TestDuckDB(Validator):
)
self.validate_identity("SELECT LENGTH(foo)")
self.validate_identity("SELECT ARRAY[1, 2, 3]", "SELECT [1, 2, 3]")
self.validate_identity("SELECT * FROM (DESCRIBE t)")
def test_array_index(self):
with self.assertLogs(helper_logger) as cm:
@@ -909,12 +891,12 @@ class TestDuckDB(Validator):
"EPOCH_MS(x)",
write={
"bigquery": "TIMESTAMP_MILLIS(x)",
"clickhouse": "fromUnixTimestamp64Milli(CAST(x AS Nullable(Int64)))",
"duckdb": "EPOCH_MS(x)",
"mysql": "FROM_UNIXTIME(x / POWER(10, 3))",
"postgres": "TO_TIMESTAMP(CAST(x AS DOUBLE PRECISION) / 10 ^ 3)",
"presto": "FROM_UNIXTIME(CAST(x AS DOUBLE) / POW(10, 3))",
"spark": "TIMESTAMP_MILLIS(x)",
"clickhouse": "fromUnixTimestamp64Milli(CAST(x AS Int64))",
"postgres": "TO_TIMESTAMP(CAST(x AS DOUBLE PRECISION) / 10 ^ 3)",
"mysql": "FROM_UNIXTIME(x / POWER(10, 3))",
},
)
self.validate_all(
@@ -958,7 +940,7 @@ class TestDuckDB(Validator):
self.validate_all(
"STRPTIME(x, '%-m/%-d/%y %-I:%M %p')",
write={
"bigquery": "PARSE_TIMESTAMP('%-m/%-d/%y %-I:%M %p', x)",
"bigquery": "PARSE_TIMESTAMP('%-m/%e/%y %-I:%M %p', x)",
"duckdb": "STRPTIME(x, '%-m/%-d/%y %-I:%M %p')",
"presto": "DATE_PARSE(x, '%c/%e/%y %l:%i %p')",
"hive": "CAST(FROM_UNIXTIME(UNIX_TIMESTAMP(x, 'M/d/yy h:mm a')) AS TIMESTAMP)",
@@ -1023,6 +1005,7 @@ class TestDuckDB(Validator):
self.validate_identity("ARRAY((SELECT id FROM t))")
def test_cast(self):
self.validate_identity("x::int[3]", "CAST(x AS INT[3])")
self.validate_identity("CAST(x AS REAL)")
self.validate_identity("CAST(x AS UINTEGER)")
self.validate_identity("CAST(x AS UBIGINT)")
@@ -1076,6 +1059,39 @@ class TestDuckDB(Validator):
"STRUCT_PACK(a := 'b')::STRUCT(a TEXT)",
"CAST(ROW('b') AS STRUCT(a TEXT))",
)
self.validate_all(
"CAST(x AS TIME)",
read={
"duckdb": "CAST(x AS TIME)",
"presto": "CAST(x AS TIME(6))",
},
)
self.validate_all(
"SELECT CAST('2020-01-01 12:05:01' AS TIMESTAMP)",
read={
"duckdb": "SELECT CAST('2020-01-01 12:05:01' AS TIMESTAMP)",
"snowflake": "SELECT CAST('2020-01-01 12:05:01' AS TIMESTAMPNTZ)",
},
)
self.validate_all(
"SELECT CAST('2020-01-01' AS DATE) + INTERVAL (day_offset) DAY FROM t",
read={
"duckdb": "SELECT CAST('2020-01-01' AS DATE) + INTERVAL (day_offset) DAY FROM t",
"mysql": "SELECT DATE '2020-01-01' + INTERVAL day_offset DAY FROM t",
},
)
self.validate_all(
"SELECT CAST('09:05:03' AS TIME) + INTERVAL 2 HOUR",
read={
"bigquery": "SELECT TIME_ADD(CAST('09:05:03' AS TIME), INTERVAL 2 HOUR)",
"snowflake": "SELECT TIMEADD(HOUR, 2, TO_TIME('09:05:03'))",
},
write={
"duckdb": "SELECT CAST('09:05:03' AS TIME) + INTERVAL '2' HOUR",
"snowflake": "SELECT CAST('09:05:03' AS TIME) + INTERVAL '2 HOUR'",
},
)
self.validate_all(
"CAST(x AS VARCHAR(5))",
write={
@@ -1156,6 +1172,12 @@ class TestDuckDB(Validator):
},
)
self.validate_identity("SELECT x::INT[3][3]", "SELECT CAST(x AS INT[3][3])")
self.validate_identity(
"""SELECT ARRAY[[[1]]]::INT[1][1][1]""",
"""SELECT CAST([[[1]]] AS INT[1][1][1])""",
)
def test_encode_decode(self):
self.validate_all(
"ENCODE(x)",

View file

@@ -171,6 +171,16 @@ class TestHive(Validator):
self.validate_identity(
"""CREATE EXTERNAL TABLE `my_table` (`a7` ARRAY<DATE>) ROW FORMAT SERDE 'a' STORED AS INPUTFORMAT 'b' OUTPUTFORMAT 'c' LOCATION 'd' TBLPROPERTIES ('e'='f')"""
)
self.validate_identity("ALTER VIEW v1 AS SELECT x, UPPER(s) AS s FROM t2")
self.validate_identity("ALTER VIEW v1 (c1, c2) AS SELECT x, UPPER(s) AS s FROM t2")
self.validate_identity(
"ALTER VIEW v7 (c1 COMMENT 'Comment for c1', c2) AS SELECT t1.c1, t1.c2 FROM t1"
)
self.validate_identity("ALTER VIEW db1.v1 RENAME TO db2.v2")
self.validate_identity("ALTER VIEW v1 SET TBLPROPERTIES ('tblp1'='1', 'tblp2'='2')")
self.validate_identity(
"ALTER VIEW v1 UNSET TBLPROPERTIES ('tblp1', 'tblp2')", check_command_warning=True
)
def test_lateral_view(self):
self.validate_all(

View file

@@ -24,6 +24,18 @@ class TestMySQL(Validator):
self.validate_identity("ALTER TABLE t ADD INDEX `i` (`c`)")
self.validate_identity("ALTER TABLE t ADD UNIQUE `i` (`c`)")
self.validate_identity("ALTER TABLE test_table MODIFY COLUMN test_column LONGTEXT")
self.validate_identity("ALTER VIEW v AS SELECT a, b, c, d FROM foo")
self.validate_identity("ALTER VIEW v AS SELECT * FROM foo WHERE c > 100")
self.validate_identity(
"ALTER ALGORITHM = MERGE VIEW v AS SELECT * FROM foo", check_command_warning=True
)
self.validate_identity(
"ALTER DEFINER = 'admin'@'localhost' VIEW v AS SELECT * FROM foo",
check_command_warning=True,
)
self.validate_identity(
"ALTER SQL SECURITY = DEFINER VIEW v AS SELECT * FROM foo", check_command_warning=True
)
self.validate_identity(
"INSERT INTO things (a, b) VALUES (1, 2) AS new_data ON DUPLICATE KEY UPDATE id = LAST_INSERT_ID(id), a = new_data.a, b = new_data.b"
)

View file

@@ -6,6 +6,7 @@ class TestOracle(Validator):
dialect = "oracle"
def test_oracle(self):
self.validate_identity("1 /* /* */")
self.validate_all(
"SELECT CONNECT_BY_ROOT x y",
write={
@@ -13,8 +14,9 @@ class TestOracle(Validator):
"oracle": "SELECT CONNECT_BY_ROOT x AS y",
},
)
self.parse_one("ALTER TABLE tbl_name DROP FOREIGN KEY fk_symbol").assert_is(exp.AlterTable)
self.parse_one("ALTER TABLE tbl_name DROP FOREIGN KEY fk_symbol").assert_is(exp.Alter)
self.validate_identity("SYSDATE")
self.validate_identity("CREATE GLOBAL TEMPORARY TABLE t AS SELECT * FROM orders")
self.validate_identity("CREATE PRIVATE TEMPORARY TABLE t AS SELECT * FROM orders")
self.validate_identity("REGEXP_REPLACE('source', 'search')")
@@ -43,6 +45,9 @@ class TestOracle(Validator):
self.validate_identity("SELECT COUNT(*) * 10 FROM orders SAMPLE (10) SEED (1)")
self.validate_identity("SELECT * FROM V$SESSION")
self.validate_identity("SELECT TO_DATE('January 15, 1989, 11:00 A.M.')")
self.validate_identity(
"SELECT * FROM test UNPIVOT INCLUDE NULLS (value FOR Description IN (col AS 'PREFIX ' || CHR(38) || ' SUFFIX'))"
)
self.validate_identity(
"SELECT last_name, employee_id, manager_id, LEVEL FROM employees START WITH employee_id = 100 CONNECT BY PRIOR employee_id = manager_id ORDER SIBLINGS BY last_name"
)
@@ -72,10 +77,6 @@ class TestOracle(Validator):
"SELECT JSON_OBJECTAGG(KEY department_name VALUE department_id) FROM dep WHERE id <= 30",
"SELECT JSON_OBJECTAGG(department_name: department_id) FROM dep WHERE id <= 30",
)
self.validate_identity(
"SYSDATE",
"CURRENT_TIMESTAMP",
)
self.validate_identity(
"SELECT last_name, department_id, salary, MIN(salary) KEEP (DENSE_RANK FIRST ORDER BY commission_pct) "
'OVER (PARTITION BY department_id) AS "Worst", MAX(salary) KEEP (DENSE_RANK LAST ORDER BY commission_pct) '
@@ -88,7 +89,6 @@ class TestOracle(Validator):
self.validate_identity(
"SELECT * FROM T ORDER BY I OFFSET NVL(:variable1, 10) ROWS FETCH NEXT NVL(:variable2, 10) ROWS ONLY",
)
self.validate_identity("NVL(x, y)").assert_is(exp.Anonymous)
self.validate_identity(
"SELECT * FROM t SAMPLE (.25)",
"SELECT * FROM t SAMPLE (0.25)",
@@ -98,6 +98,16 @@ class TestOracle(Validator):
"SELECT * FROM t START WITH col CONNECT BY NOCYCLE PRIOR col1 = col2"
)
self.validate_all(
"SELECT * FROM test WHERE MOD(col1, 4) = 3",
read={
"duckdb": "SELECT * FROM test WHERE col1 % 4 = 3",
},
write={
"duckdb": "SELECT * FROM test WHERE col1 % 4 = 3",
"oracle": "SELECT * FROM test WHERE MOD(col1, 4) = 3",
},
)
self.validate_all(
"CURRENT_TIMESTAMP BETWEEN TO_DATE(f.C_SDATE, 'yyyy/mm/dd') AND TO_DATE(f.C_EDATE, 'yyyy/mm/dd')",
read={
@@ -242,6 +252,15 @@ class TestOracle(Validator):
"""SELECT * FROM t ORDER BY a ASC, b ASC NULLS FIRST, c DESC NULLS LAST, d DESC""",
)
self.validate_all(
"NVL(NULL, 1)",
write={
"oracle": "NVL(NULL, 1)",
"": "COALESCE(NULL, 1)",
"clickhouse": "COALESCE(NULL, 1)",
},
)
def test_join_marker(self):
self.validate_identity("SELECT e1.x, e2.x FROM e e1, e e2 WHERE e1.y (+) = e2.y")

View file

@@ -28,7 +28,7 @@ class TestPostgres(Validator):
alter_table_only = """ALTER TABLE ONLY "Album" ADD CONSTRAINT "FK_AlbumArtistId" FOREIGN KEY ("ArtistId") REFERENCES "Artist" ("ArtistId") ON DELETE NO ACTION ON UPDATE NO ACTION"""
expr = self.parse_one(alter_table_only)
self.assertIsInstance(expr, exp.AlterTable)
self.assertIsInstance(expr, exp.Alter)
self.assertEqual(expr.sql(dialect="postgres"), alter_table_only)
self.validate_identity("STRING_TO_ARRAY('xx~^~yy~^~zz', '~^~', 'yy')")
@@ -548,47 +548,54 @@ class TestPostgres(Validator):
"postgres": "x # y",
},
)
self.validate_all(
"SELECT GENERATE_SERIES(1, 5)",
write={
"bigquery": UnsupportedError,
"postgres": "SELECT GENERATE_SERIES(1, 5)",
},
)
self.validate_all(
"WITH dates AS (SELECT GENERATE_SERIES('2020-01-01'::DATE, '2024-01-01'::DATE, '1 day'::INTERVAL) AS date), date_table AS (SELECT DISTINCT DATE_TRUNC('MONTH', date) AS date FROM dates) SELECT * FROM date_table",
write={
"duckdb": "WITH dates AS (SELECT UNNEST(GENERATE_SERIES(CAST('2020-01-01' AS DATE), CAST('2024-01-01' AS DATE), CAST('1 day' AS INTERVAL))) AS date), date_table AS (SELECT DISTINCT DATE_TRUNC('MONTH', date) AS date FROM dates) SELECT * FROM date_table",
"postgres": "WITH dates AS (SELECT GENERATE_SERIES(CAST('2020-01-01' AS DATE), CAST('2024-01-01' AS DATE), CAST('1 day' AS INTERVAL)) AS date), date_table AS (SELECT DISTINCT DATE_TRUNC('MONTH', date) AS date FROM dates) SELECT * FROM date_table",
},
)
self.validate_all(
"GENERATE_SERIES(a, b, ' 2 days ')",
write={
"postgres": "GENERATE_SERIES(a, b, INTERVAL '2 DAYS')",
"presto": "SEQUENCE(a, b, INTERVAL '2' DAY)",
"trino": "SEQUENCE(a, b, INTERVAL '2' DAY)",
"presto": "UNNEST(SEQUENCE(a, b, INTERVAL '2' DAY))",
"trino": "UNNEST(SEQUENCE(a, b, INTERVAL '2' DAY))",
},
)
self.validate_all(
"GENERATE_SERIES('2019-01-01'::TIMESTAMP, NOW(), '1day')",
write={
"databricks": "EXPLODE(SEQUENCE(CAST('2019-01-01' AS TIMESTAMP), CAST(CURRENT_TIMESTAMP() AS TIMESTAMP), INTERVAL '1' DAY))",
"hive": "EXPLODE(SEQUENCE(CAST('2019-01-01' AS TIMESTAMP), CAST(CURRENT_TIMESTAMP() AS TIMESTAMP), INTERVAL '1' DAY))",
"postgres": "GENERATE_SERIES(CAST('2019-01-01' AS TIMESTAMP), CURRENT_TIMESTAMP, INTERVAL '1 DAY')",
"presto": "SEQUENCE(CAST('2019-01-01' AS TIMESTAMP), CAST(CURRENT_TIMESTAMP AS TIMESTAMP), INTERVAL '1' DAY)",
"trino": "SEQUENCE(CAST('2019-01-01' AS TIMESTAMP), CAST(CURRENT_TIMESTAMP AS TIMESTAMP), INTERVAL '1' DAY)",
"hive": "SEQUENCE(CAST('2019-01-01' AS TIMESTAMP), CAST(CURRENT_TIMESTAMP() AS TIMESTAMP), INTERVAL '1' DAY)",
"spark2": "SEQUENCE(CAST('2019-01-01' AS TIMESTAMP), CAST(CURRENT_TIMESTAMP() AS TIMESTAMP), INTERVAL '1' DAY)",
"spark": "SEQUENCE(CAST('2019-01-01' AS TIMESTAMP), CAST(CURRENT_TIMESTAMP() AS TIMESTAMP), INTERVAL '1' DAY)",
"databricks": "SEQUENCE(CAST('2019-01-01' AS TIMESTAMP), CAST(CURRENT_TIMESTAMP() AS TIMESTAMP), INTERVAL '1' DAY)",
"presto": "UNNEST(SEQUENCE(CAST('2019-01-01' AS TIMESTAMP), CAST(CURRENT_TIMESTAMP AS TIMESTAMP), INTERVAL '1' DAY))",
"spark": "EXPLODE(SEQUENCE(CAST('2019-01-01' AS TIMESTAMP), CAST(CURRENT_TIMESTAMP() AS TIMESTAMP), INTERVAL '1' DAY))",
"spark2": "EXPLODE(SEQUENCE(CAST('2019-01-01' AS TIMESTAMP), CAST(CURRENT_TIMESTAMP() AS TIMESTAMP), INTERVAL '1' DAY))",
"trino": "UNNEST(SEQUENCE(CAST('2019-01-01' AS TIMESTAMP), CAST(CURRENT_TIMESTAMP AS TIMESTAMP), INTERVAL '1' DAY))",
},
)
self.validate_all(
"GENERATE_SERIES(a, b)",
"SELECT * FROM GENERATE_SERIES(a, b)",
read={
"postgres": "GENERATE_SERIES(a, b)",
"presto": "SEQUENCE(a, b)",
"trino": "SEQUENCE(a, b)",
"tsql": "GENERATE_SERIES(a, b)",
"hive": "SEQUENCE(a, b)",
"spark2": "SEQUENCE(a, b)",
"spark": "SEQUENCE(a, b)",
"databricks": "SEQUENCE(a, b)",
"tsql": "SELECT * FROM GENERATE_SERIES(a, b)",
},
write={
"postgres": "GENERATE_SERIES(a, b)",
"presto": "SEQUENCE(a, b)",
"trino": "SEQUENCE(a, b)",
"tsql": "GENERATE_SERIES(a, b)",
"hive": "SEQUENCE(a, b)",
"spark2": "SEQUENCE(a, b)",
"spark": "SEQUENCE(a, b)",
"databricks": "SEQUENCE(a, b)",
"databricks": "SELECT * FROM EXPLODE(SEQUENCE(a, b))",
"hive": "SELECT * FROM EXPLODE(SEQUENCE(a, b))",
"postgres": "SELECT * FROM GENERATE_SERIES(a, b)",
"presto": "SELECT * FROM UNNEST(SEQUENCE(a, b))",
"spark": "SELECT * FROM EXPLODE(SEQUENCE(a, b))",
"spark2": "SELECT * FROM EXPLODE(SEQUENCE(a, b))",
"trino": "SELECT * FROM UNNEST(SEQUENCE(a, b))",
"tsql": "SELECT * FROM GENERATE_SERIES(a, b)",
},
)
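# A sketch of the table-function rewrite above, assuming sqlglot.transpile:
# GENERATE_SERIES used in a FROM clause becomes UNNEST(SEQUENCE(...)) on
# Presto/Trino and EXPLODE(SEQUENCE(...)) on the Spark family.
import sqlglot
print(sqlglot.transpile("SELECT * FROM GENERATE_SERIES(a, b)", read="postgres", write="trino")[0])
# SELECT * FROM UNNEST(SEQUENCE(a, b))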
self.validate_all(

View file

@@ -13,6 +13,13 @@ class TestPresto(Validator):
self.validate_identity("CAST(TDIGEST_AGG(1) AS TDIGEST)")
self.validate_identity("CAST(x AS HYPERLOGLOG)")
self.validate_all(
"SELECT FROM_ISO8601_TIMESTAMP('2020-05-11T11:15:05')",
write={
"duckdb": "SELECT CAST('2020-05-11T11:15:05' AS TIMESTAMPTZ)",
"presto": "SELECT FROM_ISO8601_TIMESTAMP('2020-05-11T11:15:05')",
},
)
self.validate_all(
"CAST(x AS INTERVAL YEAR TO MONTH)",
write={

View file

@@ -6,7 +6,6 @@ class TestRedshift(Validator):
dialect = "redshift"
def test_redshift(self):
self.validate_identity("1 div", "1 AS div")
self.validate_all(
"SELECT SPLIT_TO_ARRAY('12,345,6789')",
write={
@@ -315,6 +314,7 @@ class TestRedshift(Validator):
)
def test_identity(self):
self.validate_identity("1 div", "1 AS div")
self.validate_identity("LISTAGG(DISTINCT foo, ', ')")
self.validate_identity("CREATE MATERIALIZED VIEW orders AUTO REFRESH YES AS SELECT 1")
self.validate_identity("SELECT DATEADD(DAY, 1, 'today')")
@@ -337,6 +337,10 @@ class TestRedshift(Validator):
self.validate_identity(
"""SELECT JSON_EXTRACT_PATH_TEXT('{"f2":{"f3":1},"f4":{"f5":99,"f6":"star"}', 'f4', 'f6', TRUE)"""
)
self.validate_identity(
'DATE_PART(year, "somecol")',
'EXTRACT(year FROM "somecol")',
).this.assert_is(exp.Var)
self.validate_identity(
"SELECT CONCAT('abc', 'def')",
"SELECT 'abc' || 'def'",
@@ -430,6 +434,14 @@ ORDER BY
)
self.validate_identity("SELECT JSON_PARSE('[]')")
self.validate_identity("SELECT ARRAY(1, 2, 3)")
self.validate_identity("SELECT ARRAY[1, 2, 3]")
self.validate_identity(
"""SELECT CONVERT_TIMEZONE('America/New_York', '2024-08-06 09:10:00.000')""",
"""SELECT CONVERT_TIMEZONE('UTC', 'America/New_York', '2024-08-06 09:10:00.000')""",
)
def test_values(self):
# Test crazy-sized VALUES clause to UNION ALL conversion to ensure we don't get RecursionError
values = [str(v) for v in range(0, 10000)]
@@ -608,3 +620,9 @@ FROM (
"select a.foo, b.bar, a.baz from a, b where a.baz = b.baz (+)",
"SELECT a.foo, b.bar, a.baz FROM a, b WHERE a.baz = b.baz (+)",
)
def test_time(self):
self.validate_all(
"TIME_TO_STR(a, '%Y-%m-%d %H:%M:%S.%f')",
write={"redshift": "TO_CHAR(a, 'YYYY-MM-DD HH24:MI:SS.US')"},
)

View file

@@ -11,6 +11,7 @@ class TestSnowflake(Validator):
dialect = "snowflake"
def test_snowflake(self):
self.validate_identity("1 /* /* */")
self.validate_identity(
"SELECT * FROM table AT (TIMESTAMP => '2024-07-24') UNPIVOT(a FOR b IN (c)) AS pivot_table"
)
@@ -59,6 +60,7 @@ WHERE
)""",
)
self.validate_identity("exclude := [foo]")
self.validate_identity("SELECT CAST([1, 2, 3] AS VECTOR(FLOAT, 3))")
self.validate_identity("SELECT CONNECT_BY_ROOT test AS test_column_alias")
self.validate_identity("SELECT number").selects[0].assert_is(exp.Column)
@@ -113,6 +115,18 @@ WHERE
self.validate_identity("ALTER TABLE a SWAP WITH b")
self.validate_identity("SELECT MATCH_CONDITION")
self.validate_identity("SELECT * REPLACE (CAST(col AS TEXT) AS scol) FROM t")
self.validate_identity(
"SELECT * FROM quarterly_sales PIVOT(SUM(amount) FOR quarter IN ('2023_Q1', '2023_Q2', '2023_Q3', '2023_Q4', '2024_Q1') DEFAULT ON NULL (0)) ORDER BY empid"
)
self.validate_identity(
"SELECT * FROM quarterly_sales PIVOT(SUM(amount) FOR quarter IN (SELECT DISTINCT quarter FROM ad_campaign_types_by_quarter WHERE television = TRUE ORDER BY quarter)) ORDER BY empid"
)
self.validate_identity(
"SELECT * FROM quarterly_sales PIVOT(SUM(amount) FOR IN (ANY ORDER BY quarter)) ORDER BY empid"
)
self.validate_identity(
"SELECT * FROM quarterly_sales PIVOT(SUM(amount) FOR IN (ANY)) ORDER BY empid"
)
self.validate_identity(
"MERGE INTO my_db AS ids USING (SELECT new_id FROM my_model WHERE NOT col IS NULL) AS new_ids ON ids.type = new_ids.type AND ids.source = new_ids.source WHEN NOT MATCHED THEN INSERT VALUES (new_ids.new_id)"
)
@@ -125,6 +139,18 @@ WHERE
self.validate_identity(
"SELECT * FROM DATA AS DATA_L ASOF JOIN DATA AS DATA_R MATCH_CONDITION (DATA_L.VAL > DATA_R.VAL) ON DATA_L.ID = DATA_R.ID"
)
self.validate_identity(
"SELECT * FROM s WHERE c NOT IN (1, 2, 3)",
"SELECT * FROM s WHERE NOT c IN (1, 2, 3)",
)
self.validate_identity(
"SELECT * FROM s WHERE c NOT IN (SELECT * FROM t)",
"SELECT * FROM s WHERE c <> ALL (SELECT * FROM t)",
)
self.validate_identity(
"SELECT * FROM t1 INNER JOIN t2 USING (t1.col)",
"SELECT * FROM t1 INNER JOIN t2 USING (col)",
)
self.validate_identity(
"CURRENT_TIMESTAMP - INTERVAL '1 w' AND (1 = 1)",
"CURRENT_TIMESTAMP() - INTERVAL '1 WEEK' AND (1 = 1)",
@@ -847,6 +873,33 @@ WHERE
},
)
self.validate_identity(
"""SELECT ARRAY_CONSTRUCT('foo')::VARIANT[0]""",
"""SELECT CAST(['foo'] AS VARIANT)[0]""",
)
self.validate_all(
"SELECT CONVERT_TIMEZONE('America/New_York', '2024-08-06 09:10:00.000')",
write={
"snowflake": "SELECT CONVERT_TIMEZONE('America/New_York', '2024-08-06 09:10:00.000')",
"spark": "SELECT CONVERT_TIMEZONE('America/New_York', '2024-08-06 09:10:00.000')",
"databricks": "SELECT CONVERT_TIMEZONE('America/New_York', '2024-08-06 09:10:00.000')",
"redshift": "SELECT CONVERT_TIMEZONE('America/New_York', '2024-08-06 09:10:00.000')",
},
)
self.validate_all(
"SELECT CONVERT_TIMEZONE('America/Los_Angeles', 'America/New_York', '2024-08-06 09:10:00.000')",
write={
"snowflake": "SELECT CONVERT_TIMEZONE('America/Los_Angeles', 'America/New_York', '2024-08-06 09:10:00.000')",
"spark": "SELECT CONVERT_TIMEZONE('America/Los_Angeles', 'America/New_York', '2024-08-06 09:10:00.000')",
"databricks": "SELECT CONVERT_TIMEZONE('America/Los_Angeles', 'America/New_York', '2024-08-06 09:10:00.000')",
"redshift": "SELECT CONVERT_TIMEZONE('America/Los_Angeles', 'America/New_York', '2024-08-06 09:10:00.000')",
"mysql": "SELECT CONVERT_TZ('2024-08-06 09:10:00.000', 'America/Los_Angeles', 'America/New_York')",
"duckdb": "SELECT CAST('2024-08-06 09:10:00.000' AS TIMESTAMP) AT TIME ZONE 'America/Los_Angeles' AT TIME ZONE 'America/New_York'",
},
)
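# A sketch of the three-argument form above, assuming sqlglot.transpile: an
# explicit source zone becomes chained AT TIME ZONE casts on DuckDB and
# CONVERT_TZ on MySQL.
import sqlglot
sql = "SELECT CONVERT_TIMEZONE('America/Los_Angeles', 'America/New_York', '2024-08-06 09:10:00.000')"
print(sqlglot.transpile(sql, read="snowflake", write="duckdb")[0])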
def test_null_treatment(self):
self.validate_all(
r"SELECT FIRST_VALUE(TABLE1.COLUMN1) OVER (PARTITION BY RANDOM_COLUMN1, RANDOM_COLUMN2 ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS MY_ALIAS FROM TABLE1",
@@ -1921,7 +1974,7 @@ STORAGE_ALLOWED_LOCATIONS=('s3://mybucket1/path1/', 's3://mybucket2/path2/')""",
def test_swap(self):
ast = parse_one("ALTER TABLE a SWAP WITH b", read="snowflake")
assert isinstance(ast, exp.AlterTable)
assert isinstance(ast, exp.Alter)
assert isinstance(ast.args["actions"][0], exp.SwapTable)
def test_try_cast(self):

View file

@@ -129,6 +129,16 @@ TBLPROPERTIES (
"spark": "ALTER TABLE StudentInfo DROP COLUMNS (LastName, DOB)",
},
)
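# Most ALTER VIEW forms round-trip; UNSET TBLPROPERTIES is not modeled yet and falls back to exp.Command, hence the warning check below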
self.validate_identity("ALTER VIEW StudentInfoView AS SELECT * FROM StudentInfo")
self.validate_identity("ALTER VIEW StudentInfoView AS SELECT LastName FROM StudentInfo")
self.validate_identity("ALTER VIEW StudentInfoView RENAME TO StudentInfoViewRenamed")
self.validate_identity(
"ALTER VIEW StudentInfoView SET TBLPROPERTIES ('key1'='val1', 'key2'='val2')"
)
self.validate_identity(
"ALTER VIEW StudentInfoView UNSET TBLPROPERTIES ('key1', 'key2')",
check_command_warning=True,
)
def test_to_date(self):
self.validate_all(
@@ -297,6 +307,13 @@ TBLPROPERTIES (
},
)
self.validate_all(
"SELECT DATE_FORMAT(DATE '2020-01-01', 'EEEE') AS weekday",
write={
"presto": "SELECT DATE_FORMAT(CAST(CAST('2020-01-01' AS DATE) AS TIMESTAMP), '%W') AS weekday",
"spark": "SELECT DATE_FORMAT(CAST(CAST('2020-01-01' AS DATE) AS TIMESTAMP), 'EEEE') AS weekday",
},
)
self.validate_all(
"SELECT TRY_ELEMENT_AT(MAP(1, 'a', 2, 'b'), 2)",
read={
@@ -557,7 +574,10 @@ TBLPROPERTIES (
)
self.validate_all(
"CAST(x AS TIMESTAMP)", read={"trino": "CAST(x AS TIMESTAMP(6) WITH TIME ZONE)"}
"CAST(x AS TIMESTAMP)",
read={
"trino": "CAST(x AS TIMESTAMP(6) WITH TIME ZONE)",
},
)
self.validate_all(
"SELECT DATE_ADD(my_date_column, 1)",
@@ -688,6 +708,7 @@ TBLPROPERTIES (
"trino": "SELECT DATE_ADD('MONTH', 20, col)",
},
)
self.validate_identity("DESCRIBE schema.test PARTITION(ds = '2024-01-01')")
def test_bool_or(self):
self.validate_all(
@@ -805,8 +826,22 @@ TBLPROPERTIES (
self.assertEqual(query.sql(name), without_modifiers)
def test_schema_binding_options(self):
for schema_binding in ("BINDING", "COMPENSATION", "TYPE EVOLUTION", "EVOLUTION"):
for schema_binding in (
"BINDING",
"COMPENSATION",
"TYPE EVOLUTION",
"EVOLUTION",
):
with self.subTest(f"Test roundtrip of VIEW schema binding {schema_binding}"):
self.validate_identity(
f"CREATE VIEW emp_v WITH SCHEMA {schema_binding} AS SELECT * FROM emp"
)
def test_minus(self):
self.validate_all(
"SELECT * FROM db.table1 MINUS SELECT * FROM db.table2",
write={
"spark": "SELECT * FROM db.table1 EXCEPT SELECT * FROM db.table2",
"databricks": "SELECT * FROM db.table1 EXCEPT SELECT * FROM db.table2",
},
)

View file

@@ -1,3 +1,4 @@
from sqlglot import exp
from tests.dialects.test_dialect import Validator
@@ -31,6 +32,10 @@ class TestTeradata(Validator):
},
)
self.validate_identity(
"RENAME TABLE emp TO employee", check_command_warning=True
).assert_is(exp.Command)
def test_translate(self):
self.validate_all(
"TRANSLATE(x USING LATIN_TO_UNICODE)",

View file

@@ -16,3 +16,35 @@ class TestTrino(Validator):
"SELECT TRIM('!foo!', '!')",
"SELECT TRIM('!' FROM '!foo!')",
)
def test_ddl(self):
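# Modeled ALTER TABLE/VIEW forms round-trip; unmodeled ones (SET AUTHORIZATION, SET PROPERTIES) fall back to exp.Command with a warning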
self.validate_identity("ALTER TABLE users RENAME TO people")
self.validate_identity("ALTER TABLE IF EXISTS users RENAME TO people")
self.validate_identity("ALTER TABLE users ADD COLUMN zip VARCHAR")
self.validate_identity("ALTER TABLE IF EXISTS users ADD COLUMN IF NOT EXISTS zip VARCHAR")
self.validate_identity("ALTER TABLE users DROP COLUMN zip")
self.validate_identity("ALTER TABLE IF EXISTS users DROP COLUMN IF EXISTS zip")
self.validate_identity("ALTER TABLE users RENAME COLUMN id TO user_id")
self.validate_identity("ALTER TABLE IF EXISTS users RENAME COLUMN IF EXISTS id TO user_id")
self.validate_identity("ALTER TABLE users ALTER COLUMN id SET DATA TYPE BIGINT")
self.validate_identity("ALTER TABLE users ALTER COLUMN id DROP NOT NULL")
self.validate_identity(
"ALTER TABLE people SET AUTHORIZATION alice", check_command_warning=True
)
self.validate_identity(
"ALTER TABLE people SET AUTHORIZATION ROLE PUBLIC", check_command_warning=True
)
self.validate_identity(
"ALTER TABLE people SET PROPERTIES x = 'y'", check_command_warning=True
)
self.validate_identity(
"ALTER TABLE people SET PROPERTIES foo = 123, 'foo bar' = 456",
check_command_warning=True,
)
self.validate_identity(
"ALTER TABLE people SET PROPERTIES x = DEFAULT", check_command_warning=True
)
self.validate_identity("ALTER VIEW people RENAME TO users")
self.validate_identity(
"ALTER VIEW people SET AUTHORIZATION alice", check_command_warning=True
)

View file

@@ -792,7 +792,7 @@ class TestTSQL(Validator):
self.validate_identity(f"CREATE VIEW a.b WITH {view_attr} AS SELECT * FROM x")
self.validate_identity("ALTER TABLE dbo.DocExe DROP CONSTRAINT FK_Column_B").assert_is(
exp.Alter
).args["actions"][0].assert_is(exp.Drop)
for clustered_keyword in ("CLUSTERED", "NONCLUSTERED"):
@@ -822,6 +822,20 @@ class TestTSQL(Validator):
self.validate_identity("ALTER TABLE tbl SET DATA_DELETION=ON")
self.validate_identity("ALTER TABLE tbl SET DATA_DELETION=OFF")
self.validate_identity("ALTER VIEW v AS SELECT a, b, c, d FROM foo")
self.validate_identity("ALTER VIEW v AS SELECT * FROM foo WHERE c > 100")
self.validate_identity(
"ALTER VIEW v WITH SCHEMABINDING AS SELECT * FROM foo WHERE c > 100",
check_command_warning=True,
)
self.validate_identity(
"ALTER VIEW v WITH ENCRYPTION AS SELECT * FROM foo WHERE c > 100",
check_command_warning=True,
)
self.validate_identity(
"ALTER VIEW v WITH VIEW_METADATA AS SELECT * FROM foo WHERE c > 100",
check_command_warning=True,
)
self.validate_identity(
"CREATE PROCEDURE foo AS BEGIN DELETE FROM bla WHERE foo < CURRENT_TIMESTAMP - 7 END",
"CREATE PROCEDURE foo AS BEGIN DELETE FROM bla WHERE foo < GETDATE() - 7 END",
@@ -1513,6 +1527,15 @@ WHERE
},
)
# Check superfluous casts aren't added. ref: https://github.com/TobikoData/sqlmesh/issues/2672
self.validate_all(
"SELECT DATEDIFF(DAY, CAST(a AS DATETIME2), CAST(b AS DATETIME2)) AS x FROM foo",
write={
"tsql": "SELECT DATEDIFF(DAY, CAST(a AS DATETIME2), CAST(b AS DATETIME2)) AS x FROM foo",
"clickhouse": "SELECT DATE_DIFF(DAY, CAST(a AS Nullable(DateTime)), CAST(b AS Nullable(DateTime))) AS x FROM foo",
},
)
def test_lateral_subquery(self):
self.validate_all(
"SELECT x.a, x.b, t.v, t.y FROM x CROSS APPLY (SELECT v, y FROM t) t(v, y)",
@@ -1650,7 +1673,7 @@ WHERE
},
write={
"bigquery": "LAST_DAY(CAST(CURRENT_TIMESTAMP() AS DATE))",
"clickhouse": "LAST_DAY(CAST(CURRENT_TIMESTAMP() AS DATE))",
"clickhouse": "LAST_DAY(CAST(CURRENT_TIMESTAMP() AS Nullable(DATE)))",
"duckdb": "LAST_DAY(CAST(CURRENT_TIMESTAMP AS DATE))",
"mysql": "LAST_DAY(DATE(CURRENT_TIMESTAMP()))",
"postgres": "CAST(DATE_TRUNC('MONTH', CAST(CURRENT_TIMESTAMP AS DATE)) + INTERVAL '1 MONTH' - INTERVAL '1 DAY' AS DATE)",
@@ -1665,7 +1688,7 @@ WHERE
"EOMONTH(GETDATE(), -1)",
write={
"bigquery": "LAST_DAY(DATE_ADD(CAST(CURRENT_TIMESTAMP() AS DATE), INTERVAL -1 MONTH))",
"clickhouse": "LAST_DAY(DATE_ADD(MONTH, -1, CAST(CURRENT_TIMESTAMP() AS DATE)))",
"clickhouse": "LAST_DAY(DATE_ADD(MONTH, -1, CAST(CURRENT_TIMESTAMP() AS Nullable(DATE))))",
"duckdb": "LAST_DAY(CAST(CURRENT_TIMESTAMP AS DATE) + INTERVAL (-1) MONTH)",
"mysql": "LAST_DAY(DATE_ADD(CURRENT_TIMESTAMP(), INTERVAL -1 MONTH))",
"postgres": "CAST(DATE_TRUNC('MONTH', CAST(CURRENT_TIMESTAMP AS DATE) + INTERVAL '-1 MONTH') + INTERVAL '1 MONTH' - INTERVAL '1 DAY' AS DATE)",

View file

@@ -776,6 +776,10 @@ ALTER TABLE orders DROP PARTITION(dt = '2014-05-14', country = 'IN'), PARTITION(
ALTER TABLE mydataset.mytable DELETE WHERE x = 1
ALTER TABLE table1 RENAME COLUMN c1 TO c2
ALTER TABLE table1 RENAME COLUMN IF EXISTS c1 TO c2
ALTER TABLE table1 RENAME TO table2
ALTER VIEW view1 AS SELECT a, b, c FROM table1
ALTER VIEW view1 AS SELECT a, b, c FROM table1 UNION ALL SELECT a, b, c FROM table2
ALTER VIEW view1 AS SELECT a, b, c FROM table1 UNION ALL SELECT a, b, c FROM table2 LIMIT 100
SELECT div.a FROM test_table AS div
WITH view AS (SELECT 1 AS x) SELECT * FROM view
ARRAY<STRUCT<INT, DOUBLE, ARRAY<INT>>>
@@ -872,4 +876,5 @@ SELECT cube, cube.x FROM cube
SELECT * FROM a STRAIGHT_JOIN b
SELECT COUNT(DISTINCT "foo bar") FROM (SELECT 1 AS "foo bar") AS t
SELECT vector
WITH all AS (SELECT 1 AS count) SELECT all.count FROM all
SELECT rename

View file

@@ -15,6 +15,13 @@ bool;
null;
null;
null and false;
bool;
null + 1;
int;
CASE WHEN x THEN NULL ELSE 1 END;
INT;

View file

@@ -52,6 +52,10 @@ SELECT "x"."a" AS "a" FROM "x" AS "x" WHERE CASE WHEN COALESCE("x"."b" <> 0, 1 <
DATE('2023-01-01');
CAST('2023-01-01' AS DATE);
-- Some dialects only allow dates
DATE('2023-01-01 00:00:00');
DATE('2023-01-01 00:00:00');
TIMESTAMP('2023-01-01');
CAST('2023-01-01' AS TIMESTAMP);

View file

@@ -343,6 +343,11 @@ WITH tbl1 AS (SELECT STRUCT(1 AS col1, Struct(5 AS col1)) AS col) SELECT tbl1.co
WITH tbl1 AS (SELECT STRUCT(1 AS col1, 2 AS col1) AS col) SELECT tbl1.col.* FROM tbl1;
WITH tbl1 AS (SELECT STRUCT(1 AS col1, 2 AS col1) AS col) SELECT tbl1.col.* FROM tbl1 AS tbl1;
# title: CSV files are not scanned by default
# execute: false
SELECT * FROM READ_CSV('file.csv');
SELECT * FROM READ_CSV('file.csv') AS _q_0;
--------------------------------------
-- CTEs
--------------------------------------
@@ -655,6 +660,16 @@ SELECT x.a + x.b AS f, x.a + x.b AS _col_1, x.a + x.b + 5 AS _col_2 FROM x AS x;
SELECT a, SUM(b) AS c, SUM(c) OVER(PARTITION BY a) AS d from x group by 1 ORDER BY a;
SELECT x.a AS a, SUM(x.b) AS c, SUM(SUM(x.b)) OVER (PARTITION BY x.a) AS d FROM x AS x GROUP BY x.a ORDER BY a;
# title: we can't expand aliases corresponding to recursive CTE columns (CTE names output columns)
# execute: false
WITH RECURSIVE t(c) AS (SELECT 1 AS c UNION ALL SELECT c + 1 AS c FROM t WHERE c <= 10) SELECT c FROM t;
WITH RECURSIVE t(c) AS (SELECT 1 AS c UNION ALL SELECT t.c + 1 AS c FROM t AS t WHERE t.c <= 10) SELECT t.c AS c FROM t AS t;
# title: we can't expand aliases corresponding to recursive CTE columns (CTE doesn't name output columns)
# execute: false
WITH RECURSIVE t AS (SELECT 1 AS c UNION ALL SELECT c + 1 AS c FROM t WHERE c <= 10) SELECT c FROM t;
WITH RECURSIVE t AS (SELECT 1 AS c UNION ALL SELECT t.c + 1 AS c FROM t AS t WHERE t.c <= 10) SELECT t.c AS c FROM t AS t;
--------------------------------------
-- Wrapped tables / join constructs
--------------------------------------

View file

@@ -1,3 +1,4 @@
import sys
import datetime
import math
import unittest
@@ -431,6 +432,31 @@ class TestExpressions(unittest.TestCase):
table = expression.find(exp.Table)
self.assertEqual(table.alias_column_names, ["a", "b"])
def test_cast(self):
expression = parse_one("select cast(x as DATE)")
casts = list(expression.find_all(exp.Cast))
self.assertEqual(len(casts), 1)
cast = casts[0]
self.assertTrue(cast.to.is_type(exp.DataType.Type.DATE))
# check that already-cast values aren't re-cast if wrapped in a cast to the same type
recast = exp.cast(cast, to=exp.DataType.Type.DATE)
self.assertEqual(recast, cast)
self.assertEqual(recast.sql(), "CAST(x AS DATE)")
# however, recasting is fine if the types are different
recast = exp.cast(cast, to=exp.DataType.Type.VARCHAR)
self.assertNotEqual(recast, cast)
self.assertEqual(len(list(recast.find_all(exp.Cast))), 2)
self.assertEqual(recast.sql(), "CAST(CAST(x AS DATE) AS VARCHAR)")
# check that dialect is used when casting strings
self.assertEqual(
exp.cast("x", to="regtype", dialect="postgres").sql(), "CAST(x AS REGTYPE)"
)
self.assertEqual(exp.cast("`x`", to="date", dialect="hive").sql(), 'CAST("x" AS DATE)')
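# illustrative sketch (not exercised above): exp.cast is idempotent for matching types,
# so exp.cast(exp.cast("x", "date"), "date").sql() == "CAST(x AS DATE)"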
def test_ctes(self):
expression = parse_one("SELECT a FROM x")
self.assertEqual(expression.ctes, [])
@@ -657,7 +683,10 @@ class TestExpressions(unittest.TestCase):
self.assertIsInstance(parse_one("TIME_TO_TIME_STR(a)"), exp.Cast)
self.assertIsInstance(parse_one("TIME_TO_UNIX(a)"), exp.TimeToUnix)
self.assertIsInstance(parse_one("TIME_STR_TO_DATE(a)"), exp.TimeStrToDate)
self.assertIsInstance(parse_one("TIME_STR_TO_TIME(a)"), exp.TimeStrToTime)
(self.assertIsInstance(parse_one("TIME_STR_TO_TIME(a)"), exp.TimeStrToTime),)
self.assertIsInstance(
parse_one("TIME_STR_TO_TIME(a, 'America/Los_Angeles')"), exp.TimeStrToTime
)
self.assertIsInstance(parse_one("TIME_STR_TO_UNIX(a)"), exp.TimeStrToUnix)
self.assertIsInstance(parse_one("TRIM(LEADING 'b' FROM 'bla')"), exp.Trim)
self.assertIsInstance(parse_one("TS_OR_DS_ADD(a, 1, 'day')"), exp.TsOrDsAdd)
@@ -791,6 +820,7 @@ class TestExpressions(unittest.TestCase):
def test_convert(self):
from collections import namedtuple
import pytz
PointTuple = namedtuple("Point", ["x", "y"])
@@ -809,11 +839,17 @@ class TestExpressions(unittest.TestCase):
({"x": None}, "MAP(ARRAY('x'), ARRAY(NULL))"),
(
datetime.datetime(2022, 10, 1, 1, 1, 1, 1),
"TIME_STR_TO_TIME('2022-10-01 01:01:01.000001+00:00')",
"TIME_STR_TO_TIME('2022-10-01 01:01:01.000001')",
),
(
datetime.datetime(2022, 10, 1, 1, 1, 1, tzinfo=datetime.timezone.utc),
"TIME_STR_TO_TIME('2022-10-01 01:01:01+00:00')",
"TIME_STR_TO_TIME('2022-10-01 01:01:01+00:00', 'UTC')",
),
(
pytz.timezone("America/Los_Angeles").localize(
datetime.datetime(2022, 10, 1, 1, 1, 1)
),
"TIME_STR_TO_TIME('2022-10-01 01:01:01-07:00', 'America/Los_Angeles')",
),
(datetime.date(2022, 10, 1), "DATE_STR_TO_DATE('2022-10-01')"),
(math.nan, "NULL"),
@@ -829,6 +865,21 @@ class TestExpressions(unittest.TestCase):
"MAP_FROM_ARRAYS(ARRAY('test'), ARRAY('value'))",
)
@unittest.skipUnless(sys.version_info >= (3, 9), "zoneinfo only available from python 3.9+")
def test_convert_python39(self):
import zoneinfo
for value, expected in [
(
datetime.datetime(
2022, 10, 1, 1, 1, 1, tzinfo=zoneinfo.ZoneInfo("America/Los_Angeles")
),
"TIME_STR_TO_TIME('2022-10-01 01:01:01-07:00', 'America/Los_Angeles')",
)
]:
with self.subTest(value):
self.assertEqual(exp.convert(value).sql(), expected)
def test_comment_alias(self):
sql = """
SELECT
@@ -993,16 +1044,15 @@ FROM foo""",
self.assertEqual(exp.DataType.build("UNKNOWN", dialect="bigquery").sql(), "UNKNOWN")
self.assertEqual(exp.DataType.build("UNKNOWN", dialect="snowflake").sql(), "UNKNOWN")
self.assertEqual(exp.DataType.build("TIMESTAMP", dialect="bigquery").sql(), "TIMESTAMPTZ")
self.assertEqual(
exp.DataType.build("struct<x int>", dialect="spark").sql(), "STRUCT<x INT>"
)
self.assertEqual(exp.DataType.build("USER-DEFINED").sql(), "USER-DEFINED")
self.assertEqual(exp.DataType.build("ARRAY<UNKNOWN>").sql(), "ARRAY<UNKNOWN>")
self.assertEqual(exp.DataType.build("ARRAY<NULL>").sql(), "ARRAY<NULL>")
self.assertEqual(exp.DataType.build("varchar(100) collate 'en-ci'").sql(), "VARCHAR(100)")
self.assertEqual(exp.DataType.build("int[3]").sql(dialect="duckdb"), "INT[3]")
self.assertEqual(exp.DataType.build("int[3][3]").sql(dialect="duckdb"), "INT[3][3]")
with self.assertRaises(ParseError):
exp.DataType.build("varchar(")
@@ -1107,6 +1157,10 @@ FROM foo""",
dtype = exp.DataType.build("a.b.c", udt=True)
assert dtype.is_type("a.b.c")
dtype = exp.DataType.build("Nullable(Int32)", dialect="clickhouse")
assert dtype.is_type("int")
assert not dtype.is_type("int", check_nullable=True)
with self.assertRaises(ParseError):
exp.DataType.build("foo")

View file

@@ -1,3 +1,4 @@
import time
import unittest
from sqlglot import exp, parse_one
@@ -43,3 +44,10 @@ class TestGenerator(unittest.TestCase):
assert parse_one("X").sql(identify="safe") == "X"
assert parse_one("x as 1").sql(identify="safe") == '"x" AS "1"'
assert parse_one("X as 1").sql(identify="safe") == 'X AS "1"'
def test_generate_nested_binary(self):
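# a 500-deep chain of string concatenations should generate without deep recursion, well inside the time bound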
sql = "SELECT 'foo'" + (" || 'foo'" * 500)
now = time.time()
self.assertEqual(parse_one(sql).sql(), sql)
self.assertLessEqual(time.time() - now, 0.1)

View file

@@ -360,6 +360,37 @@ class TestOptimizer(unittest.TestCase):
"SELECT _q_0.id AS id, _q_0.dt AS dt, _q_0.v AS v FROM (SELECT t1.id AS id, t1.dt AS dt, sum(coalesce(t2.v, 0)) AS v FROM t1 AS t1 LEFT JOIN lkp AS lkp ON t1.id = lkp.id LEFT JOIN t2 AS t2 ON lkp.other_id = t2.other_id AND t1.dt = t2.dt AND COALESCE(t1.common, lkp.common) = t2.common WHERE t1.id > 10 GROUP BY t1.id, t1.dt) AS _q_0",
)
# Detection of correlation where columns are referenced in derived tables nested within subqueries
self.assertEqual(
optimizer.qualify.qualify(
parse_one(
"SELECT a.g FROM a WHERE a.e < (SELECT MAX(u) FROM (SELECT SUM(c.b) AS u FROM c WHERE c.d = f GROUP BY c.e) w)"
),
schema={
"a": {"g": "INT", "e": "INT", "f": "INT"},
"c": {"d": "INT", "e": "INT", "b": "INT"},
},
quote_identifiers=False,
).sql(),
"SELECT a.g AS g FROM a AS a WHERE a.e < (SELECT MAX(w.u) AS _col_0 FROM (SELECT SUM(c.b) AS u FROM c AS c WHERE c.d = a.f GROUP BY c.e) AS w)",
)
# Detection of correlation where columns are referenced in derived tables nested within lateral joins
self.assertEqual(
optimizer.qualify.qualify(
parse_one(
"SELECT u.user_id, l.log_date FROM users AS u CROSS JOIN LATERAL (SELECT l1.log_date FROM (SELECT l.log_date FROM logs AS l WHERE l.user_id = u.user_id AND l.log_date <= 100 ORDER BY l.log_date LIMIT 1) AS l1) AS l",
dialect="postgres",
),
schema={
"users": {"user_id": "text", "log_date": "date"},
"logs": {"user_id": "text", "log_date": "date"},
},
quote_identifiers=False,
).sql("postgres"),
"SELECT u.user_id AS user_id, l.log_date AS log_date FROM users AS u CROSS JOIN LATERAL (SELECT l1.log_date AS log_date FROM (SELECT l.log_date AS log_date FROM logs AS l WHERE l.user_id = u.user_id AND l.log_date <= 100 ORDER BY l.log_date LIMIT 1) AS l1) AS l",
)
self.check_file(
"qualify_columns",
qualify_columns,
@@ -591,7 +622,7 @@ SELECT
"_q_0"."n_comment" AS "n_comment"
FROM READ_CSV('tests/fixtures/optimizer/tpc-h/nation.csv.gz', 'delimiter', '|') AS "_q_0"
""".strip(),
optimizer.optimize(expression, infer_csv_schemas=True).sql(pretty=True),
)
def test_scope(self):
@@ -1028,31 +1059,14 @@ FROM READ_CSV('tests/fixtures/optimizer/tpc-h/nation.csv.gz', 'delimiter', '|')
concat_expr.right.expressions[0].type.this, exp.DataType.Type.VARCHAR
) # x.cola (arg)
# Ensures we don't raise if there are unqualified columns
annotate_types(parse_one("select x from y lateral view explode(y) as x")).expressions[0]
def test_null_annotation(self):
expression = annotate_types(parse_one("SELECT NULL + 2 AS col")).expressions[0].this
self.assertEqual(expression.left.type.this, exp.DataType.Type.NULL)
self.assertEqual(expression.right.type.this, exp.DataType.Type.INT)
def test_nullable_annotation(self):
nullable = exp.DataType.build("NULLABLE", expressions=exp.DataType.build("BOOLEAN"))
expression = annotate_types(parse_one("NULL AND FALSE"))
self.assertEqual(expression.type, nullable)
self.assertEqual(expression.left.type.this, exp.DataType.Type.NULL)
self.assertEqual(expression.right.type.this, exp.DataType.Type.BOOLEAN)
# NULL <op> UNKNOWN should yield UNKNOWN
self.assertEqual(
annotate_types(parse_one("SELECT NULL + ANONYMOUS_FUNC()")).expressions[0].type.this,
exp.DataType.Type.UNKNOWN,
)
def test_predicate_annotation(self):
expression = annotate_types(parse_one("x BETWEEN a AND b"))
@@ -1181,6 +1195,19 @@ FROM READ_CSV('tests/fixtures/optimizer/tpc-h/nation.csv.gz', 'delimiter', '|')
exp.DataType.build("date"),
)
self.assertEqual(
annotate_types(
optimizer.qualify.qualify(
parse_one(
"SELECT x FROM UNNEST(GENERATE_TIMESTAMP_ARRAY('2016-10-05 00:00:00', '2016-10-06 02:00:00', interval 1 day)) AS x"
)
)
)
.selects[0]
.type,
exp.DataType.build("timestamp"),
)
def test_map_annotation(self):
# ToMap annotation
expression = annotate_types(parse_one("SELECT MAP {'x': 1}", read="duckdb"))
@@ -1196,6 +1223,26 @@ FROM READ_CSV('tests/fixtures/optimizer/tpc-h/nation.csv.gz', 'delimiter', '|')
expression = annotate_types(parse_one("SELECT MAP('a', 'b')", read="spark"))
self.assertEqual(expression.selects[0].type, exp.DataType.build("MAP(VARCHAR, VARCHAR)"))
def test_union_annotation(self):
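# a UNION column is annotated with the wider of the two sides' types; NULL defers to the other side and UNKNOWN propagates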
for left, right, expected_type in (
("SELECT 1::INT AS c", "SELECT 2::BIGINT AS c", "BIGINT"),
("SELECT 1 AS c", "SELECT NULL AS c", "INT"),
("SELECT FOO() AS c", "SELECT 1 AS c", "UNKNOWN"),
("SELECT FOO() AS c", "SELECT BAR() AS c", "UNKNOWN"),
):
with self.subTest(f"left: {left}, right: {right}, expected: {expected_type}"):
lr = annotate_types(parse_one(f"SELECT t.c FROM ({left} UNION ALL {right}) t(c)"))
rl = annotate_types(parse_one(f"SELECT t.c FROM ({right} UNION ALL {left}) t(c)"))
assert lr.selects[0].type == rl.selects[0].type == exp.DataType.build(expected_type)
union_by_name = annotate_types(
parse_one(
"SELECT t.a, t.d FROM (SELECT 1 a, 3 d, UNION ALL BY NAME SELECT 7.0 d, 8::BIGINT a) AS t(a, d)"
)
)
self.assertEqual(union_by_name.selects[0].type.this, exp.DataType.Type.BIGINT)
self.assertEqual(union_by_name.selects[1].type.this, exp.DataType.Type.DOUBLE)
def test_recursive_cte(self):
query = parse_one(
"""

View file

@@ -579,12 +579,6 @@ class TestParser(unittest.TestCase):
logger,
)
def test_rename_table(self):
self.assertEqual(
parse_one("ALTER TABLE foo RENAME TO bar").sql(),
"ALTER TABLE foo RENAME TO bar",
)
def test_pivot_columns(self):
nothing_aliased = """
SELECT * FROM (
@@ -705,77 +699,19 @@ class TestParser(unittest.TestCase):
def test_parse_nested(self):
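# repeated joins, UNNEST laterals and OUTER APPLYs should parse in roughly linear time; each variant below is time-bounded to catch pathological slowdowns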
now = time.time()
query = parse_one("SELECT * FROM a " + ("LEFT JOIN b ON a.id = b.id " * 38))
self.assertIsNotNone(query)
self.assertLessEqual(time.time() - now, 0.1)
now = time.time()
query = parse_one("SELECT * FROM a " + ("LEFT JOIN UNNEST(ARRAY[]) " * 15))
self.assertIsNotNone(query)
self.assertLessEqual(time.time() - now, 0.1)
now = time.time()
query = parse_one("SELECT * FROM a " + ("OUTER APPLY (SELECT * FROM b) " * 30))
self.assertIsNotNone(query)
self.assertLessEqual(time.time() - now, 0.1)
def test_parse_properties(self):
self.assertEqual(
@@ -903,3 +839,18 @@ class TestParser(unittest.TestCase):
def test_parse_prop_eq(self):
self.assertIsInstance(parse_one("x(a := b and c)").expressions[0], exp.PropertyEQ)
def test_collate(self):
collates = [
('pg_catalog."default"', exp.Column),
('"en_DE"', exp.Identifier),
("LATIN1_GENERAL_BIN", exp.Var),
("'en'", exp.Literal),
]
for collate_pair in collates:
collate_node = parse_one(
f"""SELECT * FROM t WHERE foo LIKE '%bar%' COLLATE {collate_pair[0]}"""
).find(exp.Collate)
self.assertIsInstance(collate_node, exp.Collate)
self.assertIsInstance(collate_node.expression, collate_pair[1])

View file

@@ -202,11 +202,11 @@ class TestSchema(unittest.TestCase):
dialect="clickhouse",
)
table_z = exp.table_("z", db="y", catalog="x")
table_z = exp.table_("Z", db="y", catalog="x")
table_w = exp.table_("w", db="y", catalog="x")
self.assertEqual(schema.column_names(table_z), ["a", "B"])
self.assertEqual(schema.column_names(table_w), ["c"])
self.assertEqual(schema.column_names(table_w), ["C"])
schema = MappingSchema(schema={"x": {"`y`": "INT"}}, dialect="clickhouse")
self.assertEqual(schema.column_names(exp.table_("x")), ["y"])

View file

@@ -733,6 +733,11 @@ FROM x""",
self.validate("TIME_TO_STR(x, 'y')", "DATE_FORMAT(x, 'y')", write="hive")
self.validate("TIME_STR_TO_TIME(x)", "TIME_STR_TO_TIME(x)", write=None)
self.validate(
"TIME_STR_TO_TIME(x, 'America/Los_Angeles')",
"TIME_STR_TO_TIME(x, 'America/Los_Angeles')",
write=None,
)
self.validate("TIME_STR_TO_UNIX(x)", "TIME_STR_TO_UNIX(x)", write=None)
self.validate("TIME_TO_TIME_STR(x)", "CAST(x AS TEXT)", write=None)
self.validate("TIME_TO_STR(x, 'y')", "TIME_TO_STR(x, 'y')", write=None)
@@ -845,7 +850,6 @@ FROM x""",
"ALTER TABLE table1 RENAME COLUMN c1 TO c2, c2 TO c3",
"ALTER TABLE table1 RENAME COLUMN c1 c2",
"ALTER TYPE electronic_mail RENAME TO email",
"ALTER VIEW foo ALTER COLUMN bla SET DEFAULT 'NOT SET'",
"ALTER schema doo",
"ANALYZE a.y",
"CALL catalog.system.iceberg_procedure_name(named_arg_1 => 'arg_1', named_arg_2 => 'arg_2')",