
Adding upstream version 20.9.0.

Signed-off-by: Daniel Baumann <daniel@debian.org>
Daniel Baumann 2025-02-13 21:18:57 +01:00
parent 943dfc0887
commit 8a068da99c
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
144 changed files with 78309 additions and 59609 deletions


@@ -809,7 +809,7 @@ class TestFunctions(unittest.TestCase):
self.assertEqual("DATE_ADD(cola, colb)", col_col_for_add.sql())
current_date_add = SF.date_add(SF.current_date(), 5)
self.assertEqual("DATE_ADD(CURRENT_DATE, 5)", current_date_add.sql())
self.assertEqual("DATEADD(day, 5, CURRENT_DATE)", current_date_add.sql(dialect="snowflake"))
self.assertEqual("DATEADD(DAY, 5, CURRENT_DATE)", current_date_add.sql(dialect="snowflake"))
def test_date_sub(self):
col_str = SF.date_sub("cola", 2)
@@ -859,15 +859,15 @@ class TestFunctions(unittest.TestCase):
def test_trunc(self):
col_str = SF.trunc("cola", "year")
self.assertEqual("TRUNC(cola, 'year')", col_str.sql())
self.assertEqual("TRUNC(cola, 'YEAR')", col_str.sql())
col = SF.trunc(SF.col("cola"), "year")
self.assertEqual("TRUNC(cola, 'year')", col.sql())
self.assertEqual("TRUNC(cola, 'YEAR')", col.sql())
def test_date_trunc(self):
col_str = SF.date_trunc("year", "cola")
self.assertEqual("DATE_TRUNC('year', cola)", col_str.sql())
col = SF.date_trunc("year", SF.col("cola"))
self.assertEqual("DATE_TRUNC('year', cola)", col.sql())
self.assertEqual("DATE_TRUNC('YEAR', cola)", col_str.sql())
col = SF.date_trunc("YEAR", SF.col("cola"))
self.assertEqual("DATE_TRUNC('YEAR', cola)", col.sql())
def test_next_day(self):
col_str = SF.next_day("cola", "Mon")
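Sketch (not part of the diff): the updated assertions above, reproduced directly; the import is an assumption matching the SF alias these tests use.

from sqlglot.dataframe.sql import functions as SF  # assumed import behind the tests' SF alias

# Per the updated tests, Snowflake output now uppercases the unit,
# and TRUNC's unit literal is uppercased as well.
print(SF.date_add(SF.current_date(), 5).sql(dialect="snowflake"))  # DATEADD(DAY, 5, CURRENT_DATE)
print(SF.trunc(SF.col("cola"), "year").sql())  # TRUNC(cola, 'YEAR')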


@@ -61,7 +61,7 @@ class TestDataframeSession(DataFrameSQLValidator):
]
)
df = self.spark.createDataFrame([[{"sub_cola": 1, "sub_colb": "test"}]], schema)
expected = "SELECT CAST(`a2`.`cola` AS STRUCT<`sub_cola`: INT, `sub_colb`: STRING>) AS `cola` FROM VALUES (STRUCT(1 AS `sub_cola`, 'test' AS `sub_colb`)) AS `a2`(`cola`)"
expected = "SELECT `a2`.`cola` AS `cola` FROM VALUES (STRUCT(1 AS `sub_cola`, 'test' AS `sub_colb`)) AS `a2`(`cola`)"
self.compare_sql(df, expected)


@@ -17,6 +17,16 @@ class TestBigQuery(Validator):
maxDiff = None
def test_bigquery(self):
with self.assertLogs(helper_logger) as cm:
self.validate_identity(
"SELECT * FROM t AS t(c1, c2)",
"SELECT * FROM t AS t",
)
self.assertEqual(
cm.output, ["WARNING:sqlglot:Named columns are not supported in table alias."]
)
with self.assertLogs(helper_logger) as cm:
self.validate_all(
"SELECT a[1], b[OFFSET(1)], c[ORDINAL(1)], d[SAFE_OFFSET(1)], e[SAFE_ORDINAL(1)]",
@@ -76,6 +86,9 @@ class TestBigQuery(Validator):
["FOR record IN (SELECT word FROM shakespeare) DO SELECT record.word", "END FOR"],
)
self.validate_identity("SELECT * FROM dataset.my_table TABLESAMPLE SYSTEM (10 PERCENT)")
self.validate_identity("TIME('2008-12-25 15:30:00+08')")
self.validate_identity("TIME('2008-12-25 15:30:00+08', 'America/Los_Angeles')")
self.validate_identity("SELECT test.Unknown FROM test")
self.validate_identity(r"SELECT '\n\r\a\v\f\t'")
self.validate_identity("SELECT * FROM tbl FOR SYSTEM_TIME AS OF z")
@@ -130,6 +143,7 @@ class TestBigQuery(Validator):
self.validate_identity("SELECT y + 1 FROM x GROUP BY y + 1 ORDER BY 1")
self.validate_identity("SELECT TIMESTAMP_SECONDS(2) AS t")
self.validate_identity("SELECT TIMESTAMP_MILLIS(2) AS t")
self.validate_identity("""SELECT JSON_EXTRACT_SCALAR('{"a": 5}', '$.a')""")
self.validate_identity(
"FOR record IN (SELECT word, word_count FROM bigquery-public-data.samples.shakespeare LIMIT 5) DO SELECT record.word, record.word_count"
)
@@ -154,6 +168,33 @@ class TestBigQuery(Validator):
self.validate_identity(
"SELECT LAST_VALUE(a IGNORE NULLS) OVER y FROM x WINDOW y AS (PARTITION BY CATEGORY)",
)
self.validate_identity(
"""SELECT JSON_EXTRACT_SCALAR('5')""", """SELECT JSON_EXTRACT_SCALAR('5', '$')"""
)
self.validate_identity(
"SELECT SPLIT(foo)",
"SELECT SPLIT(foo, ',')",
)
self.validate_identity(
"SELECT 1 AS hash",
"SELECT 1 AS `hash`",
)
self.validate_identity(
"SELECT 1 AS at",
"SELECT 1 AS `at`",
)
self.validate_identity(
'x <> ""',
"x <> ''",
)
self.validate_identity(
'x <> """"""',
"x <> ''",
)
self.validate_identity(
"x <> ''''''",
"x <> ''",
)
self.validate_identity(
"SELECT a overlaps",
"SELECT a AS overlaps",
@@ -179,13 +220,122 @@ class TestBigQuery(Validator):
"SELECT * FROM UNNEST(x) WITH OFFSET AS offset EXCEPT DISTINCT SELECT * FROM UNNEST(y) WITH OFFSET AS offset",
)
self.validate_all("SELECT SPLIT(foo)", write={"bigquery": "SELECT SPLIT(foo, ',')"})
self.validate_all("SELECT 1 AS hash", write={"bigquery": "SELECT 1 AS `hash`"})
self.validate_all("SELECT 1 AS at", write={"bigquery": "SELECT 1 AS `at`"})
self.validate_all('x <> ""', write={"bigquery": "x <> ''"})
self.validate_all('x <> """"""', write={"bigquery": "x <> ''"})
self.validate_all("x <> ''''''", write={"bigquery": "x <> ''"})
self.validate_all("CAST(x AS DATETIME)", read={"": "x::timestamp"})
self.validate_all(
"SELECT * FROM Produce UNPIVOT((first_half_sales, second_half_sales) FOR semesters IN ((Q1, Q2) AS 'semester_1', (Q3, Q4) AS 'semester_2'))",
read={
"spark": "SELECT * FROM Produce UNPIVOT((first_half_sales, second_half_sales) FOR semesters IN ((Q1, Q2) AS semester_1, (Q3, Q4) AS semester_2))",
},
write={
"bigquery": "SELECT * FROM Produce UNPIVOT((first_half_sales, second_half_sales) FOR semesters IN ((Q1, Q2) AS 'semester_1', (Q3, Q4) AS 'semester_2'))",
"spark": "SELECT * FROM Produce UNPIVOT((first_half_sales, second_half_sales) FOR semesters IN ((Q1, Q2) AS semester_1, (Q3, Q4) AS semester_2))",
},
)
self.validate_all(
"SELECT * FROM Produce UNPIVOT((first_half_sales, second_half_sales) FOR semesters IN ((Q1, Q2) AS 1, (Q3, Q4) AS 2))",
write={
"bigquery": "SELECT * FROM Produce UNPIVOT((first_half_sales, second_half_sales) FOR semesters IN ((Q1, Q2) AS 1, (Q3, Q4) AS 2))",
"spark": "SELECT * FROM Produce UNPIVOT((first_half_sales, second_half_sales) FOR semesters IN ((Q1, Q2) AS `1`, (Q3, Q4) AS `2`))",
},
)
self.validate_all(
"SELECT UNIX_DATE(DATE '2008-12-25')",
write={
"bigquery": "SELECT UNIX_DATE(CAST('2008-12-25' AS DATE))",
"duckdb": "SELECT DATE_DIFF('DAY', CAST('1970-01-01' AS DATE), CAST('2008-12-25' AS DATE))",
},
)
self.validate_all(
"SELECT LAST_DAY(CAST('2008-11-25' AS DATE), MONTH)",
read={
"snowflake": "SELECT LAST_DAY(CAST('2008-11-25' AS DATE), MONS)",
},
write={
"bigquery": "SELECT LAST_DAY(CAST('2008-11-25' AS DATE), MONTH)",
"duckdb": "SELECT LAST_DAY(CAST('2008-11-25' AS DATE))",
"clickhouse": "SELECT LAST_DAY(CAST('2008-11-25' AS DATE))",
"mysql": "SELECT LAST_DAY(CAST('2008-11-25' AS DATE))",
"oracle": "SELECT LAST_DAY(CAST('2008-11-25' AS DATE))",
"postgres": "SELECT CAST(DATE_TRUNC('MONTH', CAST('2008-11-25' AS DATE)) + INTERVAL '1 MONTH' - INTERVAL '1 DAY' AS DATE)",
"presto": "SELECT LAST_DAY_OF_MONTH(CAST('2008-11-25' AS DATE))",
"redshift": "SELECT LAST_DAY(CAST('2008-11-25' AS DATE))",
"snowflake": "SELECT LAST_DAY(CAST('2008-11-25' AS DATE), MONTH)",
"spark": "SELECT LAST_DAY(CAST('2008-11-25' AS DATE))",
"tsql": "SELECT EOMONTH(CAST('2008-11-25' AS DATE))",
},
)
self.validate_all(
"SELECT LAST_DAY(CAST('2008-11-25' AS DATE), QUARTER)",
read={
"snowflake": "SELECT LAST_DAY(CAST('2008-11-25' AS DATE), QUARTER)",
},
write={
"duckdb": UnsupportedError,
"bigquery": "SELECT LAST_DAY(CAST('2008-11-25' AS DATE), QUARTER)",
"snowflake": "SELECT LAST_DAY(CAST('2008-11-25' AS DATE), QUARTER)",
},
)
self.validate_all(
"CAST(x AS DATETIME)",
read={
"": "x::timestamp",
},
)
self.validate_all(
"SELECT TIME(15, 30, 00)",
read={
"duckdb": "SELECT MAKE_TIME(15, 30, 00)",
"mysql": "SELECT MAKETIME(15, 30, 00)",
"postgres": "SELECT MAKE_TIME(15, 30, 00)",
"snowflake": "SELECT TIME_FROM_PARTS(15, 30, 00)",
},
write={
"bigquery": "SELECT TIME(15, 30, 00)",
"duckdb": "SELECT MAKE_TIME(15, 30, 00)",
"mysql": "SELECT MAKETIME(15, 30, 00)",
"postgres": "SELECT MAKE_TIME(15, 30, 00)",
"snowflake": "SELECT TIME_FROM_PARTS(15, 30, 00)",
"tsql": "SELECT TIMEFROMPARTS(15, 30, 00, 0, 0)",
},
)
self.validate_all(
"SELECT TIME('2008-12-25 15:30:00')",
write={
"bigquery": "SELECT TIME('2008-12-25 15:30:00')",
"duckdb": "SELECT CAST('2008-12-25 15:30:00' AS TIME)",
"mysql": "SELECT CAST('2008-12-25 15:30:00' AS TIME)",
"postgres": "SELECT CAST('2008-12-25 15:30:00' AS TIME)",
"redshift": "SELECT CAST('2008-12-25 15:30:00' AS TIME)",
"spark": "SELECT CAST('2008-12-25 15:30:00' AS TIMESTAMP)",
"tsql": "SELECT CAST('2008-12-25 15:30:00' AS TIME)",
},
)
self.validate_all(
"SELECT FORMAT_DATE('%Y%m%d', '2023-12-25')",
write={
"bigquery": "SELECT FORMAT_DATE('%Y%m%d', '2023-12-25')",
"duckdb": "SELECT STRFTIME(CAST('2023-12-25' AS DATE), '%Y%m%d')",
},
)
self.validate_all(
"SELECT COUNTIF(x)",
read={
"clickhouse": "SELECT countIf(x)",
"duckdb": "SELECT COUNT_IF(x)",
},
write={
"bigquery": "SELECT COUNTIF(x)",
"clickhouse": "SELECT countIf(x)",
"duckdb": "SELECT COUNT_IF(x)",
},
)
self.validate_all(
"SELECT TIMESTAMP_DIFF(TIMESTAMP_SECONDS(60), TIMESTAMP_SECONDS(0), minute)",
write={
"bigquery": "SELECT TIMESTAMP_DIFF(TIMESTAMP_SECONDS(60), TIMESTAMP_SECONDS(0), MINUTE)",
"duckdb": "SELECT DATE_DIFF('MINUTE', TO_TIMESTAMP(0), TO_TIMESTAMP(60))",
"snowflake": "SELECT TIMESTAMPDIFF(MINUTE, TO_TIMESTAMP(0), TO_TIMESTAMP(60))",
},
)
self.validate_all(
"SELECT TIMESTAMP_MICROS(x)",
read={
@@ -195,7 +345,7 @@ class TestBigQuery(Validator):
write={
"bigquery": "SELECT TIMESTAMP_MICROS(x)",
"duckdb": "SELECT MAKE_TIMESTAMP(x)",
"snowflake": "SELECT TO_TIMESTAMP(x / 1000, 3)",
"snowflake": "SELECT TO_TIMESTAMP(x, 6)",
"spark": "SELECT TIMESTAMP_MICROS(x)",
},
)
@@ -349,7 +499,7 @@ class TestBigQuery(Validator):
},
)
self.validate_all(
"WITH cte AS (SELECT [1, 2, 3] AS arr) SELECT IF(pos = pos_2, col, NULL) AS col FROM cte, UNNEST(GENERATE_ARRAY(0, GREATEST(ARRAY_LENGTH(arr)) - 1)) AS pos CROSS JOIN UNNEST(arr) AS col WITH OFFSET AS pos_2 WHERE pos = pos_2 OR (pos > (ARRAY_LENGTH(arr) - 1) AND pos_2 = (ARRAY_LENGTH(arr) - 1))",
"WITH cte AS (SELECT [1, 2, 3] AS arr) SELECT IF(pos = pos_2, col, NULL) AS col FROM cte CROSS JOIN UNNEST(GENERATE_ARRAY(0, GREATEST(ARRAY_LENGTH(arr)) - 1)) AS pos CROSS JOIN UNNEST(arr) AS col WITH OFFSET AS pos_2 WHERE pos = pos_2 OR (pos > (ARRAY_LENGTH(arr) - 1) AND pos_2 = (ARRAY_LENGTH(arr) - 1))",
read={
"spark": "WITH cte AS (SELECT ARRAY(1, 2, 3) AS arr) SELECT EXPLODE(arr) FROM cte"
},
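Sketch (not part of the diff): two of the BigQuery changes above, exercised through sqlglot's public transpile API; the outputs are the ones the updated tests assert.

import sqlglot

# TIMESTAMP_MICROS now maps to Snowflake's TO_TIMESTAMP with scale 6
# instead of the old TO_TIMESTAMP(x / 1000, 3).
print(sqlglot.transpile("SELECT TIMESTAMP_MICROS(x)", read="bigquery", write="snowflake")[0])
# SELECT TO_TIMESTAMP(x, 6)

# SPLIT without a delimiter gains the default ',' on round-trip.
print(sqlglot.transpile("SELECT SPLIT(foo)", read="bigquery", write="bigquery")[0])
# SELECT SPLIT(foo, ',')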


@@ -26,6 +26,7 @@ class TestClickhouse(Validator):
self.assertEqual(expr.sql(dialect="clickhouse"), "COUNT(x)")
self.assertIsNone(expr._meta)
self.validate_identity("countIf(x, y)")
self.validate_identity("x = y")
self.validate_identity("x <> y")
self.validate_identity("SELECT * FROM (SELECT a FROM b SAMPLE 0.01)")
@@ -44,10 +45,10 @@ class TestClickhouse(Validator):
self.validate_identity("ATTACH DATABASE DEFAULT ENGINE = ORDINARY")
self.validate_identity("CAST(['hello'], 'Array(Enum8(''hello'' = 1))')")
self.validate_identity("SELECT x, COUNT() FROM y GROUP BY x WITH TOTALS")
self.validate_identity("SELECT INTERVAL t.days day")
self.validate_identity("SELECT INTERVAL t.days DAY")
self.validate_identity("SELECT match('abc', '([a-z]+)')")
self.validate_identity("dictGet(x, 'y')")
self.validate_identity("SELECT * FROM final")
self.validate_identity("WITH final AS (SELECT 1) SELECT * FROM final")
self.validate_identity("SELECT * FROM x FINAL")
self.validate_identity("SELECT * FROM x AS y FINAL")
self.validate_identity("'a' IN mapKeys(map('a', 1, 'b', 2))")
@@ -60,6 +61,9 @@ class TestClickhouse(Validator):
self.validate_identity("SELECT * FROM foo GLOBAL LEFT ANY JOIN bla")
self.validate_identity("SELECT quantile(0.5)(a)")
self.validate_identity("SELECT quantiles(0.5)(a) AS x FROM t")
self.validate_identity("SELECT quantilesIf(0.5)(a, a > 1) AS x FROM t")
self.validate_identity("SELECT quantileState(0.5)(a) AS x FROM t")
self.validate_identity("SELECT deltaSumMerge(a) AS x FROM t")
self.validate_identity("SELECT quantiles(0.1, 0.2, 0.3)(a)")
self.validate_identity("SELECT quantileTiming(0.5)(RANGE(100))")
self.validate_identity("SELECT histogram(5)(a)")
@@ -132,7 +136,7 @@ class TestClickhouse(Validator):
},
)
self.validate_all(
"SELECT CAST('2020-01-01' AS TIMESTAMP) + INTERVAL '500' microsecond",
"SELECT CAST('2020-01-01' AS TIMESTAMP) + INTERVAL '500' MICROSECOND",
read={
"duckdb": "SELECT TIMESTAMP '2020-01-01' + INTERVAL '500 us'",
"postgres": "SELECT TIMESTAMP '2020-01-01' + INTERVAL '500 us'",
@@ -175,27 +179,27 @@ class TestClickhouse(Validator):
},
)
self.validate_all(
"DATE_ADD('day', 1, x)",
"DATE_ADD(DAY, 1, x)",
read={
"clickhouse": "dateAdd(day, 1, x)",
"presto": "DATE_ADD('day', 1, x)",
"clickhouse": "dateAdd(DAY, 1, x)",
"presto": "DATE_ADD('DAY', 1, x)",
},
write={
"clickhouse": "DATE_ADD('day', 1, x)",
"presto": "DATE_ADD('day', 1, x)",
"": "DATE_ADD(x, 1, 'day')",
"clickhouse": "DATE_ADD(DAY, 1, x)",
"presto": "DATE_ADD('DAY', 1, x)",
"": "DATE_ADD(x, 1, 'DAY')",
},
)
self.validate_all(
"DATE_DIFF('day', a, b)",
"DATE_DIFF(DAY, a, b)",
read={
"clickhouse": "dateDiff('day', a, b)",
"presto": "DATE_DIFF('day', a, b)",
"clickhouse": "dateDiff(DAY, a, b)",
"presto": "DATE_DIFF('DAY', a, b)",
},
write={
"clickhouse": "DATE_DIFF('day', a, b)",
"presto": "DATE_DIFF('day', a, b)",
"": "DATEDIFF(b, a, day)",
"clickhouse": "DATE_DIFF(DAY, a, b)",
"presto": "DATE_DIFF('DAY', a, b)",
"": "DATEDIFF(b, a, DAY)",
},
)
self.validate_all(
@@ -722,3 +726,5 @@ LIFETIME(MIN 0 MAX 0)""",
},
pretty=True,
)
self.validate_identity("""CREATE TABLE ip_data (ip4 IPv4, ip6 IPv6) ENGINE=TinyLog()""")
self.validate_identity("""CREATE TABLE dates (dt1 Date32) ENGINE=TinyLog()""")
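Sketch (not part of the diff): the unit canonicalization tested above; per the updated tests, ClickHouse now renders date/time units as bare uppercase keywords.

import sqlglot

print(sqlglot.transpile("DATE_DIFF('DAY', a, b)", read="presto", write="clickhouse")[0])
# DATE_DIFF(DAY, a, b)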


@@ -1,3 +1,5 @@
from sqlglot import transpile
from sqlglot.errors import ParseError
from tests.dialects.test_dialect import Validator
@@ -28,6 +30,14 @@ class TestDatabricks(Validator):
"SELECT * FROM sales UNPIVOT EXCLUDE NULLS (sales FOR quarter IN (q1 AS `Jan-Mar`))"
)
self.validate_identity(
"CREATE FUNCTION add_one(x INT) RETURNS INT LANGUAGE PYTHON AS $$def add_one(x):\n return x+1$$"
)
self.validate_identity(
"CREATE FUNCTION add_one(x INT) RETURNS INT LANGUAGE PYTHON AS $FOO$def add_one(x):\n return x+1$FOO$"
)
self.validate_all(
"CREATE TABLE foo (x INT GENERATED ALWAYS AS (YEAR(y)))",
write={
@@ -42,6 +52,18 @@ class TestDatabricks(Validator):
},
)
with self.assertRaises(ParseError):
transpile(
"CREATE FUNCTION add_one(x INT) RETURNS INT LANGUAGE PYTHON AS $foo$def add_one(x):\n return x+1$$",
read="databricks",
)
with self.assertRaises(ParseError):
transpile(
"CREATE FUNCTION add_one(x INT) RETURNS INT LANGUAGE PYTHON AS $foo bar$def add_one(x):\n return x+1$foo bar$",
read="databricks",
)
# https://docs.databricks.com/sql/language-manual/functions/colonsign.html
def test_json(self):
self.validate_identity("""SELECT c1 : price FROM VALUES ('{ "price": 5 }') AS T(c1)""")
@@ -81,77 +103,77 @@ class TestDatabricks(Validator):
self.validate_all(
"SELECT DATEDIFF(year, 'start', 'end')",
write={
"tsql": "SELECT DATEDIFF(year, 'start', 'end')",
"databricks": "SELECT DATEDIFF(year, 'start', 'end')",
"tsql": "SELECT DATEDIFF(YEAR, 'start', 'end')",
"databricks": "SELECT DATEDIFF(YEAR, 'start', 'end')",
},
)
self.validate_all(
"SELECT DATEDIFF(microsecond, 'start', 'end')",
write={
"databricks": "SELECT DATEDIFF(microsecond, 'start', 'end')",
"databricks": "SELECT DATEDIFF(MICROSECOND, 'start', 'end')",
"postgres": "SELECT CAST(EXTRACT(epoch FROM CAST('end' AS TIMESTAMP) - CAST('start' AS TIMESTAMP)) * 1000000 AS BIGINT)",
},
)
self.validate_all(
"SELECT DATEDIFF(millisecond, 'start', 'end')",
write={
"databricks": "SELECT DATEDIFF(millisecond, 'start', 'end')",
"databricks": "SELECT DATEDIFF(MILLISECOND, 'start', 'end')",
"postgres": "SELECT CAST(EXTRACT(epoch FROM CAST('end' AS TIMESTAMP) - CAST('start' AS TIMESTAMP)) * 1000 AS BIGINT)",
},
)
self.validate_all(
"SELECT DATEDIFF(second, 'start', 'end')",
write={
"databricks": "SELECT DATEDIFF(second, 'start', 'end')",
"databricks": "SELECT DATEDIFF(SECOND, 'start', 'end')",
"postgres": "SELECT CAST(EXTRACT(epoch FROM CAST('end' AS TIMESTAMP) - CAST('start' AS TIMESTAMP)) AS BIGINT)",
},
)
self.validate_all(
"SELECT DATEDIFF(minute, 'start', 'end')",
write={
"databricks": "SELECT DATEDIFF(minute, 'start', 'end')",
"databricks": "SELECT DATEDIFF(MINUTE, 'start', 'end')",
"postgres": "SELECT CAST(EXTRACT(epoch FROM CAST('end' AS TIMESTAMP) - CAST('start' AS TIMESTAMP)) / 60 AS BIGINT)",
},
)
self.validate_all(
"SELECT DATEDIFF(hour, 'start', 'end')",
write={
"databricks": "SELECT DATEDIFF(hour, 'start', 'end')",
"databricks": "SELECT DATEDIFF(HOUR, 'start', 'end')",
"postgres": "SELECT CAST(EXTRACT(epoch FROM CAST('end' AS TIMESTAMP) - CAST('start' AS TIMESTAMP)) / 3600 AS BIGINT)",
},
)
self.validate_all(
"SELECT DATEDIFF(day, 'start', 'end')",
write={
"databricks": "SELECT DATEDIFF(day, 'start', 'end')",
"databricks": "SELECT DATEDIFF(DAY, 'start', 'end')",
"postgres": "SELECT CAST(EXTRACT(epoch FROM CAST('end' AS TIMESTAMP) - CAST('start' AS TIMESTAMP)) / 86400 AS BIGINT)",
},
)
self.validate_all(
"SELECT DATEDIFF(week, 'start', 'end')",
write={
"databricks": "SELECT DATEDIFF(week, 'start', 'end')",
"databricks": "SELECT DATEDIFF(WEEK, 'start', 'end')",
"postgres": "SELECT CAST(EXTRACT(days FROM (CAST('end' AS TIMESTAMP) - CAST('start' AS TIMESTAMP))) / 7 AS BIGINT)",
},
)
self.validate_all(
"SELECT DATEDIFF(month, 'start', 'end')",
write={
"databricks": "SELECT DATEDIFF(month, 'start', 'end')",
"databricks": "SELECT DATEDIFF(MONTH, 'start', 'end')",
"postgres": "SELECT CAST(EXTRACT(year FROM AGE(CAST('end' AS TIMESTAMP), CAST('start' AS TIMESTAMP))) * 12 + EXTRACT(month FROM AGE(CAST('end' AS TIMESTAMP), CAST('start' AS TIMESTAMP))) AS BIGINT)",
},
)
self.validate_all(
"SELECT DATEDIFF(quarter, 'start', 'end')",
write={
"databricks": "SELECT DATEDIFF(quarter, 'start', 'end')",
"databricks": "SELECT DATEDIFF(QUARTER, 'start', 'end')",
"postgres": "SELECT CAST(EXTRACT(year FROM AGE(CAST('end' AS TIMESTAMP), CAST('start' AS TIMESTAMP))) * 4 + EXTRACT(month FROM AGE(CAST('end' AS TIMESTAMP), CAST('start' AS TIMESTAMP))) / 3 AS BIGINT)",
},
)
self.validate_all(
"SELECT DATEDIFF(year, 'start', 'end')",
write={
"databricks": "SELECT DATEDIFF(year, 'start', 'end')",
"databricks": "SELECT DATEDIFF(YEAR, 'start', 'end')",
"postgres": "SELECT CAST(EXTRACT(year FROM AGE(CAST('end' AS TIMESTAMP), CAST('start' AS TIMESTAMP))) AS BIGINT)",
},
)
@@ -160,8 +182,8 @@ class TestDatabricks(Validator):
self.validate_all(
"SELECT DATEADD(year, 1, '2020-01-01')",
write={
"tsql": "SELECT DATEADD(year, 1, '2020-01-01')",
"databricks": "SELECT DATEADD(year, 1, '2020-01-01')",
"tsql": "SELECT DATEADD(YEAR, 1, '2020-01-01')",
"databricks": "SELECT DATEADD(YEAR, 1, '2020-01-01')",
},
)
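Sketch (not part of the diff): the DATEDIFF unit change above; per the updated tests, units are now uppercased on output for Databricks and T-SQL.

import sqlglot

print(sqlglot.transpile("SELECT DATEDIFF(year, 'start', 'end')", read="databricks", write="tsql")[0])
# SELECT DATEDIFF(YEAR, 'start', 'end')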
self.validate_all(


@@ -104,6 +104,16 @@ class TestDialect(Validator):
"Please use the correct format: 'dialect [, k1 = v2 [, ...]]'.",
)
with self.assertRaises(ValueError) as cm:
Dialect.get_or_raise("myqsl")
self.assertEqual(str(cm.exception), "Unknown dialect 'myqsl'. Did you mean mysql?")
with self.assertRaises(ValueError) as cm:
Dialect.get_or_raise("asdfjasodiufjsd")
self.assertEqual(str(cm.exception), "Unknown dialect 'asdfjasodiufjsd'.")
def test_compare_dialects(self):
bigquery_class = Dialect["bigquery"]
bigquery_object = BigQuery()
@@ -767,32 +777,32 @@ class TestDialect(Validator):
"duckdb": "x + INTERVAL 1 DAY",
"hive": "DATE_ADD(x, 1)",
"mysql": "DATE_ADD(x, INTERVAL 1 DAY)",
"presto": "DATE_ADD('day', 1, x)",
"presto": "DATE_ADD('DAY', 1, x)",
"spark": "DATE_ADD(x, 1)",
"starrocks": "DATE_ADD(x, INTERVAL 1 DAY)",
"doris": "DATE_ADD(x, INTERVAL 1 DAY)",
},
)
self.validate_all(
"DATE_TRUNC('day', x)",
"DATE_TRUNC('DAY', x)",
read={
"bigquery": "DATE_TRUNC(x, day)",
"spark": "TRUNC(x, 'day')",
},
write={
"bigquery": "DATE_TRUNC(x, day)",
"duckdb": "DATE_TRUNC('day', x)",
"bigquery": "DATE_TRUNC(x, DAY)",
"duckdb": "DATE_TRUNC('DAY', x)",
"mysql": "DATE(x)",
"presto": "DATE_TRUNC('day', x)",
"postgres": "DATE_TRUNC('day', x)",
"snowflake": "DATE_TRUNC('day', x)",
"starrocks": "DATE_TRUNC('day', x)",
"spark": "TRUNC(x, 'day')",
"doris": "DATE_TRUNC(x, 'day')",
"presto": "DATE_TRUNC('DAY', x)",
"postgres": "DATE_TRUNC('DAY', x)",
"snowflake": "DATE_TRUNC('DAY', x)",
"starrocks": "DATE_TRUNC('DAY', x)",
"spark": "TRUNC(x, 'DAY')",
"doris": "DATE_TRUNC(x, 'DAY')",
},
)
self.validate_all(
"TIMESTAMP_TRUNC(x, day)",
"TIMESTAMP_TRUNC(x, DAY)",
read={
"bigquery": "TIMESTAMP_TRUNC(x, day)",
"duckdb": "DATE_TRUNC('day', x)",
@@ -805,18 +815,18 @@ class TestDialect(Validator):
},
)
self.validate_all(
"DATE_TRUNC('day', CAST(x AS DATE))",
"DATE_TRUNC('DAY', CAST(x AS DATE))",
read={
"presto": "DATE_TRUNC('day', x::DATE)",
"snowflake": "DATE_TRUNC('day', x::DATE)",
"presto": "DATE_TRUNC('DAY', x::DATE)",
"snowflake": "DATE_TRUNC('DAY', x::DATE)",
},
)
self.validate_all(
"TIMESTAMP_TRUNC(CAST(x AS DATE), day)",
"TIMESTAMP_TRUNC(CAST(x AS DATE), DAY)",
read={"postgres": "DATE_TRUNC('day', x::DATE)"},
)
self.validate_all(
"TIMESTAMP_TRUNC(CAST(x AS DATE), day)",
"TIMESTAMP_TRUNC(CAST(x AS DATE), DAY)",
read={"starrocks": "DATE_TRUNC('day', x::DATE)"},
)
self.validate_all(
@@ -850,23 +860,23 @@ class TestDialect(Validator):
},
)
self.validate_all(
"DATE_TRUNC('year', x)",
"DATE_TRUNC('YEAR', x)",
read={
"bigquery": "DATE_TRUNC(x, year)",
"spark": "TRUNC(x, 'year')",
},
write={
"bigquery": "DATE_TRUNC(x, year)",
"bigquery": "DATE_TRUNC(x, YEAR)",
"mysql": "STR_TO_DATE(CONCAT(YEAR(x), ' 1 1'), '%Y %c %e')",
"postgres": "DATE_TRUNC('year', x)",
"snowflake": "DATE_TRUNC('year', x)",
"starrocks": "DATE_TRUNC('year', x)",
"spark": "TRUNC(x, 'year')",
"doris": "DATE_TRUNC(x, 'year')",
"postgres": "DATE_TRUNC('YEAR', x)",
"snowflake": "DATE_TRUNC('YEAR', x)",
"starrocks": "DATE_TRUNC('YEAR', x)",
"spark": "TRUNC(x, 'YEAR')",
"doris": "DATE_TRUNC(x, 'YEAR')",
},
)
self.validate_all(
"TIMESTAMP_TRUNC(x, year)",
"TIMESTAMP_TRUNC(x, YEAR)",
read={
"bigquery": "TIMESTAMP_TRUNC(x, year)",
"postgres": "DATE_TRUNC(year, x)",
@@ -875,9 +885,9 @@ class TestDialect(Validator):
"starrocks": "DATE_TRUNC('year', x)",
},
write={
"bigquery": "TIMESTAMP_TRUNC(x, year)",
"spark": "DATE_TRUNC('year', x)",
"doris": "DATE_TRUNC(x, 'year')",
"bigquery": "TIMESTAMP_TRUNC(x, YEAR)",
"spark": "DATE_TRUNC('YEAR', x)",
"doris": "DATE_TRUNC(x, 'YEAR')",
},
)
self.validate_all(
@@ -952,7 +962,7 @@ class TestDialect(Validator):
"drill": "DATE_ADD(CAST('2020-01-01' AS DATE), INTERVAL 1 DAY)",
"duckdb": "CAST('2020-01-01' AS DATE) + INTERVAL 1 DAY",
"hive": "DATE_ADD(CAST('2020-01-01' AS DATE), 1)",
"presto": "DATE_ADD('day', 1, CAST('2020-01-01' AS DATE))",
"presto": "DATE_ADD('DAY', 1, CAST('2020-01-01' AS DATE))",
"spark": "DATE_ADD(CAST('2020-01-01' AS DATE), 1)",
},
)
@@ -1003,7 +1013,7 @@ class TestDialect(Validator):
)
self.validate_all(
f"{unit}(TS_OR_DS_TO_DATE(x))",
read={
write={
dialect: f"{unit}(x)"
for dialect in (
"mysql",
@@ -1011,7 +1021,10 @@ class TestDialect(Validator):
"starrocks",
)
},
write={
)
self.validate_all(
f"{unit}(CAST(x AS DATE))",
read={
dialect: f"{unit}(x)"
for dialect in (
"mysql",
@@ -1670,6 +1683,28 @@ class TestDialect(Validator):
)
def test_alias(self):
self.validate_all(
'SELECT 1 AS "foo"',
read={
"mysql": "SELECT 1 'foo'",
"sqlite": "SELECT 1 'foo'",
"tsql": "SELECT 1 'foo'",
},
)
for dialect in (
"presto",
"hive",
"postgres",
"clickhouse",
"bigquery",
"snowflake",
"duckdb",
):
with self.subTest(f"string alias: {dialect}"):
with self.assertRaises(ParseError):
parse_one("SELECT 1 'foo'", dialect=dialect)
self.validate_all(
"SELECT a AS b FROM x GROUP BY b",
write={
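Sketch (not part of the diff): the new "did you mean" behavior tested above; the import path for Dialect is an assumption.

from sqlglot.dialects.dialect import Dialect  # assumed import path

try:
    Dialect.get_or_raise("myqsl")
except ValueError as e:
    print(e)  # per the new test: Unknown dialect 'myqsl'. Did you mean mysql?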


@@ -4,6 +4,29 @@ from tests.dialects.test_dialect import Validator
class TestDoris(Validator):
dialect = "doris"
def test_doris(self):
self.validate_all(
"SELECT TO_DATE('2020-02-02 00:00:00')",
write={
"doris": "SELECT TO_DATE('2020-02-02 00:00:00')",
"oracle": "SELECT CAST('2020-02-02 00:00:00' AS DATE)",
},
)
self.validate_all(
"SELECT MAX_BY(a, b), MIN_BY(c, d)",
read={"clickhouse": "SELECT argMax(a, b), argMin(c, d)"},
)
self.validate_all(
"SELECT ARRAY_SUM(x -> x * x, ARRAY(2, 3))",
read={
"clickhouse": "SELECT arraySum(x -> x*x, [2, 3])",
},
write={
"clickhouse": "SELECT arraySum(x -> x * x, [2, 3])",
"doris": "SELECT ARRAY_SUM(x -> x * x, ARRAY(2, 3))",
},
)
def test_identity(self):
self.validate_identity("COALECSE(a, b, c, d)")
self.validate_identity("SELECT CAST(`a`.`b` AS INT) FROM foo")
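Sketch (not part of the diff): the new argMax/argMin mapping tested above.

import sqlglot

print(sqlglot.transpile("SELECT argMax(a, b), argMin(c, d)", read="clickhouse", write="doris")[0])
# SELECT MAX_BY(a, b), MIN_BY(c, d)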


@@ -7,6 +7,41 @@ class TestDuckDB(Validator):
dialect = "duckdb"
def test_duckdb(self):
struct_pack = parse_one('STRUCT_PACK("a b" := 1)', read="duckdb")
self.assertIsInstance(struct_pack.expressions[0].this, exp.Identifier)
self.assertEqual(struct_pack.sql(dialect="duckdb"), "{'a b': 1}")
self.validate_all(
"SELECT SUM(X) OVER (ORDER BY x)",
write={
"bigquery": "SELECT SUM(X) OVER (ORDER BY x NULLS LAST)",
"duckdb": "SELECT SUM(X) OVER (ORDER BY x)",
"mysql": "SELECT SUM(X) OVER (ORDER BY CASE WHEN x IS NULL THEN 1 ELSE 0 END, x)",
},
)
self.validate_all(
"SELECT SUM(X) OVER (ORDER BY x RANGE BETWEEN 1 PRECEDING AND CURRENT ROW)",
write={
"bigquery": "SELECT SUM(X) OVER (ORDER BY x RANGE BETWEEN 1 PRECEDING AND CURRENT ROW)",
"duckdb": "SELECT SUM(X) OVER (ORDER BY x RANGE BETWEEN 1 PRECEDING AND CURRENT ROW)",
"mysql": "SELECT SUM(X) OVER (ORDER BY x RANGE BETWEEN 1 PRECEDING AND CURRENT ROW)",
},
)
self.validate_all(
"SELECT * FROM x ORDER BY 1 NULLS LAST",
write={
"duckdb": "SELECT * FROM x ORDER BY 1",
"mysql": "SELECT * FROM x ORDER BY 1",
},
)
self.validate_all(
"CREATE TEMPORARY FUNCTION f1(a, b) AS (a + b)",
read={"bigquery": "CREATE TEMP FUNCTION f1(a INT64, b INT64) AS (a + b)"},
)
self.validate_identity("SELECT 1 WHERE x > $1")
self.validate_identity("SELECT 1 WHERE x > $name")
self.assertEqual(
parse_one("select * from t limit (select 5)").sql(dialect="duckdb"),
exp.select("*").from_("t").limit(exp.select("5").subquery()).sql(dialect="duckdb"),
@@ -54,12 +89,36 @@ class TestDuckDB(Validator):
},
)
self.validate_all(
"""SELECT JSON('{"fruit":"banana"}') -> 'fruit'""",
write={
"duckdb": """SELECT JSON('{"fruit":"banana"}') -> 'fruit'""",
"snowflake": """SELECT PARSE_JSON('{"fruit":"banana"}')['fruit']""",
},
)
self.validate_all(
"""SELECT JSON('{"fruit": {"foo": "banana"}}') -> 'fruit' -> 'foo'""",
write={
"duckdb": """SELECT JSON('{"fruit": {"foo": "banana"}}') -> 'fruit' -> 'foo'""",
"snowflake": """SELECT PARSE_JSON('{"fruit": {"foo": "banana"}}')['fruit']['foo']""",
},
)
self.validate_all(
"WITH _data AS (SELECT [{'a': 1, 'b': 2}, {'a': 2, 'b': 3}] AS col) SELECT (SELECT col['b'] FROM UNNEST(col) AS t(col) WHERE col['a'] = 1) FROM _data",
write={
"bigquery": "WITH _data AS (SELECT [STRUCT(1 AS a, 2 AS b), STRUCT(2 AS a, 3 AS b)] AS col) SELECT (SELECT col.b FROM UNNEST(col) AS col WHERE col.a = 1) FROM _data",
"duckdb": "WITH _data AS (SELECT [{'a': 1, 'b': 2}, {'a': 2, 'b': 3}] AS col) SELECT (SELECT col['b'] FROM UNNEST(col) AS t(col) WHERE col['a'] = 1) FROM _data",
},
)
self.validate_all(
"SELECT {'bla': column1, 'foo': column2, 'bar': column3} AS data FROM source_table",
read={
"bigquery": "SELECT STRUCT(column1 AS bla, column2 AS foo, column3 AS bar) AS data FROM source_table",
"duckdb": "SELECT {'bla': column1, 'foo': column2, 'bar': column3} AS data FROM source_table",
},
write={
"bigquery": "SELECT STRUCT(column1 AS bla, column2 AS foo, column3 AS bar) AS data FROM source_table",
},
)
self.validate_all(
"WITH cte(x) AS (SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3) SELECT AVG(x) FILTER (WHERE x > 1) FROM cte",
@@ -86,8 +145,8 @@ class TestDuckDB(Validator):
self.validate_all(
"SELECT UNNEST(ARRAY[1, 2, 3]), UNNEST(ARRAY[4, 5]), UNNEST(ARRAY[6]) FROM x",
write={
"bigquery": "SELECT IF(pos = pos_2, col, NULL) AS col, IF(pos = pos_3, col_2, NULL) AS col_2, IF(pos = pos_4, col_3, NULL) AS col_3 FROM x, UNNEST(GENERATE_ARRAY(0, GREATEST(ARRAY_LENGTH([1, 2, 3]), ARRAY_LENGTH([4, 5]), ARRAY_LENGTH([6])) - 1)) AS pos CROSS JOIN UNNEST([1, 2, 3]) AS col WITH OFFSET AS pos_2 CROSS JOIN UNNEST([4, 5]) AS col_2 WITH OFFSET AS pos_3 CROSS JOIN UNNEST([6]) AS col_3 WITH OFFSET AS pos_4 WHERE ((pos = pos_2 OR (pos > (ARRAY_LENGTH([1, 2, 3]) - 1) AND pos_2 = (ARRAY_LENGTH([1, 2, 3]) - 1))) AND (pos = pos_3 OR (pos > (ARRAY_LENGTH([4, 5]) - 1) AND pos_3 = (ARRAY_LENGTH([4, 5]) - 1)))) AND (pos = pos_4 OR (pos > (ARRAY_LENGTH([6]) - 1) AND pos_4 = (ARRAY_LENGTH([6]) - 1)))",
"presto": "SELECT IF(_u.pos = _u_2.pos_2, _u_2.col) AS col, IF(_u.pos = _u_3.pos_3, _u_3.col_2) AS col_2, IF(_u.pos = _u_4.pos_4, _u_4.col_3) AS col_3 FROM x, UNNEST(SEQUENCE(1, GREATEST(CARDINALITY(ARRAY[1, 2, 3]), CARDINALITY(ARRAY[4, 5]), CARDINALITY(ARRAY[6])))) AS _u(pos) CROSS JOIN UNNEST(ARRAY[1, 2, 3]) WITH ORDINALITY AS _u_2(col, pos_2) CROSS JOIN UNNEST(ARRAY[4, 5]) WITH ORDINALITY AS _u_3(col_2, pos_3) CROSS JOIN UNNEST(ARRAY[6]) WITH ORDINALITY AS _u_4(col_3, pos_4) WHERE ((_u.pos = _u_2.pos_2 OR (_u.pos > CARDINALITY(ARRAY[1, 2, 3]) AND _u_2.pos_2 = CARDINALITY(ARRAY[1, 2, 3]))) AND (_u.pos = _u_3.pos_3 OR (_u.pos > CARDINALITY(ARRAY[4, 5]) AND _u_3.pos_3 = CARDINALITY(ARRAY[4, 5])))) AND (_u.pos = _u_4.pos_4 OR (_u.pos > CARDINALITY(ARRAY[6]) AND _u_4.pos_4 = CARDINALITY(ARRAY[6])))",
"bigquery": "SELECT IF(pos = pos_2, col, NULL) AS col, IF(pos = pos_3, col_2, NULL) AS col_2, IF(pos = pos_4, col_3, NULL) AS col_3 FROM x CROSS JOIN UNNEST(GENERATE_ARRAY(0, GREATEST(ARRAY_LENGTH([1, 2, 3]), ARRAY_LENGTH([4, 5]), ARRAY_LENGTH([6])) - 1)) AS pos CROSS JOIN UNNEST([1, 2, 3]) AS col WITH OFFSET AS pos_2 CROSS JOIN UNNEST([4, 5]) AS col_2 WITH OFFSET AS pos_3 CROSS JOIN UNNEST([6]) AS col_3 WITH OFFSET AS pos_4 WHERE ((pos = pos_2 OR (pos > (ARRAY_LENGTH([1, 2, 3]) - 1) AND pos_2 = (ARRAY_LENGTH([1, 2, 3]) - 1))) AND (pos = pos_3 OR (pos > (ARRAY_LENGTH([4, 5]) - 1) AND pos_3 = (ARRAY_LENGTH([4, 5]) - 1)))) AND (pos = pos_4 OR (pos > (ARRAY_LENGTH([6]) - 1) AND pos_4 = (ARRAY_LENGTH([6]) - 1)))",
"presto": "SELECT IF(_u.pos = _u_2.pos_2, _u_2.col) AS col, IF(_u.pos = _u_3.pos_3, _u_3.col_2) AS col_2, IF(_u.pos = _u_4.pos_4, _u_4.col_3) AS col_3 FROM x CROSS JOIN UNNEST(SEQUENCE(1, GREATEST(CARDINALITY(ARRAY[1, 2, 3]), CARDINALITY(ARRAY[4, 5]), CARDINALITY(ARRAY[6])))) AS _u(pos) CROSS JOIN UNNEST(ARRAY[1, 2, 3]) WITH ORDINALITY AS _u_2(col, pos_2) CROSS JOIN UNNEST(ARRAY[4, 5]) WITH ORDINALITY AS _u_3(col_2, pos_3) CROSS JOIN UNNEST(ARRAY[6]) WITH ORDINALITY AS _u_4(col_3, pos_4) WHERE ((_u.pos = _u_2.pos_2 OR (_u.pos > CARDINALITY(ARRAY[1, 2, 3]) AND _u_2.pos_2 = CARDINALITY(ARRAY[1, 2, 3]))) AND (_u.pos = _u_3.pos_3 OR (_u.pos > CARDINALITY(ARRAY[4, 5]) AND _u_3.pos_3 = CARDINALITY(ARRAY[4, 5])))) AND (_u.pos = _u_4.pos_4 OR (_u.pos > CARDINALITY(ARRAY[6]) AND _u_4.pos_4 = CARDINALITY(ARRAY[6])))",
},
)
self.validate_all(
@@ -164,6 +223,13 @@ class TestDuckDB(Validator):
self.validate_all("0x1010", write={"": "0 AS x1010"})
self.validate_all("x ~ y", write={"duckdb": "REGEXP_MATCHES(x, y)"})
self.validate_all("SELECT * FROM 'x.y'", write={"duckdb": 'SELECT * FROM "x.y"'})
self.validate_all(
"SELECT STRFTIME(CAST('2020-01-01' AS TIMESTAMP), CONCAT('%Y', '%m'))",
write={
"duckdb": "SELECT STRFTIME(CAST('2020-01-01' AS TIMESTAMP), CONCAT('%Y', '%m'))",
"tsql": "SELECT FORMAT(CAST('2020-01-01' AS DATETIME2), CONCAT('yyyy', 'MM'))",
},
)
self.validate_all(
"SELECT * FROM produce PIVOT(SUM(sales) FOR quarter IN ('Q1', 'Q2'))",
read={
@@ -188,7 +254,7 @@ class TestDuckDB(Validator):
},
)
self.validate_all(
"DATE_DIFF('day', CAST(b AS DATE), CAST(a AS DATE))",
"DATE_DIFF('DAY', CAST(b AS DATE), CAST(a AS DATE))",
read={
"duckdb": "DATE_DIFF('day', CAST(b AS DATE), CAST(a AS DATE))",
"hive": "DATEDIFF(a, b)",
@@ -226,15 +292,15 @@ class TestDuckDB(Validator):
self.validate_all(
"""SELECT DATEDIFF('day', t1."A", t1."B") FROM "table" AS t1""",
write={
"duckdb": """SELECT DATE_DIFF('day', t1."A", t1."B") FROM "table" AS t1""",
"trino": """SELECT DATE_DIFF('day', t1."A", t1."B") FROM "table" AS t1""",
"duckdb": """SELECT DATE_DIFF('DAY', t1."A", t1."B") FROM "table" AS t1""",
"trino": """SELECT DATE_DIFF('DAY', t1."A", t1."B") FROM "table" AS t1""",
},
)
self.validate_all(
"SELECT DATE_DIFF('day', DATE '2020-01-01', DATE '2020-01-05')",
write={
"duckdb": "SELECT DATE_DIFF('day', CAST('2020-01-01' AS DATE), CAST('2020-01-05' AS DATE))",
"trino": "SELECT DATE_DIFF('day', CAST('2020-01-01' AS DATE), CAST('2020-01-05' AS DATE))",
"duckdb": "SELECT DATE_DIFF('DAY', CAST('2020-01-01' AS DATE), CAST('2020-01-05' AS DATE))",
"trino": "SELECT DATE_DIFF('DAY', CAST('2020-01-01' AS DATE), CAST('2020-01-05' AS DATE))",
},
)
self.validate_all(
@@ -366,7 +432,7 @@ class TestDuckDB(Validator):
},
)
self.validate_all(
"IF(y <> 0, x / y, NULL)",
"IF((y) <> 0, (x) / (y), NULL)",
read={
"bigquery": "SAFE_DIVIDE(x, y)",
},
@@ -374,6 +440,7 @@ class TestDuckDB(Validator):
self.validate_all(
"STRUCT_PACK(x := 1, y := '2')",
write={
"bigquery": "STRUCT(1 AS x, '2' AS y)",
"duckdb": "{'x': 1, 'y': '2'}",
"spark": "STRUCT(1 AS x, '2' AS y)",
},
@@ -381,6 +448,7 @@ class TestDuckDB(Validator):
self.validate_all(
"STRUCT_PACK(key1 := 'value1', key2 := 42)",
write={
"bigquery": "STRUCT('value1' AS key1, 42 AS key2)",
"duckdb": "{'key1': 'value1', 'key2': 42}",
"spark": "STRUCT('value1' AS key1, 42 AS key2)",
},
@@ -478,6 +546,8 @@ class TestDuckDB(Validator):
"SELECT CAST('2020-05-06' AS DATE) + INTERVAL 5 DAY",
read={"bigquery": "SELECT DATE_ADD(CAST('2020-05-06' AS DATE), INTERVAL 5 DAY)"},
)
self.validate_identity("SELECT PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY y DESC) FROM t")
self.validate_identity("SELECT PERCENTILE_DISC(0.25) WITHIN GROUP (ORDER BY y DESC) FROM t")
self.validate_all(
"SELECT QUANTILE_CONT(x, q) FROM t",
write={
@@ -513,6 +583,14 @@ class TestDuckDB(Validator):
self.validate_identity("SELECT ISNAN(x)")
self.validate_all(
"SELECT COUNT_IF(x)",
write={
"duckdb": "SELECT COUNT_IF(x)",
"bigquery": "SELECT COUNTIF(x)",
},
)
def test_array_index(self):
with self.assertLogs(helper_logger) as cm:
self.validate_all(
@@ -569,10 +647,10 @@ class TestDuckDB(Validator):
)
self.validate_all(
"SELECT INTERVAL '1 quarter'",
write={"duckdb": "SELECT (90 * INTERVAL '1' day)"},
write={"duckdb": "SELECT (90 * INTERVAL '1' DAY)"},
)
self.validate_all(
"SELECT ((DATE_TRUNC('DAY', CAST(CAST(DATE_TRUNC('DAY', CURRENT_TIMESTAMP) AS DATE) AS TIMESTAMP) + INTERVAL (0 - MOD((DAYOFWEEK(CAST(CAST(DATE_TRUNC('DAY', CURRENT_TIMESTAMP) AS DATE) AS TIMESTAMP)) % 7) - 1 + 7, 7)) day) + (7 * INTERVAL (-5) day))) AS t1",
"SELECT ((DATE_TRUNC('DAY', CAST(CAST(DATE_TRUNC('DAY', CURRENT_TIMESTAMP) AS DATE) AS TIMESTAMP) + INTERVAL (0 - MOD((DAYOFWEEK(CAST(CAST(DATE_TRUNC('DAY', CURRENT_TIMESTAMP) AS DATE) AS TIMESTAMP)) % 7) - 1 + 7, 7)) DAY) + (7 * INTERVAL (-5) DAY))) AS t1",
read={
"presto": "SELECT ((DATE_ADD('week', -5, DATE_TRUNC('DAY', DATE_ADD('day', (0 - MOD((DAY_OF_WEEK(CAST(CAST(DATE_TRUNC('DAY', NOW()) AS DATE) AS TIMESTAMP)) % 7) - 1 + 7, 7)), CAST(CAST(DATE_TRUNC('DAY', NOW()) AS DATE) AS TIMESTAMP)))))) AS t1",
},
@@ -594,7 +672,7 @@ class TestDuckDB(Validator):
write={
"bigquery": "TIMESTAMP_MILLIS(x)",
"duckdb": "EPOCH_MS(x)",
"presto": "FROM_UNIXTIME(CAST(x AS DOUBLE) / 1000)",
"presto": "FROM_UNIXTIME(CAST(x AS DOUBLE) / POW(10, 3))",
"spark": "TIMESTAMP_MILLIS(x)",
},
)
@@ -659,36 +737,44 @@ class TestDuckDB(Validator):
)
def test_sample(self):
self.validate_all(
self.validate_identity(
"SELECT * FROM tbl USING SAMPLE 5",
write={"duckdb": "SELECT * FROM tbl USING SAMPLE (5)"},
"SELECT * FROM tbl USING SAMPLE (5 ROWS)",
)
self.validate_all(
self.validate_identity(
"SELECT * FROM tbl USING SAMPLE 10%",
write={"duckdb": "SELECT * FROM tbl USING SAMPLE (10 PERCENT)"},
"SELECT * FROM tbl USING SAMPLE (10 PERCENT)",
)
self.validate_all(
self.validate_identity(
"SELECT * FROM tbl USING SAMPLE 10 PERCENT (bernoulli)",
write={"duckdb": "SELECT * FROM tbl USING SAMPLE BERNOULLI (10 PERCENT)"},
"SELECT * FROM tbl USING SAMPLE BERNOULLI (10 PERCENT)",
)
self.validate_all(
self.validate_identity(
"SELECT * FROM tbl USING SAMPLE reservoir(50 ROWS) REPEATABLE (100)",
write={"duckdb": "SELECT * FROM tbl USING SAMPLE RESERVOIR (50 ROWS) REPEATABLE (100)"},
"SELECT * FROM tbl USING SAMPLE RESERVOIR (50 ROWS) REPEATABLE (100)",
)
self.validate_all(
self.validate_identity(
"SELECT * FROM tbl USING SAMPLE 10% (system, 377)",
write={"duckdb": "SELECT * FROM tbl USING SAMPLE SYSTEM (10 PERCENT) REPEATABLE (377)"},
"SELECT * FROM tbl USING SAMPLE SYSTEM (10 PERCENT) REPEATABLE (377)",
)
self.validate_all(
self.validate_identity(
"SELECT * FROM tbl TABLESAMPLE RESERVOIR(20%), tbl2 WHERE tbl.i=tbl2.i",
write={
"duckdb": "SELECT * FROM tbl TABLESAMPLE RESERVOIR (20 PERCENT), tbl2 WHERE tbl.i = tbl2.i"
},
"SELECT * FROM tbl TABLESAMPLE RESERVOIR (20 PERCENT), tbl2 WHERE tbl.i = tbl2.i",
)
self.validate_all(
self.validate_identity(
"SELECT * FROM tbl, tbl2 WHERE tbl.i=tbl2.i USING SAMPLE RESERVOIR(20%)",
"SELECT * FROM tbl, tbl2 WHERE tbl.i = tbl2.i USING SAMPLE RESERVOIR (20 PERCENT)",
)
self.validate_all(
"SELECT * FROM example TABLESAMPLE (3 ROWS) REPEATABLE (82)",
read={
"duckdb": "SELECT * FROM example TABLESAMPLE (3) REPEATABLE (82)",
"snowflake": "SELECT * FROM example SAMPLE (3 ROWS) SEED (82)",
},
write={
"duckdb": "SELECT * FROM tbl, tbl2 WHERE tbl.i = tbl2.i USING SAMPLE RESERVOIR (20 PERCENT)"
"duckdb": "SELECT * FROM example TABLESAMPLE (3 ROWS) REPEATABLE (82)",
"snowflake": "SELECT * FROM example TABLESAMPLE (3 ROWS) SEED (82)",
},
)
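Sketch (not part of the diff): the sample-clause round-trips tested above; outputs are those the updated tests assert.

import sqlglot

print(sqlglot.transpile("SELECT * FROM tbl USING SAMPLE 10%", read="duckdb", write="duckdb")[0])
# SELECT * FROM tbl USING SAMPLE (10 PERCENT)

# Snowflake's SEED becomes DuckDB's REPEATABLE.
print(sqlglot.transpile("SELECT * FROM example SAMPLE (3 ROWS) SEED (82)", read="snowflake", write="duckdb")[0])
# SELECT * FROM example TABLESAMPLE (3 ROWS) REPEATABLE (82)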


@@ -296,11 +296,11 @@ class TestHive(Validator):
self.validate_all(
"DATEDIFF(a, b)",
write={
"duckdb": "DATE_DIFF('day', CAST(b AS DATE), CAST(a AS DATE))",
"presto": "DATE_DIFF('day', CAST(CAST(b AS TIMESTAMP) AS DATE), CAST(CAST(a AS TIMESTAMP) AS DATE))",
"duckdb": "DATE_DIFF('DAY', CAST(b AS DATE), CAST(a AS DATE))",
"presto": "DATE_DIFF('DAY', CAST(CAST(b AS TIMESTAMP) AS DATE), CAST(CAST(a AS TIMESTAMP) AS DATE))",
"hive": "DATEDIFF(TO_DATE(a), TO_DATE(b))",
"spark": "DATEDIFF(TO_DATE(a), TO_DATE(b))",
"": "DATEDIFF(TS_OR_DS_TO_DATE(a), TS_OR_DS_TO_DATE(b))",
"": "DATEDIFF(CAST(a AS DATE), CAST(b AS DATE))",
},
)
self.validate_all(
@@ -358,11 +358,11 @@ class TestHive(Validator):
self.validate_all(
"DATEDIFF(TO_DATE(y), x)",
write={
"duckdb": "DATE_DIFF('day', CAST(x AS DATE), CAST(CAST(y AS DATE) AS DATE))",
"presto": "DATE_DIFF('day', CAST(CAST(x AS TIMESTAMP) AS DATE), CAST(CAST(CAST(CAST(y AS TIMESTAMP) AS DATE) AS TIMESTAMP) AS DATE))",
"duckdb": "DATE_DIFF('DAY', CAST(x AS DATE), CAST(y AS DATE))",
"presto": "DATE_DIFF('DAY', CAST(CAST(x AS TIMESTAMP) AS DATE), CAST(CAST(CAST(CAST(y AS TIMESTAMP) AS DATE) AS TIMESTAMP) AS DATE))",
"hive": "DATEDIFF(TO_DATE(y), TO_DATE(x))",
"spark": "DATEDIFF(TO_DATE(y), TO_DATE(x))",
"": "DATEDIFF(TS_OR_DS_TO_DATE(TS_OR_DS_TO_DATE(y)), TS_OR_DS_TO_DATE(x))",
"": "DATEDIFF(CAST(y AS DATE), CAST(x AS DATE))",
},
)
self.validate_all(
@@ -399,6 +399,7 @@ class TestHive(Validator):
)
def test_hive(self):
self.validate_identity("(VALUES (1 AS a, 2 AS b, 3))")
self.validate_identity("SELECT * FROM my_table TIMESTAMP AS OF DATE_ADD(CURRENT_DATE, -1)")
self.validate_identity("SELECT * FROM my_table VERSION AS OF DATE_ADD(CURRENT_DATE, -1)")
@@ -737,9 +738,11 @@ class TestHive(Validator):
"SELECT * FROM x TABLESAMPLE (1 PERCENT) AS foo",
read={
"presto": "SELECT * FROM x AS foo TABLESAMPLE BERNOULLI (1)",
"snowflake": "SELECT * FROM x AS foo TABLESAMPLE (1)",
},
write={
"hive": "SELECT * FROM x TABLESAMPLE (1 PERCENT) AS foo",
"snowflake": "SELECT * FROM x AS foo TABLESAMPLE (1)",
"spark": "SELECT * FROM x TABLESAMPLE (1 PERCENT) AS foo",
},
)
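Sketch (not part of the diff): the Hive DATEDIFF lowering tested above, now with an uppercase unit and plain CASTs to DATE.

import sqlglot

print(sqlglot.transpile("DATEDIFF(a, b)", read="hive", write="duckdb")[0])
# DATE_DIFF('DAY', CAST(b AS DATE), CAST(a AS DATE))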


@@ -533,45 +533,45 @@ class TestMySQL(Validator):
self.validate_all(
"SELECT DATEDIFF(x, y)",
read={
"presto": "SELECT DATE_DIFF('day', y, x)",
"redshift": "SELECT DATEDIFF(day, y, x)",
"presto": "SELECT DATE_DIFF('DAY', y, x)",
"redshift": "SELECT DATEDIFF(DAY, y, x)",
},
write={
"mysql": "SELECT DATEDIFF(x, y)",
"presto": "SELECT DATE_DIFF('day', y, x)",
"redshift": "SELECT DATEDIFF(day, y, x)",
"presto": "SELECT DATE_DIFF('DAY', y, x)",
"redshift": "SELECT DATEDIFF(DAY, y, x)",
},
)
self.validate_all(
"DAYOFYEAR(x)",
write={
"mysql": "DAYOFYEAR(x)",
"": "DAY_OF_YEAR(TS_OR_DS_TO_DATE(x))",
"": "DAY_OF_YEAR(CAST(x AS DATE))",
},
)
self.validate_all(
"DAYOFMONTH(x)",
write={"mysql": "DAYOFMONTH(x)", "": "DAY_OF_MONTH(TS_OR_DS_TO_DATE(x))"},
write={"mysql": "DAYOFMONTH(x)", "": "DAY_OF_MONTH(CAST(x AS DATE))"},
)
self.validate_all(
"DAYOFWEEK(x)",
write={"mysql": "DAYOFWEEK(x)", "": "DAY_OF_WEEK(TS_OR_DS_TO_DATE(x))"},
write={"mysql": "DAYOFWEEK(x)", "": "DAY_OF_WEEK(CAST(x AS DATE))"},
)
self.validate_all(
"WEEKOFYEAR(x)",
write={"mysql": "WEEKOFYEAR(x)", "": "WEEK_OF_YEAR(TS_OR_DS_TO_DATE(x))"},
write={"mysql": "WEEKOFYEAR(x)", "": "WEEK_OF_YEAR(CAST(x AS DATE))"},
)
self.validate_all(
"DAY(x)",
write={"mysql": "DAY(x)", "": "DAY(TS_OR_DS_TO_DATE(x))"},
write={"mysql": "DAY(x)", "": "DAY(CAST(x AS DATE))"},
)
self.validate_all(
"WEEK(x)",
write={"mysql": "WEEK(x)", "": "WEEK(TS_OR_DS_TO_DATE(x))"},
write={"mysql": "WEEK(x)", "": "WEEK(CAST(x AS DATE))"},
)
self.validate_all(
"YEAR(x)",
write={"mysql": "YEAR(x)", "": "YEAR(TS_OR_DS_TO_DATE(x))"},
write={"mysql": "YEAR(x)", "": "YEAR(CAST(x AS DATE))"},
)
self.validate_all(
"DATE(x)",
@@ -1005,7 +1005,7 @@ COMMENT='客户账户表'"""
self.validate_all(
"MONTHNAME(x)",
write={
"": "TIME_TO_STR(TS_OR_DS_TO_DATE(x), '%B')",
"": "TIME_TO_STR(CAST(x AS DATE), '%B')",
"mysql": "DATE_FORMAT(x, '%M')",
},
)
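Sketch (not part of the diff): the MySQL extractor lowering tested above; the default (dialect-less) writer now emits CAST(x AS DATE) instead of TS_OR_DS_TO_DATE(x).

import sqlglot

print(sqlglot.parse_one("DAYOFYEAR(x)", read="mysql").sql())
# DAY_OF_YEAR(CAST(x AS DATE))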


@@ -7,14 +7,13 @@ class TestOracle(Validator):
dialect = "oracle"
def test_oracle(self):
self.validate_identity("REGEXP_REPLACE('source', 'search')")
parse_one("ALTER TABLE tbl_name DROP FOREIGN KEY fk_symbol", dialect="oracle").assert_is(
exp.AlterTable
)
self.validate_identity("CURRENT_TIMESTAMP(precision)")
self.validate_identity("ALTER TABLE tbl_name DROP FOREIGN KEY fk_symbol")
self.assertIsInstance(
parse_one("ALTER TABLE tbl_name DROP FOREIGN KEY fk_symbol", dialect="oracle"),
exp.AlterTable,
)
self.validate_identity(
"ALTER TABLE Payments ADD (Stock NUMBER NOT NULL, dropid VARCHAR2(500) NOT NULL)"
)
self.validate_identity("ALTER TABLE Payments ADD Stock NUMBER NOT NULL")
self.validate_identity("SELECT x FROM t WHERE cond FOR UPDATE")
self.validate_identity("SELECT JSON_OBJECT(k1: v1 FORMAT JSON, k2: v2 FORMAT JSON)")
@@ -34,7 +33,14 @@ class TestOracle(Validator):
self.validate_identity("SELECT STANDARD_HASH('hello', 'MD5')")
self.validate_identity("SELECT * FROM table_name@dblink_name.database_link_domain")
self.validate_identity("SELECT * FROM table_name SAMPLE (25) s")
self.validate_identity("SELECT COUNT(*) * 10 FROM orders SAMPLE (10) SEED (1)")
self.validate_identity("SELECT * FROM V$SESSION")
self.validate_identity(
"SELECT last_name, employee_id, manager_id, LEVEL FROM employees START WITH employee_id = 100 CONNECT BY PRIOR employee_id = manager_id ORDER SIBLINGS BY last_name"
)
self.validate_identity(
"ALTER TABLE Payments ADD (Stock NUMBER NOT NULL, dropid VARCHAR2(500) NOT NULL)"
)
self.validate_identity(
"SELECT JSON_ARRAYAGG(JSON_OBJECT('RNK': RNK, 'RATING_CODE': RATING_CODE, 'DATE_VALUE': DATE_VALUE, 'AGENT_ID': AGENT_ID RETURNING CLOB) RETURNING CLOB) AS JSON_DATA FROM tablename"
)
@@ -50,6 +56,18 @@ class TestOracle(Validator):
self.validate_identity(
"SELECT MIN(column_name) KEEP (DENSE_RANK FIRST ORDER BY column_name DESC) FROM table_name"
)
self.validate_identity(
"""SELECT JSON_OBJECT(KEY 'key1' IS emp.column1, KEY 'key2' IS emp.column1) "emp_key" FROM emp""",
"""SELECT JSON_OBJECT('key1': emp.column1, 'key2': emp.column1) AS "emp_key" FROM emp""",
)
self.validate_identity(
"SELECT JSON_OBJECTAGG(KEY department_name VALUE department_id) FROM dep WHERE id <= 30",
"SELECT JSON_OBJECTAGG(department_name: department_id) FROM dep WHERE id <= 30",
)
self.validate_identity(
"SYSDATE",
"CURRENT_TIMESTAMP",
)
self.validate_identity(
"SELECT last_name, department_id, salary, MIN(salary) KEEP (DENSE_RANK FIRST ORDER BY commission_pct) "
'OVER (PARTITION BY department_id) AS "Worst", MAX(salary) KEEP (DENSE_RANK LAST ORDER BY commission_pct) '
@@ -131,6 +149,20 @@ class TestOracle(Validator):
"postgres": "CAST(x AS sch.udt)",
},
)
self.validate_all(
"SELECT TO_TIMESTAMP('2024-12-12 12:12:12.000000', 'YYYY-MM-DD HH24:MI:SS.FF6')",
write={
"oracle": "SELECT TO_TIMESTAMP('2024-12-12 12:12:12.000000', 'YYYY-MM-DD HH24:MI:SS.FF6')",
"duckdb": "SELECT STRPTIME('2024-12-12 12:12:12.000000', '%Y-%m-%d %H:%M:%S.%f')",
},
)
self.validate_all(
"SELECT TO_DATE('2024-12-12', 'YYYY-MM-DD')",
write={
"oracle": "SELECT TO_DATE('2024-12-12', 'YYYY-MM-DD')",
"duckdb": "SELECT CAST(STRPTIME('2024-12-12', '%Y-%m-%d') AS DATE)",
},
)
def test_join_marker(self):
self.validate_identity("SELECT e1.x, e2.x FROM e e1, e e2 WHERE e1.y (+) = e2.y")
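Sketch (not part of the diff): the new Oracle format-string translation tested above.

import sqlglot

print(sqlglot.transpile("SELECT TO_DATE('2024-12-12', 'YYYY-MM-DD')", read="oracle", write="duckdb")[0])
# SELECT CAST(STRPTIME('2024-12-12', '%Y-%m-%d') AS DATE)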


@@ -1,4 +1,4 @@
from sqlglot import ParseError, exp, parse_one, transpile
from sqlglot import ParseError, UnsupportedError, exp, parse_one, transpile
from sqlglot.helper import logger as helper_logger
from tests.dialects.test_dialect import Validator
@@ -11,7 +11,7 @@ class TestPostgres(Validator):
expr = parse_one("CREATE TABLE t (x INTERVAL day)", read="postgres")
cdef = expr.find(exp.ColumnDef)
cdef.args["kind"].assert_is(exp.DataType)
self.assertEqual(expr.sql(dialect="postgres"), "CREATE TABLE t (x INTERVAL day)")
self.assertEqual(expr.sql(dialect="postgres"), "CREATE TABLE t (x INTERVAL DAY)")
self.validate_identity("CREATE INDEX idx_x ON x USING BTREE(x, y) WHERE (NOT y IS NULL)")
self.validate_identity("CREATE TABLE test (elems JSONB[])")
@@ -74,6 +74,12 @@ class TestPostgres(Validator):
"CREATE TABLE test (x TIMESTAMP WITHOUT TIME ZONE[][])",
"CREATE TABLE test (x TIMESTAMP[][])",
)
self.validate_identity(
"CREATE UNLOGGED TABLE foo AS WITH t(c) AS (SELECT 1) SELECT * FROM (SELECT c AS c FROM t) AS temp"
)
self.validate_identity(
"WITH t(c) AS (SELECT 1) SELECT * INTO UNLOGGED foo FROM (SELECT c AS c FROM t) AS temp"
)
self.validate_all(
"CREATE OR REPLACE FUNCTION function_name (input_a character varying DEFAULT NULL::character varying)",
@@ -158,7 +164,7 @@ class TestPostgres(Validator):
write={
"hive": "SELECT EXPLODE(c) FROM t",
"postgres": "SELECT UNNEST(c) FROM t",
"presto": "SELECT IF(_u.pos = _u_2.pos_2, _u_2.col) AS col FROM t, UNNEST(SEQUENCE(1, GREATEST(CARDINALITY(c)))) AS _u(pos) CROSS JOIN UNNEST(c) WITH ORDINALITY AS _u_2(col, pos_2) WHERE _u.pos = _u_2.pos_2 OR (_u.pos > CARDINALITY(c) AND _u_2.pos_2 = CARDINALITY(c))",
"presto": "SELECT IF(_u.pos = _u_2.pos_2, _u_2.col) AS col FROM t CROSS JOIN UNNEST(SEQUENCE(1, GREATEST(CARDINALITY(c)))) AS _u(pos) CROSS JOIN UNNEST(c) WITH ORDINALITY AS _u_2(col, pos_2) WHERE _u.pos = _u_2.pos_2 OR (_u.pos > CARDINALITY(c) AND _u_2.pos_2 = CARDINALITY(c))",
},
)
self.validate_all(
@@ -262,6 +268,7 @@ class TestPostgres(Validator):
self.validate_identity(
"SELECT * FROM JSON_ARRAY_ELEMENTS('[1,true, [2,false]]') WITH ORDINALITY AS kv_json(a, b)"
)
self.validate_identity("SELECT * FROM t TABLESAMPLE SYSTEM (50) REPEATABLE (55)")
self.validate_identity("x @@ y")
self.validate_identity("CAST(x AS MONEY)")
self.validate_identity("CAST(x AS INT4RANGE)")
@@ -277,7 +284,7 @@ class TestPostgres(Validator):
self.validate_identity("CAST(x AS DATERANGE)")
self.validate_identity("CAST(x AS DATEMULTIRANGE)")
self.validate_identity(
"""LAST_VALUE("col1") OVER (ORDER BY "col2" RANGE BETWEEN INTERVAL '1 day' PRECEDING AND '1 month' FOLLOWING)"""
"""LAST_VALUE("col1") OVER (ORDER BY "col2" RANGE BETWEEN INTERVAL '1 DAY' PRECEDING AND '1 month' FOLLOWING)"""
)
self.validate_identity("SELECT ARRAY[1, 2, 3] @> ARRAY[1, 2]")
self.validate_identity("SELECT ARRAY[1, 2, 3] <@ ARRAY[1, 2]")
@@ -325,6 +332,13 @@ class TestPostgres(Validator):
"SELECT TO_TIMESTAMP(1284352323.5), TO_TIMESTAMP('05 Dec 2000', 'DD Mon YYYY')"
)
self.validate_all(
"SELECT * FROM t TABLESAMPLE SYSTEM (50)",
write={
"postgres": "SELECT * FROM t TABLESAMPLE SYSTEM (50)",
"redshift": UnsupportedError,
},
)
self.validate_all(
"SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY amount)",
write={
@@ -387,17 +401,17 @@ class TestPostgres(Validator):
self.validate_all(
"GENERATE_SERIES(a, b, ' 2 days ')",
write={
"postgres": "GENERATE_SERIES(a, b, INTERVAL '2 days')",
"presto": "SEQUENCE(a, b, INTERVAL '2' day)",
"trino": "SEQUENCE(a, b, INTERVAL '2' day)",
"postgres": "GENERATE_SERIES(a, b, INTERVAL '2 DAYS')",
"presto": "SEQUENCE(a, b, INTERVAL '2' DAY)",
"trino": "SEQUENCE(a, b, INTERVAL '2' DAY)",
},
)
self.validate_all(
"GENERATE_SERIES('2019-01-01'::TIMESTAMP, NOW(), '1day')",
write={
"postgres": "GENERATE_SERIES(CAST('2019-01-01' AS TIMESTAMP), CURRENT_TIMESTAMP, INTERVAL '1 day')",
"presto": "SEQUENCE(CAST('2019-01-01' AS TIMESTAMP), CAST(CURRENT_TIMESTAMP AS TIMESTAMP), INTERVAL '1' day)",
"trino": "SEQUENCE(CAST('2019-01-01' AS TIMESTAMP), CAST(CURRENT_TIMESTAMP AS TIMESTAMP), INTERVAL '1' day)",
"postgres": "GENERATE_SERIES(CAST('2019-01-01' AS TIMESTAMP), CURRENT_TIMESTAMP, INTERVAL '1 DAY')",
"presto": "SEQUENCE(CAST('2019-01-01' AS TIMESTAMP), CAST(CURRENT_TIMESTAMP AS TIMESTAMP), INTERVAL '1' DAY)",
"trino": "SEQUENCE(CAST('2019-01-01' AS TIMESTAMP), CAST(CURRENT_TIMESTAMP AS TIMESTAMP), INTERVAL '1' DAY)",
},
)
self.validate_all(
@@ -657,7 +671,7 @@ class TestPostgres(Validator):
)
self.validate_all(
"'45 days'::interval day",
write={"postgres": "CAST('45 days' AS INTERVAL day)"},
write={"postgres": "CAST('45 days' AS INTERVAL DAY)"},
)
self.validate_all(
"'x' 'y' 'z'",
@@ -726,6 +740,7 @@ class TestPostgres(Validator):
"""merge into x as x using (select id) as y on a = b WHEN matched then update set X."A" = y.b""",
write={
"postgres": """MERGE INTO x AS x USING (SELECT id) AS y ON a = b WHEN MATCHED THEN UPDATE SET "A" = y.b""",
"trino": """MERGE INTO x AS x USING (SELECT id) AS y ON a = b WHEN MATCHED THEN UPDATE SET "A" = y.b""",
"snowflake": """MERGE INTO x AS x USING (SELECT id) AS y ON a = b WHEN MATCHED THEN UPDATE SET X."A" = y.b""",
},
)
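Sketch (not part of the diff): the interval normalization tested above; plural, padded unit strings now become uppercase singular INTERVAL units.

import sqlglot

print(sqlglot.transpile("GENERATE_SERIES(a, b, ' 2 days ')", read="postgres", write="trino")[0])
# SEQUENCE(a, b, INTERVAL '2' DAY)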


@@ -182,12 +182,12 @@ class TestPresto(Validator):
def test_interval_plural_to_singular(self):
# Microseconds, weeks and quarters are not supported in Presto/Trino INTERVAL literals
unit_to_expected = {
"SeCoNds": "second",
"minutes": "minute",
"hours": "hour",
"days": "day",
"months": "month",
"years": "year",
"SeCoNds": "SECOND",
"minutes": "MINUTE",
"hours": "HOUR",
"days": "DAY",
"months": "MONTH",
"years": "YEAR",
}
for unit, expected in unit_to_expected.items():
@@ -279,10 +279,10 @@ class TestPresto(Validator):
},
)
self.validate_all(
"DATE_ADD('day', 1, x)",
"DATE_ADD('DAY', 1, x)",
write={
"duckdb": "x + INTERVAL 1 day",
"presto": "DATE_ADD('day', 1, x)",
"duckdb": "x + INTERVAL 1 DAY",
"presto": "DATE_ADD('DAY', 1, x)",
"hive": "DATE_ADD(x, 1)",
"spark": "DATE_ADD(x, 1)",
},
@@ -582,6 +582,8 @@ class TestPresto(Validator):
},
)
self.validate_identity("SELECT a FROM test TABLESAMPLE BERNOULLI (50)")
self.validate_identity("SELECT a FROM test TABLESAMPLE SYSTEM (75)")
self.validate_identity("string_agg(x, ',')", "ARRAY_JOIN(ARRAY_AGG(x), ',')")
self.validate_identity("SELECT * FROM x OFFSET 1 LIMIT 1")
self.validate_identity("SELECT * FROM x OFFSET 1 FETCH FIRST 1 ROWS ONLY")
@@ -600,6 +602,16 @@ class TestPresto(Validator):
"SELECT * FROM example.testdb.customer_orders FOR TIMESTAMP AS OF CAST('2022-03-23 09:59:29.803 Europe/Vienna' AS TIMESTAMP)"
)
self.validate_all(
"SELECT LAST_DAY_OF_MONTH(CAST('2008-11-25' AS DATE))",
read={
"duckdb": "SELECT LAST_DAY(CAST('2008-11-25' AS DATE))",
},
write={
"duckdb": "SELECT LAST_DAY(CAST('2008-11-25' AS DATE))",
"presto": "SELECT LAST_DAY_OF_MONTH(CAST('2008-11-25' AS DATE))",
},
)
self.validate_all(
"SELECT MAX_BY(a.id, a.timestamp) FROM a",
read={
@@ -707,9 +719,9 @@ class TestPresto(Validator):
},
)
self.validate_all("VALUES 1, 2, 3", write={"presto": "VALUES (1), (2), (3)"})
self.validate_all("INTERVAL '1 day'", write={"trino": "INTERVAL '1' day"})
self.validate_all("(5 * INTERVAL '7' day)", read={"": "INTERVAL '5' week"})
self.validate_all("(5 * INTERVAL '7' day)", read={"": "INTERVAL '5' WEEKS"})
self.validate_all("INTERVAL '1 day'", write={"trino": "INTERVAL '1' DAY"})
self.validate_all("(5 * INTERVAL '7' DAY)", read={"": "INTERVAL '5' WEEK"})
self.validate_all("(5 * INTERVAL '7' DAY)", read={"": "INTERVAL '5' WEEKS"})
self.validate_all(
"SELECT SUBSTRING(a, 1, 3), SUBSTRING(a, LENGTH(a) - (3 - 1))",
read={
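Sketch (not part of the diff): the new LAST_DAY mapping tested above.

import sqlglot

print(sqlglot.transpile("SELECT LAST_DAY(CAST('2008-11-25' AS DATE))", read="duckdb", write="presto")[0])
# SELECT LAST_DAY_OF_MONTH(CAST('2008-11-25' AS DATE))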


@@ -6,6 +6,7 @@ class TestRedshift(Validator):
dialect = "redshift"
def test_redshift(self):
self.validate_identity("CREATE MATERIALIZED VIEW orders AUTO REFRESH YES AS SELECT 1")
self.validate_all(
"LISTAGG(sellerid, ', ')",
read={
@@ -55,7 +56,7 @@ class TestRedshift(Validator):
},
)
self.validate_all(
"SELECT INTERVAL '5 days'",
"SELECT INTERVAL '5 DAYS'",
read={
"": "SELECT INTERVAL '5' days",
},
@@ -64,10 +65,10 @@ class TestRedshift(Validator):
"SELECT ADD_MONTHS('2008-03-31', 1)",
write={
"bigquery": "SELECT DATE_ADD(CAST('2008-03-31' AS DATETIME), INTERVAL 1 MONTH)",
"duckdb": "SELECT CAST('2008-03-31' AS TIMESTAMP) + INTERVAL 1 month",
"redshift": "SELECT DATEADD(month, 1, '2008-03-31')",
"trino": "SELECT DATE_ADD('month', 1, CAST('2008-03-31' AS TIMESTAMP))",
"tsql": "SELECT DATEADD(month, 1, CAST('2008-03-31' AS DATETIME2))",
"duckdb": "SELECT CAST('2008-03-31' AS TIMESTAMP) + INTERVAL 1 MONTH",
"redshift": "SELECT DATEADD(MONTH, 1, '2008-03-31')",
"trino": "SELECT DATE_ADD('MONTH', 1, CAST('2008-03-31' AS TIMESTAMP))",
"tsql": "SELECT DATEADD(MONTH, 1, CAST('2008-03-31' AS DATETIME2))",
},
)
self.validate_all(
@@ -234,45 +235,45 @@ class TestRedshift(Validator):
self.validate_all(
"DATEDIFF('day', a, b)",
write={
"bigquery": "DATE_DIFF(CAST(b AS DATETIME), CAST(a AS DATETIME), day)",
"duckdb": "DATE_DIFF('day', CAST(a AS TIMESTAMP), CAST(b AS TIMESTAMP))",
"bigquery": "DATE_DIFF(CAST(b AS DATETIME), CAST(a AS DATETIME), DAY)",
"duckdb": "DATE_DIFF('DAY', CAST(a AS TIMESTAMP), CAST(b AS TIMESTAMP))",
"hive": "DATEDIFF(b, a)",
"redshift": "DATEDIFF(day, a, b)",
"presto": "DATE_DIFF('day', CAST(a AS TIMESTAMP), CAST(b AS TIMESTAMP))",
"redshift": "DATEDIFF(DAY, a, b)",
"presto": "DATE_DIFF('DAY', CAST(a AS TIMESTAMP), CAST(b AS TIMESTAMP))",
},
)
self.validate_all(
"SELECT DATEADD(month, 18, '2008-02-28')",
write={
"bigquery": "SELECT DATE_ADD(CAST('2008-02-28' AS DATETIME), INTERVAL 18 MONTH)",
"duckdb": "SELECT CAST('2008-02-28' AS TIMESTAMP) + INTERVAL 18 month",
"duckdb": "SELECT CAST('2008-02-28' AS TIMESTAMP) + INTERVAL 18 MONTH",
"hive": "SELECT ADD_MONTHS('2008-02-28', 18)",
"mysql": "SELECT DATE_ADD('2008-02-28', INTERVAL 18 MONTH)",
"postgres": "SELECT CAST('2008-02-28' AS TIMESTAMP) + INTERVAL '18 month'",
"presto": "SELECT DATE_ADD('month', 18, CAST('2008-02-28' AS TIMESTAMP))",
"redshift": "SELECT DATEADD(month, 18, '2008-02-28')",
"snowflake": "SELECT DATEADD(month, 18, CAST('2008-02-28' AS TIMESTAMPNTZ))",
"tsql": "SELECT DATEADD(month, 18, CAST('2008-02-28' AS DATETIME2))",
"postgres": "SELECT CAST('2008-02-28' AS TIMESTAMP) + INTERVAL '18 MONTH'",
"presto": "SELECT DATE_ADD('MONTH', 18, CAST('2008-02-28' AS TIMESTAMP))",
"redshift": "SELECT DATEADD(MONTH, 18, '2008-02-28')",
"snowflake": "SELECT DATEADD(MONTH, 18, CAST('2008-02-28' AS TIMESTAMPNTZ))",
"tsql": "SELECT DATEADD(MONTH, 18, CAST('2008-02-28' AS DATETIME2))",
},
)
self.validate_all(
"SELECT DATEDIFF(week, '2009-01-01', '2009-12-31')",
write={
"bigquery": "SELECT DATE_DIFF(CAST('2009-12-31' AS DATETIME), CAST('2009-01-01' AS DATETIME), week)",
"duckdb": "SELECT DATE_DIFF('week', CAST('2009-01-01' AS TIMESTAMP), CAST('2009-12-31' AS TIMESTAMP))",
"bigquery": "SELECT DATE_DIFF(CAST('2009-12-31' AS DATETIME), CAST('2009-01-01' AS DATETIME), WEEK)",
"duckdb": "SELECT DATE_DIFF('WEEK', CAST('2009-01-01' AS TIMESTAMP), CAST('2009-12-31' AS TIMESTAMP))",
"hive": "SELECT CAST(DATEDIFF('2009-12-31', '2009-01-01') / 7 AS INT)",
"postgres": "SELECT CAST(EXTRACT(days FROM (CAST('2009-12-31' AS TIMESTAMP) - CAST('2009-01-01' AS TIMESTAMP))) / 7 AS BIGINT)",
"presto": "SELECT DATE_DIFF('week', CAST('2009-01-01' AS TIMESTAMP), CAST('2009-12-31' AS TIMESTAMP))",
"redshift": "SELECT DATEDIFF(week, '2009-01-01', '2009-12-31')",
"snowflake": "SELECT DATEDIFF(week, '2009-01-01', '2009-12-31')",
"tsql": "SELECT DATEDIFF(week, '2009-01-01', '2009-12-31')",
"presto": "SELECT DATE_DIFF('WEEK', CAST('2009-01-01' AS TIMESTAMP), CAST('2009-12-31' AS TIMESTAMP))",
"redshift": "SELECT DATEDIFF(WEEK, '2009-01-01', '2009-12-31')",
"snowflake": "SELECT DATEDIFF(WEEK, '2009-01-01', '2009-12-31')",
"tsql": "SELECT DATEDIFF(WEEK, '2009-01-01', '2009-12-31')",
},
)
def test_identity(self):
self.validate_identity("SELECT DATEADD(day, 1, 'today')")
self.validate_identity("SELECT DATEADD(DAY, 1, 'today')")
self.validate_identity("SELECT * FROM #x")
self.validate_identity("SELECT INTERVAL '5 day'")
self.validate_identity("SELECT INTERVAL '5 DAY'")
self.validate_identity("foo$")
self.validate_identity("CAST('bla' AS SUPER)")
self.validate_identity("CREATE TABLE real1 (realcol REAL)")
@ -295,11 +296,11 @@ class TestRedshift(Validator):
)
self.validate_identity(
"SELECT DATE_DIFF('month', CAST('2020-02-29 00:00:00' AS TIMESTAMP), CAST('2020-03-02 00:00:00' AS TIMESTAMP))",
"SELECT DATEDIFF(month, CAST('2020-02-29 00:00:00' AS TIMESTAMP), CAST('2020-03-02 00:00:00' AS TIMESTAMP))",
"SELECT DATEDIFF(MONTH, CAST('2020-02-29 00:00:00' AS TIMESTAMP), CAST('2020-03-02 00:00:00' AS TIMESTAMP))",
)
self.validate_identity(
"SELECT * FROM x WHERE y = DATEADD('month', -1, DATE_TRUNC('month', (SELECT y FROM #temp_table)))",
"SELECT * FROM x WHERE y = DATEADD(month, -1, DATE_TRUNC('month', (SELECT y FROM #temp_table)))",
"SELECT * FROM x WHERE y = DATEADD(MONTH, -1, DATE_TRUNC('MONTH', (SELECT y FROM #temp_table)))",
)
self.validate_identity(
"SELECT 'a''b'",
@ -310,16 +311,16 @@ class TestRedshift(Validator):
"CREATE TABLE t (c BIGINT IDENTITY(0, 1))",
)
self.validate_identity(
"SELECT DATEADD(hour, 0, CAST('2020-02-02 01:03:05.124' AS TIMESTAMP))"
"SELECT DATEADD(HOUR, 0, CAST('2020-02-02 01:03:05.124' AS TIMESTAMP))"
)
self.validate_identity(
"SELECT DATEDIFF(second, '2020-02-02 00:00:00.000', '2020-02-02 01:03:05.124')"
"SELECT DATEDIFF(SECOND, '2020-02-02 00:00:00.000', '2020-02-02 01:03:05.124')"
)
self.validate_identity(
"CREATE OR REPLACE VIEW v1 AS SELECT id, AVG(average_metric1) AS m1, AVG(average_metric2) AS m2 FROM t GROUP BY id WITH NO SCHEMA BINDING"
)
self.validate_identity(
"SELECT caldate + INTERVAL '1 second' AS dateplus FROM date WHERE caldate = '12-31-2008'"
"SELECT caldate + INTERVAL '1 SECOND' AS dateplus FROM date WHERE caldate = '12-31-2008'"
)
self.validate_identity(
"SELECT COUNT(*) FROM event WHERE eventname LIKE '%Ring%' OR eventname LIKE '%Die%'"
@ -341,7 +342,7 @@ class TestRedshift(Validator):
)
self.validate_identity(
"SELECT DATEADD('day', ndays, caldate)",
"SELECT DATEADD(day, ndays, caldate)",
"SELECT DATEADD(DAY, ndays, caldate)",
)
self.validate_identity(
"CONVERT(INT, x)",
@ -349,8 +350,11 @@ class TestRedshift(Validator):
)
self.validate_identity(
"SELECT DATE_ADD('day', 1, DATE('2023-01-01'))",
"SELECT DATEADD(day, 1, DATE('2023-01-01'))",
"SELECT DATEADD(DAY, 1, DATE('2023-01-01'))",
)
self.validate_identity("SELECT * FROM x AS a, a.b AS c, c.d.e AS f, f.g.h.i.j.k AS l")
self.validate_identity(
"""SELECT
c_name,
View file
@ -10,6 +10,9 @@ class TestSnowflake(Validator):
dialect = "snowflake"
def test_snowflake(self):
self.validate_identity(
"INSERT OVERWRITE TABLE t SELECT 1", "INSERT OVERWRITE INTO t SELECT 1"
)
self.validate_identity("SELECT rename, replace")
expr = parse_one("SELECT APPROX_TOP_K(C4, 3, 5) FROM t")
expr.selects[0].assert_is(exp.AggFunc)
@ -36,9 +39,12 @@ WHERE
)""",
)
self.validate_identity("RM @parquet_stage")
self.validate_identity("REMOVE @parquet_stage")
self.validate_identity("SELECT TIMESTAMP_FROM_PARTS(d, t)")
self.validate_identity("SELECT GET_PATH(v, 'attr[0].name') FROM vartab")
self.validate_identity("SELECT TO_ARRAY(CAST(x AS ARRAY))")
self.validate_identity("SELECT TO_ARRAY(CAST(['test'] AS VARIANT))")
self.validate_identity("SELECT user_id, value FROM table_name sample ($s) SEED (0)")
self.validate_identity("SELECT ARRAY_UNIQUE_AGG(x)")
self.validate_identity("SELECT OBJECT_CONSTRUCT()")
self.validate_identity("SELECT DAYOFMONTH(CURRENT_TIMESTAMP())")
@ -74,6 +80,49 @@ WHERE
self.validate_identity(
'DESCRIBE TABLE "SNOWFLAKE_SAMPLE_DATA"."TPCDS_SF100TCL"."WEB_SITE" type=stage'
)
self.validate_identity(
"SELECT a FROM test PIVOT(SUM(x) FOR y IN ('z', 'q')) AS x TABLESAMPLE (0.1)"
)
self.validate_identity(
"""SELECT PARSE_JSON('{"x": "hello"}'):x LIKE 'hello'""",
"""SELECT GET_PATH(PARSE_JSON('{"x": "hello"}'), 'x') LIKE 'hello'""",
)
self.validate_identity(
"""SELECT data:x LIKE 'hello' FROM some_table""",
"""SELECT GET_PATH(data, 'x') LIKE 'hello' FROM some_table""",
)
self.validate_identity(
"SELECT SUM({ fn CONVERT(123, SQL_DOUBLE) })",
"SELECT SUM(CAST(123 AS DOUBLE))",
)
self.validate_identity(
"SELECT SUM({ fn CONVERT(123, SQL_VARCHAR) })",
"SELECT SUM(CAST(123 AS VARCHAR))",
)
self.validate_identity(
"SELECT TIMESTAMPFROMPARTS(d, t)",
"SELECT TIMESTAMP_FROM_PARTS(d, t)",
)
self.validate_identity(
"SELECT user_id, value FROM table_name SAMPLE ($s) SEED (0)",
"SELECT user_id, value FROM table_name TABLESAMPLE ($s) SEED (0)",
)
self.validate_identity(
"SELECT v:attr[0].name FROM vartab",
"SELECT GET_PATH(v, 'attr[0].name') FROM vartab",
)
self.validate_identity(
'SELECT v:"fruit" FROM vartab',
"""SELECT GET_PATH(v, '"fruit"') FROM vartab""",
)
self.validate_identity(
"v:attr[0]:name",
"GET_PATH(GET_PATH(v, 'attr[0]'), 'name')",
)
self.validate_identity(
"""SELECT PARSE_JSON('{"food":{"fruit":"banana"}}'):food.fruit::VARCHAR""",
"""SELECT CAST(GET_PATH(PARSE_JSON('{"food":{"fruit":"banana"}}'), 'food.fruit') AS VARCHAR)""",
)
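These identities pin down the normalization of Snowflake's colon-path syntax into explicit GET_PATH calls. A minimal sketch of the round trip through parse_one, mirroring the expectations above:

from sqlglot import parse_one

# The colon path is parsed once and re-rendered as GET_PATH.
expr = parse_one("SELECT v:attr[0].name FROM vartab", read="snowflake")
print(expr.sql(dialect="snowflake"))
# SELECT GET_PATH(v, 'attr[0].name') FROM vartab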
self.validate_identity(
"SELECT * FROM foo at",
"SELECT * FROM foo AS at",
@ -146,6 +195,80 @@ WHERE
"CAST(x AS VARCHAR)",
)
self.validate_all(
"OBJECT_CONSTRUCT_KEEP_NULL('key_1', 'one', 'key_2', NULL)",
read={
"bigquery": "JSON_OBJECT(['key_1', 'key_2'], ['one', NULL])",
"duckdb": "JSON_OBJECT('key_1', 'one', 'key_2', NULL)",
},
write={
"bigquery": "JSON_OBJECT('key_1', 'one', 'key_2', NULL)",
"duckdb": "JSON_OBJECT('key_1', 'one', 'key_2', NULL)",
"snowflake": "OBJECT_CONSTRUCT_KEEP_NULL('key_1', 'one', 'key_2', NULL)",
},
)
self.validate_all(
"SELECT * FROM example TABLESAMPLE (3) SEED (82)",
read={
"databricks": "SELECT * FROM example TABLESAMPLE (3 PERCENT) REPEATABLE (82)",
"duckdb": "SELECT * FROM example TABLESAMPLE (3 PERCENT) REPEATABLE (82)",
},
write={
"databricks": "SELECT * FROM example TABLESAMPLE (3 PERCENT) REPEATABLE (82)",
"duckdb": "SELECT * FROM example TABLESAMPLE (3 PERCENT) REPEATABLE (82)",
"snowflake": "SELECT * FROM example TABLESAMPLE (3) SEED (82)",
},
)
self.validate_all(
"SELECT TIME_FROM_PARTS(12, 34, 56, 987654321)",
write={
"duckdb": "SELECT MAKE_TIME(12, 34, 56 + (987654321 / 1000000000.0))",
"snowflake": "SELECT TIME_FROM_PARTS(12, 34, 56, 987654321)",
},
)
self.validate_all(
"SELECT TIMESTAMP_FROM_PARTS(2013, 4, 5, 12, 00, 00)",
read={
"duckdb": "SELECT MAKE_TIMESTAMP(2013, 4, 5, 12, 00, 00)",
},
write={
"duckdb": "SELECT MAKE_TIMESTAMP(2013, 4, 5, 12, 00, 00)",
"snowflake": "SELECT TIMESTAMP_FROM_PARTS(2013, 4, 5, 12, 00, 00)",
},
)
self.validate_all(
"""WITH vartab(v) AS (select parse_json('[{"attr": [{"name": "banana"}]}]')) SELECT GET_PATH(v, '[0].attr[0].name') FROM vartab""",
write={
"bigquery": """WITH vartab AS (SELECT PARSE_JSON('[{"attr": [{"name": "banana"}]}]') AS v) SELECT JSON_EXTRACT(v, '$[0].attr[0].name') FROM vartab""",
"duckdb": """WITH vartab(v) AS (SELECT JSON('[{"attr": [{"name": "banana"}]}]')) SELECT v -> '$[0].attr[0].name' FROM vartab""",
"mysql": """WITH vartab(v) AS (SELECT '[{"attr": [{"name": "banana"}]}]') SELECT JSON_EXTRACT(v, '$[0].attr[0].name') FROM vartab""",
"presto": """WITH vartab(v) AS (SELECT JSON_PARSE('[{"attr": [{"name": "banana"}]}]')) SELECT JSON_EXTRACT(v, '$[0].attr[0].name') FROM vartab""",
"snowflake": """WITH vartab(v) AS (SELECT PARSE_JSON('[{"attr": [{"name": "banana"}]}]')) SELECT GET_PATH(v, '[0].attr[0].name') FROM vartab""",
"tsql": """WITH vartab(v) AS (SELECT '[{"attr": [{"name": "banana"}]}]') SELECT JSON_VALUE(v, '$[0].attr[0].name') FROM vartab""",
},
)
self.validate_all(
"""WITH vartab(v) AS (select parse_json('{"attr": [{"name": "banana"}]}')) SELECT GET_PATH(v, 'attr[0].name') FROM vartab""",
write={
"bigquery": """WITH vartab AS (SELECT PARSE_JSON('{"attr": [{"name": "banana"}]}') AS v) SELECT JSON_EXTRACT(v, '$.attr[0].name') FROM vartab""",
"duckdb": """WITH vartab(v) AS (SELECT JSON('{"attr": [{"name": "banana"}]}')) SELECT v -> '$.attr[0].name' FROM vartab""",
"mysql": """WITH vartab(v) AS (SELECT '{"attr": [{"name": "banana"}]}') SELECT JSON_EXTRACT(v, '$.attr[0].name') FROM vartab""",
"presto": """WITH vartab(v) AS (SELECT JSON_PARSE('{"attr": [{"name": "banana"}]}')) SELECT JSON_EXTRACT(v, '$.attr[0].name') FROM vartab""",
"snowflake": """WITH vartab(v) AS (SELECT PARSE_JSON('{"attr": [{"name": "banana"}]}')) SELECT GET_PATH(v, 'attr[0].name') FROM vartab""",
"tsql": """WITH vartab(v) AS (SELECT '{"attr": [{"name": "banana"}]}') SELECT JSON_VALUE(v, '$.attr[0].name') FROM vartab""",
},
)
self.validate_all(
"""SELECT PARSE_JSON('{"fruit":"banana"}'):fruit""",
write={
"bigquery": """SELECT JSON_EXTRACT(PARSE_JSON('{"fruit":"banana"}'), '$.fruit')""",
"duckdb": """SELECT JSON('{"fruit":"banana"}') -> '$.fruit'""",
"mysql": """SELECT JSON_EXTRACT('{"fruit":"banana"}', '$.fruit')""",
"presto": """SELECT JSON_EXTRACT(JSON_PARSE('{"fruit":"banana"}'), '$.fruit')""",
"snowflake": """SELECT GET_PATH(PARSE_JSON('{"fruit":"banana"}'), 'fruit')""",
"tsql": """SELECT JSON_VALUE('{"fruit":"banana"}', '$.fruit')""",
},
)
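A minimal usage sketch for the cross-dialect JSON access above, assuming only the public transpile entry point; the expected output is the duckdb string from the block:

import sqlglot

snow = """SELECT PARSE_JSON('{"fruit":"banana"}'):fruit"""
print(sqlglot.transpile(snow, read="snowflake", write="duckdb")[0])
# SELECT JSON('{"fruit":"banana"}') -> '$.fruit'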
self.validate_all(
"SELECT TO_ARRAY(['test'])",
write={
@ -162,7 +285,7 @@ WHERE
)
self.validate_all(
# We need to qualify the columns in this query because "value" would be ambiguous
'WITH t(x, "value") AS (SELECT [1, 2, 3], 1) SELECT IFF(_u.pos = _u_2.pos_2, _u_2."value", NULL) AS "value" FROM t, TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (GREATEST(ARRAY_SIZE(t.x)) - 1) + 1))) AS _u(seq, key, path, index, pos, this) CROSS JOIN TABLE(FLATTEN(INPUT => t.x)) AS _u_2(seq, key, path, pos_2, "value", this) WHERE _u.pos = _u_2.pos_2 OR (_u.pos > (ARRAY_SIZE(t.x) - 1) AND _u_2.pos_2 = (ARRAY_SIZE(t.x) - 1))',
'WITH t(x, "value") AS (SELECT [1, 2, 3], 1) SELECT IFF(_u.pos = _u_2.pos_2, _u_2."value", NULL) AS "value" FROM t CROSS JOIN TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (GREATEST(ARRAY_SIZE(t.x)) - 1) + 1))) AS _u(seq, key, path, index, pos, this) CROSS JOIN TABLE(FLATTEN(INPUT => t.x)) AS _u_2(seq, key, path, pos_2, "value", this) WHERE _u.pos = _u_2.pos_2 OR (_u.pos > (ARRAY_SIZE(t.x) - 1) AND _u_2.pos_2 = (ARRAY_SIZE(t.x) - 1))',
read={
"duckdb": 'WITH t(x, "value") AS (SELECT [1,2,3], 1) SELECT UNNEST(t.x) AS "value" FROM t',
},
@ -172,7 +295,7 @@ WHERE
write={
"duckdb": "SELECT {'Manitoba': 'Winnipeg', 'foo': 'bar'} AS province_capital",
"snowflake": "SELECT OBJECT_CONSTRUCT('Manitoba', 'Winnipeg', 'foo', 'bar') AS province_capital",
"spark": "SELECT STRUCT('Manitoba' AS Winnipeg, 'foo' AS bar) AS province_capital",
"spark": "SELECT STRUCT('Winnipeg' AS Manitoba, 'bar' AS foo) AS province_capital",
},
)
self.validate_all(
@ -418,13 +541,11 @@ WHERE
},
)
self.validate_all(
'x:a:"b c"',
'''SELECT PARSE_JSON('{"a": {"b c": "foo"}}'):a:"b c"''',
write={
"duckdb": "x['a']['b c']",
"hive": "x['a']['b c']",
"presto": "x['a']['b c']",
"snowflake": "x['a']['b c']",
"spark": "x['a']['b c']",
"duckdb": """SELECT JSON('{"a": {"b c": "foo"}}') -> '$.a' -> '$."b c"'""",
"mysql": """SELECT JSON_EXTRACT(JSON_EXTRACT('{"a": {"b c": "foo"}}', '$.a'), '$."b c"')""",
"snowflake": """SELECT GET_PATH(GET_PATH(PARSE_JSON('{"a": {"b c": "foo"}}'), 'a'), '"b c"')""",
},
)
self.validate_all(
@ -457,6 +578,14 @@ WHERE
"spark": "SELECT TIMESTAMP_MILLIS(1659981729000)",
},
)
self.validate_all(
"SELECT TO_TIMESTAMP(16599817290000, 4)",
write={
"bigquery": "SELECT TIMESTAMP_SECONDS(CAST(16599817290000 / POW(10, 4) AS INT64))",
"snowflake": "SELECT TO_TIMESTAMP(16599817290000, 4)",
"spark": "SELECT TIMESTAMP_SECONDS(16599817290000 / POW(10, 4))",
},
)
self.validate_all(
"SELECT TO_TIMESTAMP('1659981729')",
write={
@ -467,11 +596,11 @@ WHERE
self.validate_all(
"SELECT TO_TIMESTAMP(1659981729000000000, 9)",
write={
"bigquery": "SELECT TIMESTAMP_MICROS(CAST(1659981729000000000 / 1000 AS INT64))",
"duckdb": "SELECT TO_TIMESTAMP(1659981729000000000 / 1000000000)",
"presto": "SELECT FROM_UNIXTIME(CAST(1659981729000000000 AS DOUBLE) / 1000000000)",
"bigquery": "SELECT TIMESTAMP_SECONDS(CAST(1659981729000000000 / POW(10, 9) AS INT64))",
"duckdb": "SELECT TO_TIMESTAMP(1659981729000000000 / POW(10, 9))",
"presto": "SELECT FROM_UNIXTIME(CAST(1659981729000000000 AS DOUBLE) / POW(10, 9))",
"snowflake": "SELECT TO_TIMESTAMP(1659981729000000000, 9)",
"spark": "SELECT TIMESTAMP_SECONDS(1659981729000000000 / 1000000000)",
"spark": "SELECT TIMESTAMP_SECONDS(1659981729000000000 / POW(10, 9))",
},
)
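The TO_TIMESTAMP cases now scale by POW(10, precision) rather than a hard-coded divisor, so any scale argument is handled uniformly. A minimal sketch of the nanosecond case against DuckDB:

import sqlglot

print(sqlglot.transpile(
    "SELECT TO_TIMESTAMP(1659981729000000000, 9)",
    read="snowflake",
    write="duckdb",
)[0])
# SELECT TO_TIMESTAMP(1659981729000000000 / POW(10, 9))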
self.validate_all(
@ -660,7 +789,7 @@ WHERE
)
self.validate_identity(
"SELECT parse_json($1):a.b FROM @mystage2/data1.json.gz",
"SELECT PARSE_JSON($1)['a'].b FROM @mystage2/data1.json.gz",
"SELECT GET_PATH(PARSE_JSON($1), 'a.b') FROM @mystage2/data1.json.gz",
)
self.validate_identity(
"SELECT * FROM @mystage t (c1)",
@ -676,15 +805,24 @@ WHERE
self.validate_identity("SELECT * FROM testtable TABLESAMPLE (100)")
self.validate_identity("SELECT * FROM testtable TABLESAMPLE SYSTEM (3) SEED (82)")
self.validate_identity("SELECT * FROM testtable TABLESAMPLE (10 ROWS)")
self.validate_identity("SELECT * FROM testtable SAMPLE (10)")
self.validate_identity("SELECT * FROM testtable SAMPLE ROW (0)")
self.validate_identity("SELECT a FROM test SAMPLE BLOCK (0.5) SEED (42)")
self.validate_identity(
"SELECT i, j FROM table1 AS t1 INNER JOIN table2 AS t2 TABLESAMPLE (50) WHERE t2.j = t1.i"
)
self.validate_identity(
"SELECT * FROM (SELECT * FROM t1 JOIN t2 ON t1.a = t2.c) TABLESAMPLE (1)"
)
self.validate_identity(
"SELECT * FROM testtable SAMPLE (10)",
"SELECT * FROM testtable TABLESAMPLE (10)",
)
self.validate_identity(
"SELECT * FROM testtable SAMPLE ROW (0)",
"SELECT * FROM testtable TABLESAMPLE ROW (0)",
)
self.validate_identity(
"SELECT a FROM test SAMPLE BLOCK (0.5) SEED (42)",
"SELECT a FROM test TABLESAMPLE BLOCK (0.5) SEED (42)",
)
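Snowflake's SAMPLE shorthand is still accepted on input but is now normalized to TABLESAMPLE on output. A minimal sketch:

from sqlglot import parse_one

print(parse_one("SELECT * FROM testtable SAMPLE (10)", read="snowflake").sql(dialect="snowflake"))
# SELECT * FROM testtable TABLESAMPLE (10)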
self.validate_all(
"""
@ -695,20 +833,20 @@ WHERE
table2 AS t2 SAMPLE (50) -- 50% of rows in table2
WHERE t2.j = t1.i""",
write={
"snowflake": "SELECT i, j FROM table1 AS t1 SAMPLE (25) /* 25% of rows in table1 */ INNER JOIN table2 AS t2 SAMPLE (50) /* 50% of rows in table2 */ WHERE t2.j = t1.i",
"snowflake": "SELECT i, j FROM table1 AS t1 TABLESAMPLE (25) /* 25% of rows in table1 */ INNER JOIN table2 AS t2 TABLESAMPLE (50) /* 50% of rows in table2 */ WHERE t2.j = t1.i",
},
)
self.validate_all(
"SELECT * FROM testtable SAMPLE BLOCK (0.012) REPEATABLE (99992)",
write={
"snowflake": "SELECT * FROM testtable SAMPLE BLOCK (0.012) SEED (99992)",
"snowflake": "SELECT * FROM testtable TABLESAMPLE BLOCK (0.012) SEED (99992)",
},
)
self.validate_all(
"SELECT * FROM (SELECT * FROM t1 join t2 on t1.a = t2.c) SAMPLE (1)",
write={
"snowflake": "SELECT * FROM (SELECT * FROM t1 JOIN t2 ON t1.a = t2.c) SAMPLE (1)",
"spark": "SELECT * FROM (SELECT * FROM t1 JOIN t2 ON t1.a = t2.c) SAMPLE (1 PERCENT)",
"snowflake": "SELECT * FROM (SELECT * FROM t1 JOIN t2 ON t1.a = t2.c) TABLESAMPLE (1)",
"spark": "SELECT * FROM (SELECT * FROM t1 JOIN t2 ON t1.a = t2.c) TABLESAMPLE (1 PERCENT)",
},
)
@ -791,6 +929,11 @@ WHERE
"DATEDIFF(DAY, CAST('2007-12-25' AS DATE), CAST('2008-12-25' AS DATE))",
)
self.validate_identity("DATEADD(y, 5, x)", "DATEADD(YEAR, 5, x)")
self.validate_identity("DATE_PART(yyy, x)", "DATE_PART(YEAR, x)")
self.validate_identity("DATE_TRUNC(yr, x)", "DATE_TRUNC('YEAR', x)")
def test_semi_structured_types(self):
self.validate_identity("SELECT CAST(a AS VARIANT)")
self.validate_identity("SELECT CAST(a AS ARRAY)")
@ -876,7 +1019,7 @@ WHERE
location=@s2/logs/
partition_type = user_specified
file_format = (type = parquet)""",
"CREATE EXTERNAL TABLE et2 (col1 DATE AS (CAST(PARSE_JSON(metadata$external_table_partition)['COL1'] AS DATE)), col2 VARCHAR AS (CAST(PARSE_JSON(metadata$external_table_partition)['COL2'] AS VARCHAR)), col3 DECIMAL AS (CAST(PARSE_JSON(metadata$external_table_partition)['COL3'] AS DECIMAL))) LOCATION @s2/logs/ PARTITION BY (col1, col2, col3) partition_type=user_specified file_format=(type = parquet)",
"CREATE EXTERNAL TABLE et2 (col1 DATE AS (CAST(GET_PATH(PARSE_JSON(metadata$external_table_partition), 'COL1') AS DATE)), col2 VARCHAR AS (CAST(GET_PATH(PARSE_JSON(metadata$external_table_partition), 'COL2') AS VARCHAR)), col3 DECIMAL AS (CAST(GET_PATH(PARSE_JSON(metadata$external_table_partition), 'COL3') AS DECIMAL))) LOCATION @s2/logs/ PARTITION BY (col1, col2, col3) partition_type=user_specified file_format=(type = parquet)",
)
self.validate_identity("CREATE OR REPLACE VIEW foo (uid) COPY GRANTS AS (SELECT 1)")
self.validate_identity("CREATE TABLE geospatial_table (id INT, g GEOGRAPHY)")
@ -1092,9 +1235,9 @@ FROM cs.telescope.dag_report, TABLE(FLATTEN(input => SPLIT(operators, ','))) AS
"snowflake": """SELECT
id AS "ID",
f.value AS "Contact",
f1.value['type'] AS "Type",
f1.value['content'] AS "Details"
FROM persons AS p, LATERAL FLATTEN(input => p.c, path => 'contact') AS f(SEQ, KEY, PATH, INDEX, VALUE, THIS), LATERAL FLATTEN(input => f.value['business']) AS f1(SEQ, KEY, PATH, INDEX, VALUE, THIS)""",
GET_PATH(f1.value, 'type') AS "Type",
GET_PATH(f1.value, 'content') AS "Details"
FROM persons AS p, LATERAL FLATTEN(input => p.c, path => 'contact') AS f(SEQ, KEY, PATH, INDEX, VALUE, THIS), LATERAL FLATTEN(input => GET_PATH(f.value, 'business')) AS f1(SEQ, KEY, PATH, INDEX, VALUE, THIS)""",
},
pretty=True,
)
@ -1306,10 +1449,9 @@ MATCH_RECOGNIZE (
def test_show(self):
# Parsed as Command
self.validate_identity("SHOW COLUMNS IN TABLE dt_test")
self.validate_identity("SHOW TABLES LIKE 'line%' IN tpch.public")
ast = parse_one("SHOW TABLES HISTORY IN tpch.public")
ast = parse_one("SHOW TABLES HISTORY IN tpch.public", read="snowflake")
self.assertIsInstance(ast, exp.Command)
# Parsed as Show
@ -1331,9 +1473,22 @@ MATCH_RECOGNIZE (
ast = parse_one('SHOW PRIMARY KEYS IN "TEST"."PUBLIC"."customers"', read="snowflake")
table = ast.find(exp.Table)
self.assertIsNotNone(table)
self.assertEqual(table.sql(dialect="snowflake"), '"TEST"."PUBLIC"."customers"')
self.validate_identity("SHOW COLUMNS")
self.validate_identity("SHOW COLUMNS IN TABLE dt_test")
self.validate_identity("SHOW COLUMNS LIKE '_foo%' IN TABLE dt_test")
self.validate_identity("SHOW COLUMNS IN VIEW")
self.validate_identity("SHOW COLUMNS LIKE '_foo%' IN VIEW dt_test")
ast = parse_one("SHOW COLUMNS LIKE '_testing%' IN dt_test", read="snowflake")
table = ast.find(exp.Table)
literal = ast.find(exp.Literal)
self.assertEqual(table.sql(dialect="snowflake"), "dt_test")
self.assertEqual(literal.sql(dialect="snowflake"), "'_testing%'")
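Since SHOW COLUMNS now parses into a proper expression tree, the target table and the LIKE pattern are reachable as ordinary nodes. A minimal sketch of walking that AST, mirroring the assertions above:

from sqlglot import exp, parse_one

ast = parse_one("SHOW COLUMNS LIKE '_testing%' IN dt_test", read="snowflake")
print(ast.find(exp.Table).sql(dialect="snowflake"))    # dt_test
print(ast.find(exp.Literal).sql(dialect="snowflake"))  # '_testing%'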
def test_swap(self):
ast = parse_one("ALTER TABLE a SWAP WITH b", read="snowflake")
assert isinstance(ast, exp.AlterTable)
View file
@ -237,6 +237,10 @@ TBLPROPERTIES (
"REFRESH TABLE t",
)
self.validate_identity("DESCRIBE EXTENDED db.table")
self.validate_identity("SELECT * FROM test TABLESAMPLE (50 PERCENT)")
self.validate_identity("SELECT * FROM test TABLESAMPLE (5 ROWS)")
self.validate_identity("SELECT * FROM test TABLESAMPLE (BUCKET 4 OUT OF 10)")
self.validate_identity("REFRESH 'hdfs://path/to/table'")
self.validate_identity("REFRESH TABLE tempDB.view1")
self.validate_identity("SELECT CASE WHEN a = NULL THEN 1 ELSE 2 END")
@ -244,13 +248,21 @@ TBLPROPERTIES (
self.validate_identity("SELECT TRANSFORM(ARRAY(1, 2, 3), x -> x + 1)")
self.validate_identity("SELECT TRANSFORM(ARRAY(1, 2, 3), (x, i) -> x + i)")
self.validate_identity("REFRESH TABLE a.b.c")
self.validate_identity("INTERVAL -86 days")
self.validate_identity("INTERVAL -86 DAYS")
self.validate_identity("SELECT UNIX_TIMESTAMP()")
self.validate_identity("TRIM(' SparkSQL ')")
self.validate_identity("TRIM(BOTH 'SL' FROM 'SSparkSQLS')")
self.validate_identity("TRIM(LEADING 'SL' FROM 'SSparkSQLS')")
self.validate_identity("TRIM(TRAILING 'SL' FROM 'SSparkSQLS')")
self.validate_identity("SPLIT(str, pattern, lim)")
self.validate_identity(
"SELECT INTERVAL '5' HOURS '30' MINUTES '5' SECONDS '6' MILLISECONDS '7' MICROSECONDS",
"SELECT INTERVAL '5' HOURS + INTERVAL '30' MINUTES + INTERVAL '5' SECONDS + INTERVAL '6' MILLISECONDS + INTERVAL '7' MICROSECONDS",
)
self.validate_identity(
"SELECT INTERVAL 5 HOURS 30 MINUTES 5 SECONDS 6 MILLISECONDS 7 MICROSECONDS",
"SELECT INTERVAL '5' HOURS + INTERVAL '30' MINUTES + INTERVAL '5' SECONDS + INTERVAL '6' MILLISECONDS + INTERVAL '7' MICROSECONDS",
)
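Compound Spark intervals are now split into a sum of single-unit intervals with quoted, upper-cased units. A minimal sketch:

from sqlglot import parse_one

sql = "SELECT INTERVAL 5 HOURS 30 MINUTES 5 SECONDS 6 MILLISECONDS 7 MICROSECONDS"
print(parse_one(sql, read="spark").sql(dialect="spark"))
# SELECT INTERVAL '5' HOURS + INTERVAL '30' MINUTES + INTERVAL '5' SECONDS + INTERVAL '6' MILLISECONDS + INTERVAL '7' MICROSECONDS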
self.validate_identity(
"SELECT REGEXP_REPLACE('100-200', r'([^0-9])', '')",
"SELECT REGEXP_REPLACE('100-200', '([^0-9])', '')",
@ -299,25 +311,25 @@ TBLPROPERTIES (
},
)
self.validate_all(
"SELECT DATEDIFF(month, CAST('1996-10-30' AS TIMESTAMP), CAST('1997-02-28 10:30:00' AS TIMESTAMP))",
"SELECT DATEDIFF(MONTH, CAST('1996-10-30' AS TIMESTAMP), CAST('1997-02-28 10:30:00' AS TIMESTAMP))",
read={
"duckdb": "SELECT DATEDIFF('month', CAST('1996-10-30' AS TIMESTAMP), CAST('1997-02-28 10:30:00' AS TIMESTAMP))",
},
write={
"spark": "SELECT DATEDIFF(month, TO_DATE(CAST('1996-10-30' AS TIMESTAMP)), TO_DATE(CAST('1997-02-28 10:30:00' AS TIMESTAMP)))",
"spark": "SELECT DATEDIFF(MONTH, TO_DATE(CAST('1996-10-30' AS TIMESTAMP)), TO_DATE(CAST('1997-02-28 10:30:00' AS TIMESTAMP)))",
"spark2": "SELECT CAST(MONTHS_BETWEEN(TO_DATE(CAST('1997-02-28 10:30:00' AS TIMESTAMP)), TO_DATE(CAST('1996-10-30' AS TIMESTAMP))) AS INT)",
},
)
self.validate_all(
"SELECT DATEDIFF(week, '2020-01-01', '2020-12-31')",
write={
"bigquery": "SELECT DATE_DIFF(CAST('2020-12-31' AS DATE), CAST('2020-01-01' AS DATE), week)",
"duckdb": "SELECT DATE_DIFF('week', CAST('2020-01-01' AS DATE), CAST('2020-12-31' AS DATE))",
"bigquery": "SELECT DATE_DIFF(CAST('2020-12-31' AS DATE), CAST('2020-01-01' AS DATE), WEEK)",
"duckdb": "SELECT DATE_DIFF('WEEK', CAST('2020-01-01' AS DATE), CAST('2020-12-31' AS DATE))",
"hive": "SELECT CAST(DATEDIFF(TO_DATE('2020-12-31'), TO_DATE('2020-01-01')) / 7 AS INT)",
"postgres": "SELECT CAST(EXTRACT(days FROM (CAST(CAST('2020-12-31' AS DATE) AS TIMESTAMP) - CAST(CAST('2020-01-01' AS DATE) AS TIMESTAMP))) / 7 AS BIGINT)",
"redshift": "SELECT DATEDIFF(week, CAST('2020-01-01' AS DATE), CAST('2020-12-31' AS DATE))",
"snowflake": "SELECT DATEDIFF(week, CAST('2020-01-01' AS DATE), CAST('2020-12-31' AS DATE))",
"spark": "SELECT DATEDIFF(week, TO_DATE('2020-01-01'), TO_DATE('2020-12-31'))",
"redshift": "SELECT DATEDIFF(WEEK, CAST('2020-01-01' AS DATE), CAST('2020-12-31' AS DATE))",
"snowflake": "SELECT DATEDIFF(WEEK, CAST('2020-01-01' AS DATE), CAST('2020-12-31' AS DATE))",
"spark": "SELECT DATEDIFF(WEEK, TO_DATE('2020-01-01'), TO_DATE('2020-12-31'))",
},
)
self.validate_all(
@ -624,23 +636,23 @@ TBLPROPERTIES (
self.validate_all(
"SELECT EXPLODE(x) FROM tbl",
write={
"bigquery": "SELECT IF(pos = pos_2, col, NULL) AS col FROM tbl, UNNEST(GENERATE_ARRAY(0, GREATEST(ARRAY_LENGTH(x)) - 1)) AS pos CROSS JOIN UNNEST(x) AS col WITH OFFSET AS pos_2 WHERE pos = pos_2 OR (pos > (ARRAY_LENGTH(x) - 1) AND pos_2 = (ARRAY_LENGTH(x) - 1))",
"presto": "SELECT IF(_u.pos = _u_2.pos_2, _u_2.col) AS col FROM tbl, UNNEST(SEQUENCE(1, GREATEST(CARDINALITY(x)))) AS _u(pos) CROSS JOIN UNNEST(x) WITH ORDINALITY AS _u_2(col, pos_2) WHERE _u.pos = _u_2.pos_2 OR (_u.pos > CARDINALITY(x) AND _u_2.pos_2 = CARDINALITY(x))",
"bigquery": "SELECT IF(pos = pos_2, col, NULL) AS col FROM tbl CROSS JOIN UNNEST(GENERATE_ARRAY(0, GREATEST(ARRAY_LENGTH(x)) - 1)) AS pos CROSS JOIN UNNEST(x) AS col WITH OFFSET AS pos_2 WHERE pos = pos_2 OR (pos > (ARRAY_LENGTH(x) - 1) AND pos_2 = (ARRAY_LENGTH(x) - 1))",
"presto": "SELECT IF(_u.pos = _u_2.pos_2, _u_2.col) AS col FROM tbl CROSS JOIN UNNEST(SEQUENCE(1, GREATEST(CARDINALITY(x)))) AS _u(pos) CROSS JOIN UNNEST(x) WITH ORDINALITY AS _u_2(col, pos_2) WHERE _u.pos = _u_2.pos_2 OR (_u.pos > CARDINALITY(x) AND _u_2.pos_2 = CARDINALITY(x))",
"spark": "SELECT EXPLODE(x) FROM tbl",
},
)
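The EXPLODE rewrites above now emit an explicit CROSS JOIN rather than a comma join for the generated UNNEST. A minimal sketch of the Presto rendering (output truncated here; the full string is in the assertion above):

import sqlglot

print(sqlglot.transpile("SELECT EXPLODE(x) FROM tbl", read="spark", write="presto")[0])
# SELECT IF(_u.pos = _u_2.pos_2, _u_2.col) AS col FROM tbl CROSS JOIN UNNEST(...) ...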
self.validate_all(
"SELECT EXPLODE(col) FROM _u",
write={
"bigquery": "SELECT IF(pos = pos_2, col_2, NULL) AS col_2 FROM _u, UNNEST(GENERATE_ARRAY(0, GREATEST(ARRAY_LENGTH(col)) - 1)) AS pos CROSS JOIN UNNEST(col) AS col_2 WITH OFFSET AS pos_2 WHERE pos = pos_2 OR (pos > (ARRAY_LENGTH(col) - 1) AND pos_2 = (ARRAY_LENGTH(col) - 1))",
"presto": "SELECT IF(_u_2.pos = _u_3.pos_2, _u_3.col_2) AS col_2 FROM _u, UNNEST(SEQUENCE(1, GREATEST(CARDINALITY(col)))) AS _u_2(pos) CROSS JOIN UNNEST(col) WITH ORDINALITY AS _u_3(col_2, pos_2) WHERE _u_2.pos = _u_3.pos_2 OR (_u_2.pos > CARDINALITY(col) AND _u_3.pos_2 = CARDINALITY(col))",
"bigquery": "SELECT IF(pos = pos_2, col_2, NULL) AS col_2 FROM _u CROSS JOIN UNNEST(GENERATE_ARRAY(0, GREATEST(ARRAY_LENGTH(col)) - 1)) AS pos CROSS JOIN UNNEST(col) AS col_2 WITH OFFSET AS pos_2 WHERE pos = pos_2 OR (pos > (ARRAY_LENGTH(col) - 1) AND pos_2 = (ARRAY_LENGTH(col) - 1))",
"presto": "SELECT IF(_u_2.pos = _u_3.pos_2, _u_3.col_2) AS col_2 FROM _u CROSS JOIN UNNEST(SEQUENCE(1, GREATEST(CARDINALITY(col)))) AS _u_2(pos) CROSS JOIN UNNEST(col) WITH ORDINALITY AS _u_3(col_2, pos_2) WHERE _u_2.pos = _u_3.pos_2 OR (_u_2.pos > CARDINALITY(col) AND _u_3.pos_2 = CARDINALITY(col))",
"spark": "SELECT EXPLODE(col) FROM _u",
},
)
self.validate_all(
"SELECT EXPLODE(col) AS exploded FROM schema.tbl",
write={
"presto": "SELECT IF(_u.pos = _u_2.pos_2, _u_2.exploded) AS exploded FROM schema.tbl, UNNEST(SEQUENCE(1, GREATEST(CARDINALITY(col)))) AS _u(pos) CROSS JOIN UNNEST(col) WITH ORDINALITY AS _u_2(exploded, pos_2) WHERE _u.pos = _u_2.pos_2 OR (_u.pos > CARDINALITY(col) AND _u_2.pos_2 = CARDINALITY(col))",
"presto": "SELECT IF(_u.pos = _u_2.pos_2, _u_2.exploded) AS exploded FROM schema.tbl CROSS JOIN UNNEST(SEQUENCE(1, GREATEST(CARDINALITY(col)))) AS _u(pos) CROSS JOIN UNNEST(col) WITH ORDINALITY AS _u_2(exploded, pos_2) WHERE _u.pos = _u_2.pos_2 OR (_u.pos > CARDINALITY(col) AND _u_2.pos_2 = CARDINALITY(col))",
},
)
self.validate_all(
@ -666,13 +678,13 @@ TBLPROPERTIES (
self.validate_all(
"SELECT POSEXPLODE(ARRAY(2, 3)), EXPLODE(ARRAY(4, 5, 6)) FROM tbl",
write={
"bigquery": "SELECT IF(pos = pos_2, col, NULL) AS col, IF(pos = pos_2, pos_2, NULL) AS pos_2, IF(pos = pos_3, col_2, NULL) AS col_2 FROM tbl, UNNEST(GENERATE_ARRAY(0, GREATEST(ARRAY_LENGTH([2, 3]), ARRAY_LENGTH([4, 5, 6])) - 1)) AS pos CROSS JOIN UNNEST([2, 3]) AS col WITH OFFSET AS pos_2 CROSS JOIN UNNEST([4, 5, 6]) AS col_2 WITH OFFSET AS pos_3 WHERE (pos = pos_2 OR (pos > (ARRAY_LENGTH([2, 3]) - 1) AND pos_2 = (ARRAY_LENGTH([2, 3]) - 1))) AND (pos = pos_3 OR (pos > (ARRAY_LENGTH([4, 5, 6]) - 1) AND pos_3 = (ARRAY_LENGTH([4, 5, 6]) - 1)))",
"presto": "SELECT IF(_u.pos = _u_2.pos_2, _u_2.col) AS col, IF(_u.pos = _u_2.pos_2, _u_2.pos_2) AS pos_2, IF(_u.pos = _u_3.pos_3, _u_3.col_2) AS col_2 FROM tbl, UNNEST(SEQUENCE(1, GREATEST(CARDINALITY(ARRAY[2, 3]), CARDINALITY(ARRAY[4, 5, 6])))) AS _u(pos) CROSS JOIN UNNEST(ARRAY[2, 3]) WITH ORDINALITY AS _u_2(col, pos_2) CROSS JOIN UNNEST(ARRAY[4, 5, 6]) WITH ORDINALITY AS _u_3(col_2, pos_3) WHERE (_u.pos = _u_2.pos_2 OR (_u.pos > CARDINALITY(ARRAY[2, 3]) AND _u_2.pos_2 = CARDINALITY(ARRAY[2, 3]))) AND (_u.pos = _u_3.pos_3 OR (_u.pos > CARDINALITY(ARRAY[4, 5, 6]) AND _u_3.pos_3 = CARDINALITY(ARRAY[4, 5, 6])))",
"bigquery": "SELECT IF(pos = pos_2, col, NULL) AS col, IF(pos = pos_2, pos_2, NULL) AS pos_2, IF(pos = pos_3, col_2, NULL) AS col_2 FROM tbl CROSS JOIN UNNEST(GENERATE_ARRAY(0, GREATEST(ARRAY_LENGTH([2, 3]), ARRAY_LENGTH([4, 5, 6])) - 1)) AS pos CROSS JOIN UNNEST([2, 3]) AS col WITH OFFSET AS pos_2 CROSS JOIN UNNEST([4, 5, 6]) AS col_2 WITH OFFSET AS pos_3 WHERE (pos = pos_2 OR (pos > (ARRAY_LENGTH([2, 3]) - 1) AND pos_2 = (ARRAY_LENGTH([2, 3]) - 1))) AND (pos = pos_3 OR (pos > (ARRAY_LENGTH([4, 5, 6]) - 1) AND pos_3 = (ARRAY_LENGTH([4, 5, 6]) - 1)))",
"presto": "SELECT IF(_u.pos = _u_2.pos_2, _u_2.col) AS col, IF(_u.pos = _u_2.pos_2, _u_2.pos_2) AS pos_2, IF(_u.pos = _u_3.pos_3, _u_3.col_2) AS col_2 FROM tbl CROSS JOIN UNNEST(SEQUENCE(1, GREATEST(CARDINALITY(ARRAY[2, 3]), CARDINALITY(ARRAY[4, 5, 6])))) AS _u(pos) CROSS JOIN UNNEST(ARRAY[2, 3]) WITH ORDINALITY AS _u_2(col, pos_2) CROSS JOIN UNNEST(ARRAY[4, 5, 6]) WITH ORDINALITY AS _u_3(col_2, pos_3) WHERE (_u.pos = _u_2.pos_2 OR (_u.pos > CARDINALITY(ARRAY[2, 3]) AND _u_2.pos_2 = CARDINALITY(ARRAY[2, 3]))) AND (_u.pos = _u_3.pos_3 OR (_u.pos > CARDINALITY(ARRAY[4, 5, 6]) AND _u_3.pos_3 = CARDINALITY(ARRAY[4, 5, 6])))",
},
)
self.validate_all(
"SELECT col, pos, POSEXPLODE(ARRAY(2, 3)) FROM _u",
write={
"presto": "SELECT col, pos, IF(_u_2.pos_2 = _u_3.pos_3, _u_3.col_2) AS col_2, IF(_u_2.pos_2 = _u_3.pos_3, _u_3.pos_3) AS pos_3 FROM _u, UNNEST(SEQUENCE(1, GREATEST(CARDINALITY(ARRAY[2, 3])))) AS _u_2(pos_2) CROSS JOIN UNNEST(ARRAY[2, 3]) WITH ORDINALITY AS _u_3(col_2, pos_3) WHERE _u_2.pos_2 = _u_3.pos_3 OR (_u_2.pos_2 > CARDINALITY(ARRAY[2, 3]) AND _u_3.pos_3 = CARDINALITY(ARRAY[2, 3]))",
"presto": "SELECT col, pos, IF(_u_2.pos_2 = _u_3.pos_3, _u_3.col_2) AS col_2, IF(_u_2.pos_2 = _u_3.pos_3, _u_3.pos_3) AS pos_3 FROM _u CROSS JOIN UNNEST(SEQUENCE(1, GREATEST(CARDINALITY(ARRAY[2, 3])))) AS _u_2(pos_2) CROSS JOIN UNNEST(ARRAY[2, 3]) WITH ORDINALITY AS _u_3(col_2, pos_3) WHERE _u_2.pos_2 = _u_3.pos_3 OR (_u_2.pos_2 > CARDINALITY(ARRAY[2, 3]) AND _u_3.pos_3 = CARDINALITY(ARRAY[2, 3]))",
},
)
View file
@ -11,10 +11,10 @@ class TestStarrocks(Validator):
def test_time(self):
self.validate_identity("TIMESTAMP('2022-01-01')")
self.validate_identity(
"SELECT DATE_DIFF('second', '2010-11-30 23:59:59', '2010-11-30 20:58:59')"
"SELECT DATE_DIFF('SECOND', '2010-11-30 23:59:59', '2010-11-30 20:58:59')"
)
self.validate_identity(
"SELECT DATE_DIFF('minute', '2010-11-30 23:59:59', '2010-11-30 20:58:59')"
"SELECT DATE_DIFF('MINUTE', '2010-11-30 23:59:59', '2010-11-30 20:58:59')"
)
def test_regex(self):
View file
@ -6,10 +6,17 @@ class TestTSQL(Validator):
dialect = "tsql"
def test_tsql(self):
self.validate_identity("ROUND(x, 1, 0)")
self.validate_identity("EXEC MyProc @id=7, @name='Lochristi'")
# https://learn.microsoft.com/en-us/previous-versions/sql/sql-server-2008-r2/ms187879(v=sql.105)?redirectedfrom=MSDN
# tsql allows "..", which means the default schema is used
self.validate_identity("SELECT * FROM a..b")
self.validate_identity("SELECT TRIM(' test ') AS Result")
self.validate_identity("SELECT TRIM('.,! ' FROM ' # test .') AS Result")
self.validate_identity("SELECT * FROM t TABLESAMPLE (10 PERCENT)")
self.validate_identity("SELECT * FROM t TABLESAMPLE (20 ROWS)")
self.validate_identity("SELECT * FROM t TABLESAMPLE (10 PERCENT) REPEATABLE (123)")
self.validate_identity("SELECT CONCAT(column1, column2)")
self.validate_identity("SELECT TestSpecialChar.Test# FROM TestSpecialChar")
self.validate_identity("SELECT TestSpecialChar.Test@ FROM TestSpecialChar")
@ -20,6 +27,40 @@ class TestTSQL(Validator):
self.validate_identity("1 AND true", "1 <> 0 AND (1 = 1)")
self.validate_identity("CAST(x AS int) OR y", "CAST(x AS INTEGER) <> 0 OR y <> 0")
self.validate_all(
"SELECT TRIM(BOTH 'a' FROM a)",
read={
"mysql": "SELECT TRIM(BOTH 'a' FROM a)",
},
write={
"mysql": "SELECT TRIM(BOTH 'a' FROM a)",
"tsql": "SELECT TRIM(BOTH 'a' FROM a)",
},
)
self.validate_all(
"SELECT TIMEFROMPARTS(23, 59, 59, 0, 0)",
read={
"duckdb": "SELECT MAKE_TIME(23, 59, 59)",
"mysql": "SELECT MAKETIME(23, 59, 59)",
"postgres": "SELECT MAKE_TIME(23, 59, 59)",
"snowflake": "SELECT TIME_FROM_PARTS(23, 59, 59)",
},
write={
"tsql": "SELECT TIMEFROMPARTS(23, 59, 59, 0, 0)",
},
)
self.validate_all(
"SELECT DATETIMEFROMPARTS(2013, 4, 5, 12, 00, 00, 0)",
read={
# The nanoseconds are ignored since T-SQL doesn't support that precision
"snowflake": "SELECT TIMESTAMP_FROM_PARTS(2013, 4, 5, 12, 00, 00, 987654321)"
},
write={
"duckdb": "SELECT MAKE_TIMESTAMP(2013, 4, 5, 12, 00, 00 + (0 / 1000.0))",
"snowflake": "SELECT TIMESTAMP_FROM_PARTS(2013, 4, 5, 12, 00, 00, 0 * 1000000)",
"tsql": "SELECT DATETIMEFROMPARTS(2013, 4, 5, 12, 00, 00, 0)",
},
)
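TIMEFROMPARTS and DATETIMEFROMPARTS are now mapped across dialects, with sub-second parts dropped or rescaled as each target allows. A minimal sketch reading DuckDB's MAKE_TIME into T-SQL:

import sqlglot

print(sqlglot.transpile("SELECT MAKE_TIME(23, 59, 59)", read="duckdb", write="tsql")[0])
# SELECT TIMEFROMPARTS(23, 59, 59, 0, 0)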
self.validate_all(
"SELECT TOP 1 * FROM (SELECT x FROM t1 UNION ALL SELECT x FROM t2) AS _l_0",
read={
@ -32,6 +73,33 @@ class TestTSQL(Validator):
"duckdb": "CREATE TABLE foo AS WITH t(c) AS (SELECT 1) SELECT c FROM t",
},
)
self.validate_all(
"WITH t(c) AS (SELECT 1) SELECT * INTO foo FROM (SELECT c AS c FROM t) AS temp",
write={
"duckdb": "CREATE TABLE foo AS WITH t(c) AS (SELECT 1) SELECT * FROM (SELECT c AS c FROM t) AS temp",
"postgres": "WITH t(c) AS (SELECT 1) SELECT * INTO foo FROM (SELECT c AS c FROM t) AS temp",
"oracle": "WITH t(c) AS (SELECT 1) SELECT * INTO foo FROM (SELECT c AS c FROM t) temp",
},
)
self.validate_all(
"WITH t(c) AS (SELECT 1) SELECT * INTO TEMP UNLOGGED foo FROM (SELECT c AS c FROM t) AS temp",
write={
"duckdb": "CREATE TEMPORARY TABLE foo AS WITH t(c) AS (SELECT 1) SELECT * FROM (SELECT c AS c FROM t) AS temp",
"postgres": "WITH t(c) AS (SELECT 1) SELECT * INTO TEMPORARY foo FROM (SELECT c AS c FROM t) AS temp",
},
)
self.validate_all(
"WITH t(c) AS (SELECT 1) SELECT * INTO UNLOGGED foo FROM (SELECT c AS c FROM t) AS temp",
write={
"duckdb": "CREATE TABLE foo AS WITH t(c) AS (SELECT 1) SELECT * FROM (SELECT c AS c FROM t) AS temp",
},
)
self.validate_all(
"WITH y AS (SELECT 2 AS c) INSERT INTO t SELECT * FROM y",
read={
@ -104,7 +172,8 @@ class TestTSQL(Validator):
)
self.validate_identity(
'CREATE TABLE x (CONSTRAINT "pk_mytable" UNIQUE NONCLUSTERED (a DESC)) ON b (c)'
'CREATE TABLE x (CONSTRAINT "pk_mytable" UNIQUE NONCLUSTERED (a DESC)) ON b (c)',
"CREATE TABLE x (CONSTRAINT [pk_mytable] UNIQUE NONCLUSTERED (a DESC)) ON b (c)",
)
self.validate_all(
@ -117,7 +186,7 @@ class TestTSQL(Validator):
) ON [SECONDARY]
""",
write={
"tsql": 'CREATE TABLE x ("zip_cd" VARCHAR(5) NULL NOT FOR REPLICATION, "zip_cd_mkey" VARCHAR(5) NOT NULL, CONSTRAINT "pk_mytable" PRIMARY KEY CLUSTERED ("zip_cd_mkey" ASC) WITH (PAD_INDEX=ON, STATISTICS_NORECOMPUTE=OFF) ON "INDEX") ON "SECONDARY"',
"tsql": "CREATE TABLE x ([zip_cd] VARCHAR(5) NULL NOT FOR REPLICATION, [zip_cd_mkey] VARCHAR(5) NOT NULL, CONSTRAINT [pk_mytable] PRIMARY KEY CLUSTERED ([zip_cd_mkey] ASC) WITH (PAD_INDEX=ON, STATISTICS_NORECOMPUTE=OFF) ON [INDEX]) ON [SECONDARY]",
"spark2": "CREATE TABLE x (`zip_cd` VARCHAR(5), `zip_cd_mkey` VARCHAR(5) NOT NULL, CONSTRAINT `pk_mytable` PRIMARY KEY (`zip_cd_mkey`))",
},
)
@ -136,7 +205,8 @@ class TestTSQL(Validator):
)
self.validate_identity(
"CREATE TABLE [db].[tbl]([a] [int])", 'CREATE TABLE "db"."tbl" ("a" INTEGER)'
"CREATE TABLE [db].[tbl]([a] [int])",
"CREATE TABLE [db].[tbl] ([a] INTEGER)",
)
projection = parse_one("SELECT a = 1", read="tsql").selects[0]
@ -172,7 +242,6 @@ class TestTSQL(Validator):
self.validate_identity("PRINT @TestVariable")
self.validate_identity("SELECT Employee_ID, Department_ID FROM @MyTableVar")
self.validate_identity("INSERT INTO @TestTable VALUES (1, 'Value1', 12, 20)")
self.validate_identity('SELECT "x"."y" FROM foo')
self.validate_identity("SELECT * FROM #foo")
self.validate_identity("SELECT * FROM ##foo")
self.validate_identity("SELECT a = 1", "SELECT 1 AS a")
@ -185,11 +254,15 @@ class TestTSQL(Validator):
self.validate_identity(
"SELECT DISTINCT DepartmentName, PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY BaseRate) OVER (PARTITION BY DepartmentName) AS MedianCont FROM dbo.DimEmployee"
)
self.validate_identity(
'SELECT "x"."y" FROM foo',
"SELECT [x].[y] FROM foo",
)
self.validate_all(
"SELECT CAST([a].[b] AS SMALLINT) FROM foo",
write={
"tsql": 'SELECT CAST("a"."b" AS SMALLINT) FROM foo',
"tsql": "SELECT CAST([a].[b] AS SMALLINT) FROM foo",
"spark": "SELECT CAST(`a`.`b` AS SMALLINT) FROM foo",
},
)
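T-SQL identifiers are now rendered with brackets instead of double quotes. A minimal sketch of the round trip:

import sqlglot

print(sqlglot.transpile('SELECT "x"."y" FROM foo', read="tsql", write="tsql")[0])
# SELECT [x].[y] FROM foo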
@ -227,13 +300,6 @@ class TestTSQL(Validator):
"postgres": "STRING_AGG(x, '|')",
},
)
self.validate_all(
"SELECT CAST([a].[b] AS SMALLINT) FROM foo",
write={
"tsql": 'SELECT CAST("a"."b" AS SMALLINT) FROM foo',
"spark": "SELECT CAST(`a`.`b` AS SMALLINT) FROM foo",
},
)
self.validate_all(
"HASHBYTES('SHA1', x)",
read={
@ -583,7 +649,11 @@ class TestTSQL(Validator):
'CREATE TABLE "dbo"."benchmark" ('
'"name" CHAR(7) NOT NULL, '
'"internal_id" VARCHAR(10) NOT NULL, '
f'UNIQUE {clusterd_keyword} ("internal_id" ASC))'
f'UNIQUE {clusterd_keyword} ("internal_id" ASC))',
"CREATE TABLE [dbo].[benchmark] ("
"[name] CHAR(7) NOT NULL, "
"[internal_id] VARCHAR(10) NOT NULL, "
f"UNIQUE {clusterd_keyword} ([internal_id] ASC))",
)
self.validate_identity(
@ -802,13 +872,13 @@ WHERE
"""
expected_sqls = [
'CREATE PROCEDURE "TRANSF"."SP_Merge_Sales_Real" @Loadid INTEGER, @NumberOfRows INTEGER AS BEGIN SET XACT_ABORT ON',
"CREATE PROCEDURE [TRANSF].[SP_Merge_Sales_Real] @Loadid INTEGER, @NumberOfRows INTEGER AS BEGIN SET XACT_ABORT ON",
"DECLARE @DWH_DateCreated DATETIME = CONVERT(DATETIME, getdate(), 104)",
"DECLARE @DWH_DateModified DATETIME = CONVERT(DATETIME, getdate(), 104)",
"DECLARE @DWH_IdUserCreated INTEGER = SUSER_ID (SYSTEM_USER)",
"DECLARE @DWH_IdUserModified INTEGER = SUSER_ID (SYSTEM_USER)",
"DECLARE @SalesAmountBefore float",
'SELECT @SalesAmountBefore = SUM(SalesAmount) FROM TRANSF."Pre_Merge_Sales_Real" AS S',
"SELECT @SalesAmountBefore = SUM(SalesAmount) FROM TRANSF.[Pre_Merge_Sales_Real] AS S",
"END",
]
@ -827,9 +897,9 @@ WHERE
"""
expected_sqls = [
'CREATE PROC "dbo"."transform_proc" AS DECLARE @CurrentDate VARCHAR(20)',
"CREATE PROC [dbo].[transform_proc] AS DECLARE @CurrentDate VARCHAR(20)",
"SET @CurrentDate = CAST(FORMAT(GETDATE(), 'yyyy-MM-dd HH:mm:ss') AS VARCHAR(20))",
'CREATE TABLE "target_schema"."target_table" (a INTEGER) WITH (DISTRIBUTION=REPLICATE, HEAP)',
"CREATE TABLE [target_schema].[target_table] (a INTEGER) WITH (DISTRIBUTION=REPLICATE, HEAP)",
]
for expr, expected_sql in zip(parse(sql, read="tsql"), expected_sqls):
@ -901,7 +971,7 @@ WHERE
def test_datepart(self):
self.validate_identity(
"DATEPART(QUARTER, x)",
"DATEPART(quarter, CAST(x AS DATETIME2))",
"DATEPART(QUARTER, CAST(x AS DATETIME2))",
)
self.validate_identity(
"DATEPART(YEAR, x)",
@ -909,15 +979,15 @@ WHERE
)
self.validate_identity(
"DATEPART(HOUR, date_and_time)",
"DATEPART(hour, CAST(date_and_time AS DATETIME2))",
"DATEPART(HOUR, CAST(date_and_time AS DATETIME2))",
)
self.validate_identity(
"DATEPART(WEEKDAY, date_and_time)",
"DATEPART(dw, CAST(date_and_time AS DATETIME2))",
"DATEPART(DW, CAST(date_and_time AS DATETIME2))",
)
self.validate_identity(
"DATEPART(DW, date_and_time)",
"DATEPART(dw, CAST(date_and_time AS DATETIME2))",
"DATEPART(DW, CAST(date_and_time AS DATETIME2))",
)
self.validate_all(
@ -929,9 +999,9 @@ WHERE
},
)
self.validate_all(
"SELECT DATEPART(year, CAST('2017-01-01' AS DATE))",
"SELECT DATEPART(YEAR, CAST('2017-01-01' AS DATE))",
read={
"postgres": "SELECT DATE_PART('year', '2017-01-01'::DATE)",
"postgres": "SELECT DATE_PART('YEAR', '2017-01-01'::DATE)",
},
write={
"postgres": "SELECT TO_CHAR(CAST(CAST('2017-01-01' AS DATE) AS TIMESTAMP), 'YYYY')",
@ -1135,7 +1205,7 @@ WHERE
)
def test_add_date(self):
self.validate_identity("SELECT DATEADD(year, 1, '2017/08/25')")
self.validate_identity("SELECT DATEADD(YEAR, 1, '2017/08/25')")
self.validate_all(
"DATEADD(year, 50, '2006-07-31')",
@ -1153,52 +1223,52 @@ WHERE
"SELECT DATEADD(wk, 1, '2017/08/25')",
write={
"spark": "SELECT DATE_ADD('2017/08/25', 7)",
"databricks": "SELECT DATEADD(week, 1, '2017/08/25')",
"databricks": "SELECT DATEADD(WEEK, 1, '2017/08/25')",
},
)
def test_date_diff(self):
self.validate_identity("SELECT DATEDIFF(hour, 1.5, '2021-01-01')")
self.validate_identity("SELECT DATEDIFF(HOUR, 1.5, '2021-01-01')")
self.validate_all(
"SELECT DATEDIFF(quarter, 0, '2021-01-01')",
write={
"tsql": "SELECT DATEDIFF(quarter, CAST('1900-01-01' AS DATETIME2), CAST('2021-01-01' AS DATETIME2))",
"spark": "SELECT DATEDIFF(quarter, CAST('1900-01-01' AS TIMESTAMP), CAST('2021-01-01' AS TIMESTAMP))",
"duckdb": "SELECT DATE_DIFF('quarter', CAST('1900-01-01' AS TIMESTAMP), CAST('2021-01-01' AS TIMESTAMP))",
"tsql": "SELECT DATEDIFF(QUARTER, CAST('1900-01-01' AS DATETIME2), CAST('2021-01-01' AS DATETIME2))",
"spark": "SELECT DATEDIFF(QUARTER, CAST('1900-01-01' AS TIMESTAMP), CAST('2021-01-01' AS TIMESTAMP))",
"duckdb": "SELECT DATE_DIFF('QUARTER', CAST('1900-01-01' AS TIMESTAMP), CAST('2021-01-01' AS TIMESTAMP))",
},
)
self.validate_all(
"SELECT DATEDIFF(day, 1, '2021-01-01')",
write={
"tsql": "SELECT DATEDIFF(day, CAST('1900-01-02' AS DATETIME2), CAST('2021-01-01' AS DATETIME2))",
"spark": "SELECT DATEDIFF(day, CAST('1900-01-02' AS TIMESTAMP), CAST('2021-01-01' AS TIMESTAMP))",
"duckdb": "SELECT DATE_DIFF('day', CAST('1900-01-02' AS TIMESTAMP), CAST('2021-01-01' AS TIMESTAMP))",
"tsql": "SELECT DATEDIFF(DAY, CAST('1900-01-02' AS DATETIME2), CAST('2021-01-01' AS DATETIME2))",
"spark": "SELECT DATEDIFF(DAY, CAST('1900-01-02' AS TIMESTAMP), CAST('2021-01-01' AS TIMESTAMP))",
"duckdb": "SELECT DATE_DIFF('DAY', CAST('1900-01-02' AS TIMESTAMP), CAST('2021-01-01' AS TIMESTAMP))",
},
)
self.validate_all(
"SELECT DATEDIFF(year, '2020-01-01', '2021-01-01')",
write={
"tsql": "SELECT DATEDIFF(year, CAST('2020-01-01' AS DATETIME2), CAST('2021-01-01' AS DATETIME2))",
"spark": "SELECT DATEDIFF(year, CAST('2020-01-01' AS TIMESTAMP), CAST('2021-01-01' AS TIMESTAMP))",
"tsql": "SELECT DATEDIFF(YEAR, CAST('2020-01-01' AS DATETIME2), CAST('2021-01-01' AS DATETIME2))",
"spark": "SELECT DATEDIFF(YEAR, CAST('2020-01-01' AS TIMESTAMP), CAST('2021-01-01' AS TIMESTAMP))",
"spark2": "SELECT CAST(MONTHS_BETWEEN(CAST('2021-01-01' AS TIMESTAMP), CAST('2020-01-01' AS TIMESTAMP)) / 12 AS INT)",
},
)
self.validate_all(
"SELECT DATEDIFF(mm, 'start', 'end')",
write={
"databricks": "SELECT DATEDIFF(month, CAST('start' AS TIMESTAMP), CAST('end' AS TIMESTAMP))",
"databricks": "SELECT DATEDIFF(MONTH, CAST('start' AS TIMESTAMP), CAST('end' AS TIMESTAMP))",
"spark2": "SELECT CAST(MONTHS_BETWEEN(CAST('end' AS TIMESTAMP), CAST('start' AS TIMESTAMP)) AS INT)",
"tsql": "SELECT DATEDIFF(month, CAST('start' AS DATETIME2), CAST('end' AS DATETIME2))",
"tsql": "SELECT DATEDIFF(MONTH, CAST('start' AS DATETIME2), CAST('end' AS DATETIME2))",
},
)
self.validate_all(
"SELECT DATEDIFF(quarter, 'start', 'end')",
write={
"databricks": "SELECT DATEDIFF(quarter, CAST('start' AS TIMESTAMP), CAST('end' AS TIMESTAMP))",
"spark": "SELECT DATEDIFF(quarter, CAST('start' AS TIMESTAMP), CAST('end' AS TIMESTAMP))",
"databricks": "SELECT DATEDIFF(QUARTER, CAST('start' AS TIMESTAMP), CAST('end' AS TIMESTAMP))",
"spark": "SELECT DATEDIFF(QUARTER, CAST('start' AS TIMESTAMP), CAST('end' AS TIMESTAMP))",
"spark2": "SELECT CAST(MONTHS_BETWEEN(CAST('end' AS TIMESTAMP), CAST('start' AS TIMESTAMP)) / 3 AS INT)",
"tsql": "SELECT DATEDIFF(quarter, CAST('start' AS DATETIME2), CAST('end' AS DATETIME2))",
"tsql": "SELECT DATEDIFF(QUARTER, CAST('start' AS DATETIME2), CAST('end' AS DATETIME2))",
},
)
@ -1220,39 +1290,45 @@ WHERE
self.validate_all(
"SELECT x.a, x.b, t.v, t.y FROM x CROSS APPLY (SELECT v, y FROM t) t(v, y)",
write={
"spark": "SELECT x.a, x.b, t.v, t.y FROM x, LATERAL (SELECT v, y FROM t) AS t(v, y)",
"spark": "SELECT x.a, x.b, t.v, t.y FROM x INNER JOIN LATERAL (SELECT v, y FROM t) AS t(v, y)",
"tsql": "SELECT x.a, x.b, t.v, t.y FROM x CROSS APPLY (SELECT v, y FROM t) AS t(v, y)",
},
)
self.validate_all(
"SELECT x.a, x.b, t.v, t.y FROM x OUTER APPLY (SELECT v, y FROM t) t(v, y)",
write={
"spark": "SELECT x.a, x.b, t.v, t.y FROM x LEFT JOIN LATERAL (SELECT v, y FROM t) AS t(v, y)",
"tsql": "SELECT x.a, x.b, t.v, t.y FROM x OUTER APPLY (SELECT v, y FROM t) AS t(v, y)",
},
)
self.validate_all(
"SELECT x.a, x.b, t.v, t.y, s.v, s.y FROM x OUTER APPLY (SELECT v, y FROM t) t(v, y) OUTER APPLY (SELECT v, y FROM t) s(v, y) LEFT JOIN z ON z.id = s.id",
write={
"spark": "SELECT x.a, x.b, t.v, t.y, s.v, s.y FROM x LEFT JOIN LATERAL (SELECT v, y FROM t) AS t(v, y) LEFT JOIN LATERAL (SELECT v, y FROM t) AS s(v, y) LEFT JOIN z ON z.id = s.id",
"tsql": "SELECT x.a, x.b, t.v, t.y, s.v, s.y FROM x OUTER APPLY (SELECT v, y FROM t) AS t(v, y) OUTER APPLY (SELECT v, y FROM t) AS s(v, y) LEFT JOIN z ON z.id = s.id",
},
)
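CROSS APPLY now maps to INNER JOIN LATERAL (and OUTER APPLY to LEFT JOIN LATERAL) when targeting Spark. A minimal sketch:

import sqlglot

sql = "SELECT x.a, x.b, t.v, t.y FROM x CROSS APPLY (SELECT v, y FROM t) t(v, y)"
print(sqlglot.transpile(sql, read="tsql", write="spark")[0])
# SELECT x.a, x.b, t.v, t.y FROM x INNER JOIN LATERAL (SELECT v, y FROM t) AS t(v, y)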
def test_lateral_table_valued_function(self):
self.validate_all(
"SELECT t.x, y.z FROM x CROSS APPLY tvfTest(t.x)y(z)",
"SELECT t.x, y.z FROM x CROSS APPLY tvfTest(t.x) y(z)",
write={
"spark": "SELECT t.x, y.z FROM x, LATERAL TVFTEST(t.x) AS y(z)",
"spark": "SELECT t.x, y.z FROM x INNER JOIN LATERAL TVFTEST(t.x) AS y(z)",
"tsql": "SELECT t.x, y.z FROM x CROSS APPLY TVFTEST(t.x) AS y(z)",
},
)
self.validate_all(
"SELECT t.x, y.z FROM x OUTER APPLY tvfTest(t.x)y(z)",
write={
"spark": "SELECT t.x, y.z FROM x LEFT JOIN LATERAL TVFTEST(t.x) AS y(z)",
"tsql": "SELECT t.x, y.z FROM x OUTER APPLY TVFTEST(t.x) AS y(z)",
},
)
self.validate_all(
"SELECT t.x, y.z FROM x OUTER APPLY a.b.tvfTest(t.x)y(z)",
write={
"spark": "SELECT t.x, y.z FROM x LEFT JOIN LATERAL a.b.TVFTEST(t.x) AS y(z)",
"tsql": "SELECT t.x, y.z FROM x OUTER APPLY a.b.TVFTEST(t.x) AS y(z)",
},
)
@ -1284,27 +1360,45 @@ WHERE
self.validate_all(
"SELECT FORMAT(1000000.01,'###,###.###')",
write={"spark": "SELECT FORMAT_NUMBER(1000000.01, '###,###.###')"},
write={
"spark": "SELECT FORMAT_NUMBER(1000000.01, '###,###.###')",
"tsql": "SELECT FORMAT(1000000.01, '###,###.###')",
},
)
self.validate_all(
"SELECT FORMAT(1234567, 'f')",
write={"spark": "SELECT FORMAT_NUMBER(1234567, 'f')"},
write={
"spark": "SELECT FORMAT_NUMBER(1234567, 'f')",
"tsql": "SELECT FORMAT(1234567, 'f')",
},
)
self.validate_all(
"SELECT FORMAT('01-01-1991', 'dd.mm.yyyy')",
write={"spark": "SELECT DATE_FORMAT('01-01-1991', 'dd.mm.yyyy')"},
write={
"spark": "SELECT DATE_FORMAT('01-01-1991', 'dd.mm.yyyy')",
"tsql": "SELECT FORMAT('01-01-1991', 'dd.mm.yyyy')",
},
)
self.validate_all(
"SELECT FORMAT(date_col, 'dd.mm.yyyy')",
write={"spark": "SELECT DATE_FORMAT(date_col, 'dd.mm.yyyy')"},
write={
"spark": "SELECT DATE_FORMAT(date_col, 'dd.mm.yyyy')",
"tsql": "SELECT FORMAT(date_col, 'dd.mm.yyyy')",
},
)
self.validate_all(
"SELECT FORMAT(date_col, 'm')",
write={"spark": "SELECT DATE_FORMAT(date_col, 'MMMM d')"},
write={
"spark": "SELECT DATE_FORMAT(date_col, 'MMMM d')",
"tsql": "SELECT FORMAT(date_col, 'MMMM d')",
},
)
self.validate_all(
"SELECT FORMAT(num_col, 'c')",
write={"spark": "SELECT FORMAT_NUMBER(num_col, 'c')"},
write={
"spark": "SELECT FORMAT_NUMBER(num_col, 'c')",
"tsql": "SELECT FORMAT(num_col, 'c')",
},
)
def test_string(self):
@ -1324,11 +1418,36 @@ WHERE
def test_eomonth(self):
self.validate_all(
"EOMONTH(GETDATE())",
write={"spark": "LAST_DAY(CURRENT_TIMESTAMP())"},
read={
"spark": "LAST_DAY(CURRENT_TIMESTAMP())",
},
write={
"bigquery": "LAST_DAY(CAST(CURRENT_TIMESTAMP() AS DATE))",
"clickhouse": "LAST_DAY(CAST(CURRENT_TIMESTAMP() AS DATE))",
"duckdb": "LAST_DAY(CAST(CURRENT_TIMESTAMP AS DATE))",
"mysql": "LAST_DAY(DATE(CURRENT_TIMESTAMP()))",
"postgres": "CAST(DATE_TRUNC('MONTH', CAST(CURRENT_TIMESTAMP AS DATE)) + INTERVAL '1 MONTH' - INTERVAL '1 DAY' AS DATE)",
"presto": "LAST_DAY_OF_MONTH(CAST(CAST(CURRENT_TIMESTAMP AS TIMESTAMP) AS DATE))",
"redshift": "LAST_DAY(CAST(SYSDATE AS DATE))",
"snowflake": "LAST_DAY(CAST(CURRENT_TIMESTAMP() AS DATE))",
"spark": "LAST_DAY(TO_DATE(CURRENT_TIMESTAMP()))",
"tsql": "EOMONTH(CAST(GETDATE() AS DATE))",
},
)
self.validate_all(
"EOMONTH(GETDATE(), -1)",
write={"spark": "LAST_DAY(ADD_MONTHS(CURRENT_TIMESTAMP(), -1))"},
write={
"bigquery": "LAST_DAY(DATE_ADD(CAST(CURRENT_TIMESTAMP() AS DATE), INTERVAL -1 MONTH))",
"clickhouse": "LAST_DAY(DATE_ADD(MONTH, -1, CAST(CURRENT_TIMESTAMP() AS DATE)))",
"duckdb": "LAST_DAY(CAST(CURRENT_TIMESTAMP AS DATE) + INTERVAL (-1) MONTH)",
"mysql": "LAST_DAY(DATE_ADD(CURRENT_TIMESTAMP(), INTERVAL -1 MONTH))",
"postgres": "CAST(DATE_TRUNC('MONTH', CAST(CURRENT_TIMESTAMP AS DATE) + INTERVAL '-1 MONTH') + INTERVAL '1 MONTH' - INTERVAL '1 DAY' AS DATE)",
"presto": "LAST_DAY_OF_MONTH(DATE_ADD('MONTH', CAST(-1 AS BIGINT), CAST(CAST(CURRENT_TIMESTAMP AS TIMESTAMP) AS DATE)))",
"redshift": "LAST_DAY(DATEADD(MONTH, -1, CAST(SYSDATE AS DATE)))",
"snowflake": "LAST_DAY(DATEADD(MONTH, -1, CAST(CURRENT_TIMESTAMP() AS DATE)))",
"spark": "LAST_DAY(ADD_MONTHS(TO_DATE(CURRENT_TIMESTAMP()), -1))",
"tsql": "EOMONTH(DATEADD(MONTH, -1, CAST(GETDATE() AS DATE)))",
},
)
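EOMONTH now casts its argument to DATE and fans out to each dialect's last-day idiom, including the month-offset form. A minimal sketch of the offset case against DuckDB:

import sqlglot

print(sqlglot.transpile("EOMONTH(GETDATE(), -1)", read="tsql", write="duckdb")[0])
# LAST_DAY(CAST(CURRENT_TIMESTAMP AS DATE) + INTERVAL (-1) MONTH)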
def test_identifier_prefixes(self):
@ -1384,61 +1503,32 @@ WHERE
def test_temporal_table(self):
self.validate_identity(
"""CREATE TABLE test ("data" CHAR(7), "valid_from" DATETIME2(2) GENERATED ALWAYS AS ROW START NOT NULL, "valid_to" DATETIME2(2) GENERATED ALWAYS AS ROW END NOT NULL, PERIOD FOR SYSTEM_TIME ("valid_from", "valid_to")) WITH(SYSTEM_VERSIONING=ON)"""
"""CREATE TABLE test ("data" CHAR(7), "valid_from" DATETIME2(2) GENERATED ALWAYS AS ROW START NOT NULL, "valid_to" DATETIME2(2) GENERATED ALWAYS AS ROW END NOT NULL, PERIOD FOR SYSTEM_TIME ("valid_from", "valid_to")) WITH(SYSTEM_VERSIONING=ON)""",
"CREATE TABLE test ([data] CHAR(7), [valid_from] DATETIME2(2) GENERATED ALWAYS AS ROW START NOT NULL, [valid_to] DATETIME2(2) GENERATED ALWAYS AS ROW END NOT NULL, PERIOD FOR SYSTEM_TIME ([valid_from], [valid_to])) WITH(SYSTEM_VERSIONING=ON)",
)
self.validate_identity(
"""CREATE TABLE test ("data" CHAR(7), "valid_from" DATETIME2(2) GENERATED ALWAYS AS ROW START HIDDEN NOT NULL, "valid_to" DATETIME2(2) GENERATED ALWAYS AS ROW END HIDDEN NOT NULL, PERIOD FOR SYSTEM_TIME ("valid_from", "valid_to")) WITH(SYSTEM_VERSIONING=ON(HISTORY_TABLE="dbo"."benchmark_history", DATA_CONSISTENCY_CHECK=ON))"""
"""CREATE TABLE test ([data] CHAR(7), [valid_from] DATETIME2(2) GENERATED ALWAYS AS ROW START HIDDEN NOT NULL, [valid_to] DATETIME2(2) GENERATED ALWAYS AS ROW END HIDDEN NOT NULL, PERIOD FOR SYSTEM_TIME ([valid_from], [valid_to])) WITH(SYSTEM_VERSIONING=ON(HISTORY_TABLE=[dbo].[benchmark_history], DATA_CONSISTENCY_CHECK=ON))"""
)
self.validate_identity(
"""CREATE TABLE test ("data" CHAR(7), "valid_from" DATETIME2(2) GENERATED ALWAYS AS ROW START NOT NULL, "valid_to" DATETIME2(2) GENERATED ALWAYS AS ROW END NOT NULL, PERIOD FOR SYSTEM_TIME ("valid_from", "valid_to")) WITH(SYSTEM_VERSIONING=ON(HISTORY_TABLE="dbo"."benchmark_history", DATA_CONSISTENCY_CHECK=ON))"""
"""CREATE TABLE test ([data] CHAR(7), [valid_from] DATETIME2(2) GENERATED ALWAYS AS ROW START NOT NULL, [valid_to] DATETIME2(2) GENERATED ALWAYS AS ROW END NOT NULL, PERIOD FOR SYSTEM_TIME ([valid_from], [valid_to])) WITH(SYSTEM_VERSIONING=ON(HISTORY_TABLE=[dbo].[benchmark_history], DATA_CONSISTENCY_CHECK=ON))"""
)
self.validate_identity(
"""CREATE TABLE test ("data" CHAR(7), "valid_from" DATETIME2(2) GENERATED ALWAYS AS ROW START NOT NULL, "valid_to" DATETIME2(2) GENERATED ALWAYS AS ROW END NOT NULL, PERIOD FOR SYSTEM_TIME ("valid_from", "valid_to")) WITH(SYSTEM_VERSIONING=ON(HISTORY_TABLE="dbo"."benchmark_history", DATA_CONSISTENCY_CHECK=OFF))"""
"""CREATE TABLE test ([data] CHAR(7), [valid_from] DATETIME2(2) GENERATED ALWAYS AS ROW START NOT NULL, [valid_to] DATETIME2(2) GENERATED ALWAYS AS ROW END NOT NULL, PERIOD FOR SYSTEM_TIME ([valid_from], [valid_to])) WITH(SYSTEM_VERSIONING=ON(HISTORY_TABLE=[dbo].[benchmark_history], DATA_CONSISTENCY_CHECK=OFF))"""
)
self.validate_identity(
"""CREATE TABLE test ("data" CHAR(7), "valid_from" DATETIME2(2) GENERATED ALWAYS AS ROW START NOT NULL, "valid_to" DATETIME2(2) GENERATED ALWAYS AS ROW END NOT NULL, PERIOD FOR SYSTEM_TIME ("valid_from", "valid_to")) WITH(SYSTEM_VERSIONING=ON(HISTORY_TABLE="dbo"."benchmark_history"))"""
"""CREATE TABLE test ([data] CHAR(7), [valid_from] DATETIME2(2) GENERATED ALWAYS AS ROW START NOT NULL, [valid_to] DATETIME2(2) GENERATED ALWAYS AS ROW END NOT NULL, PERIOD FOR SYSTEM_TIME ([valid_from], [valid_to])) WITH(SYSTEM_VERSIONING=ON(HISTORY_TABLE=[dbo].[benchmark_history]))"""
)
self.validate_identity(
"""CREATE TABLE test ("data" CHAR(7), "valid_from" DATETIME2(2) GENERATED ALWAYS AS ROW START NOT NULL, "valid_to" DATETIME2(2) GENERATED ALWAYS AS ROW END NOT NULL, PERIOD FOR SYSTEM_TIME ("valid_from", "valid_to")) WITH(SYSTEM_VERSIONING=ON(HISTORY_TABLE="dbo"."benchmark_history"))"""
"""CREATE TABLE test ([data] CHAR(7), [valid_from] DATETIME2(2) GENERATED ALWAYS AS ROW START NOT NULL, [valid_to] DATETIME2(2) GENERATED ALWAYS AS ROW END NOT NULL, PERIOD FOR SYSTEM_TIME ([valid_from], [valid_to])) WITH(SYSTEM_VERSIONING=ON(HISTORY_TABLE=[dbo].[benchmark_history]))"""
)
def test_system_time(self):
self.validate_all(
"SELECT [x] FROM [a].[b] FOR SYSTEM_TIME AS OF 'foo'",
write={
"tsql": """SELECT "x" FROM "a"."b" FOR SYSTEM_TIME AS OF 'foo'""",
},
)
self.validate_all(
"SELECT [x] FROM [a].[b] FOR SYSTEM_TIME AS OF 'foo' AS alias",
write={
"tsql": """SELECT "x" FROM "a"."b" FOR SYSTEM_TIME AS OF 'foo' AS alias""",
},
)
self.validate_all(
"SELECT [x] FROM [a].[b] FOR SYSTEM_TIME FROM c TO d",
write={
"tsql": """SELECT "x" FROM "a"."b" FOR SYSTEM_TIME FROM c TO d""",
},
)
self.validate_all(
"SELECT [x] FROM [a].[b] FOR SYSTEM_TIME BETWEEN c AND d",
write={
"tsql": """SELECT "x" FROM "a"."b" FOR SYSTEM_TIME BETWEEN c AND d""",
},
)
self.validate_all(
"SELECT [x] FROM [a].[b] FOR SYSTEM_TIME CONTAINED IN (c, d)",
write={
"tsql": """SELECT "x" FROM "a"."b" FOR SYSTEM_TIME CONTAINED IN (c, d)""",
},
)
self.validate_all(
"SELECT [x] FROM [a].[b] FOR SYSTEM_TIME ALL AS alias",
write={
"tsql": """SELECT "x" FROM "a"."b" FOR SYSTEM_TIME ALL AS alias""",
},
)
self.validate_identity("SELECT [x] FROM [a].[b] FOR SYSTEM_TIME AS OF 'foo'")
self.validate_identity("SELECT [x] FROM [a].[b] FOR SYSTEM_TIME AS OF 'foo' AS alias")
self.validate_identity("SELECT [x] FROM [a].[b] FOR SYSTEM_TIME FROM c TO d")
self.validate_identity("SELECT [x] FROM [a].[b] FOR SYSTEM_TIME BETWEEN c AND d")
self.validate_identity("SELECT [x] FROM [a].[b] FOR SYSTEM_TIME CONTAINED IN (c, d)")
self.validate_identity("SELECT [x] FROM [a].[b] FOR SYSTEM_TIME ALL AS alias")
def test_current_user(self):
self.validate_all(
@ -1491,7 +1581,7 @@ WHERE
self.validate_all(
"""SELECT [key], value FROM OPENJSON(@json,'$.path.to."sub-object"')""",
write={
"tsql": """SELECT "key", value FROM OPENJSON(@json, '$.path.to."sub-object"')""",
"tsql": """SELECT [key], value FROM OPENJSON(@json, '$.path.to."sub-object"')""",
},
)
self.validate_all(
@ -1520,7 +1610,7 @@ FROM OPENJSON(@json) WITH (
Date DATETIME2 '$.Order.Date',
Customer VARCHAR(200) '$.AccountNumber',
Quantity INTEGER '$.Item.Quantity',
"Order" VARCHAR(MAX) AS JSON
[Order] VARCHAR(MAX) AS JSON
)"""
},
pretty=True,
@ -1547,19 +1637,19 @@ FROM OPENJSON(@json) WITH (
def test_qualify_derived_table_outputs(self):
self.validate_identity(
"WITH t AS (SELECT 1) SELECT * FROM t",
'WITH t AS (SELECT 1 AS "1") SELECT * FROM t',
"WITH t AS (SELECT 1 AS [1]) SELECT * FROM t",
)
self.validate_identity(
'WITH t AS (SELECT "c") SELECT * FROM t',
'WITH t AS (SELECT "c" AS "c") SELECT * FROM t',
"WITH t AS (SELECT [c] AS [c]) SELECT * FROM t",
)
self.validate_identity(
"SELECT * FROM (SELECT 1) AS subq",
'SELECT * FROM (SELECT 1 AS "1") AS subq',
"SELECT * FROM (SELECT 1 AS [1]) AS subq",
)
self.validate_identity(
'SELECT * FROM (SELECT "c") AS subq',
'SELECT * FROM (SELECT "c" AS "c") AS subq',
"SELECT * FROM (SELECT [c] AS [c]) AS subq",
)
self.validate_all(
View file
@ -107,6 +107,7 @@ CURRENT_DATE('UTC')
CURRENT_DATE AT TIME ZONE 'UTC'
CURRENT_DATE AT TIME ZONE zone_column
CURRENT_DATE AT TIME ZONE 'UTC' AT TIME ZONE 'Asia/Tokio'
f1 AT TIME ZONE INTERVAL '-10:00' AS f1
ARRAY()
ARRAY(1, 2)
ARRAY(time, foo)
@ -137,19 +138,26 @@ x ILIKE '%y%'
x LIKE '%y%' ESCAPE '\'
x ILIKE '%y%' ESCAPE '\'
1 AS escape
INTERVAL '1' day
INTERVAL '1' DAY
INTERVAL '1' MONTH
INTERVAL '1' YEAR
INTERVAL '-1' CURRENT_DATE
INTERVAL '-31' CAST(GETDATE() AS DATE)
INTERVAL (1 + 3) DAYS
INTERVAL '1' day * 5
5 * INTERVAL '1' day
CASE WHEN TRUE THEN INTERVAL '15' days END
INTERVAL '1' DAY * 5
5 * INTERVAL '1' DAY
CASE WHEN TRUE THEN INTERVAL '15' DAYS END
CASE WHEN TRUE THEN 1 ELSE interval END
CASE WHEN TRUE THEN 1 ELSE "INTERVAL" END
SELECT * WHERE interval IS NULL
SELECT * WHERE NOT interval IS NULL
SELECT * WHERE INTERVAL "is" > 1
SELECT * WHERE INTERVAL x.is > 1
CAST('45' AS INTERVAL DAYS)
CAST(x AS UUID)
FILTER(a, x -> x.a.b.c.d.e.f.g)
FILTER(a, x -> FOO(x.a.b.c.d.e.f.g) + x.a.b.c.d.e.f.g)
TIMESTAMP_FROM_PARTS(2019, 1, 10, 2, 3, 4, 123456789, 'America/Los_Angeles')
TIMESTAMP_DIFF(CURRENT_TIMESTAMP(), 1, DAY)
DATETIME_DIFF(CURRENT_DATE, 1, DAY)
QUANTILE(x, 0.5)
@ -164,7 +172,7 @@ REPLACE(1)
DATE(x) = DATE(y)
TIMESTAMP(DATE(x))
TIMESTAMP_TRUNC(COALESCE(time_field, CURRENT_TIMESTAMP()), DAY)
COUNT(DISTINCT CASE WHEN DATE_TRUNC(DATE(time_field), isoweek) = DATE_TRUNC(DATE(time_field2), isoweek) THEN report_id ELSE NULL END)
COUNT(DISTINCT CASE WHEN DATE_TRUNC('ISOWEEK', DATE(time_field)) = DATE_TRUNC('ISOWEEK', DATE(time_field2)) THEN report_id ELSE NULL END)
COUNT(a, b)
x[y - 1]
CASE WHEN SUM(x) > 3 THEN 1 END OVER (PARTITION BY x)
@ -224,7 +232,7 @@ SELECT DISTINCT ON (x, y + 1) * FROM z
SELECT DISTINCT ON (x.y) * FROM z
SELECT DISTINCT FROM_SOMETHING
SELECT top.x
SELECT TIMESTAMP(DATE_TRUNC(DATE(time_field), MONTH)) AS time_value FROM "table"
SELECT TIMESTAMP(DATE_TRUNC('MONTH', DATE(time_field))) AS time_value FROM "table"
SELECT GREATEST((3 + 1), LEAST(3, 4))
SELECT TRANSFORM(a, b -> b) AS x
SELECT AGGREGATE(a, (a, b) -> a + b) AS x
@ -335,17 +343,13 @@ SELECT a FROM test TABLESAMPLE (BUCKET 1 OUT OF 5)
SELECT a FROM test TABLESAMPLE (BUCKET 1 OUT OF 5 ON x)
SELECT a FROM test TABLESAMPLE (BUCKET 1 OUT OF 5 ON RAND())
SELECT a FROM test TABLESAMPLE (0.1 PERCENT)
SELECT a FROM test TABLESAMPLE (100)
SELECT a FROM test TABLESAMPLE (100 ROWS)
SELECT a FROM test TABLESAMPLE BERNOULLI (50)
SELECT a FROM test TABLESAMPLE SYSTEM (75)
SELECT a FROM test PIVOT(SUM(x) FOR y IN ('z', 'q'))
SELECT 1 FROM a.b.table1 AS t UNPIVOT((c3) FOR c4 IN (a, b))
SELECT a FROM test PIVOT(SOMEAGG(x, y, z) FOR q IN (1))
SELECT a FROM test PIVOT(SUM(x) FOR y IN ('z', 'q')) PIVOT(MAX(b) FOR c IN ('d'))
SELECT a FROM (SELECT a, b FROM test) PIVOT(SUM(x) FOR y IN ('z', 'q'))
SELECT a FROM test UNPIVOT(x FOR y IN (z, q)) AS x
SELECT a FROM test PIVOT(SUM(x) FOR y IN ('z', 'q')) AS x TABLESAMPLE (0.1)
SELECT a FROM test PIVOT(SUM(x) FOR y IN ('z', 'q')) UNPIVOT(x FOR y IN (z, q)) AS x
SELECT ABS(a) FROM test
SELECT AVG(a) FROM test
@@ -590,6 +594,7 @@ CREATE TABLE IF NOT EXISTS customer (pk BIGINT NOT NULL GENERATED ALWAYS AS IDEN
CREATE TABLE customer (pk BIGINT NOT NULL GENERATED ALWAYS AS IDENTITY (START WITH 10 INCREMENT BY 1 MINVALUE -1 MAXVALUE 1 NO CYCLE))
CREATE TABLE customer (pk BIGINT NOT NULL GENERATED ALWAYS AS IDENTITY (START WITH 10))
CREATE TABLE customer (pk BIGINT NOT NULL GENERATED ALWAYS AS IDENTITY (CYCLE))
CREATE TABLE customer (period INT NOT NULL)
CREATE TABLE foo (baz_id INT REFERENCES baz (id) DEFERRABLE)
CREATE TABLE foo (baz CHAR(4) CHARACTER SET LATIN UPPERCASE NOT CASESPECIFIC COMPRESS 'a')
CREATE TABLE foo (baz DATE FORMAT 'YYYY/MM/DD' TITLE 'title' INLINE LENGTH 1 COMPRESS ('a', 'b'))
@@ -621,6 +626,9 @@ CREATE FUNCTION a(b INT, c VARCHAR) AS 'SELECT 1'
CREATE FUNCTION a() LANGUAGE sql
CREATE FUNCTION a() LANGUAGE sql RETURNS INT
CREATE FUNCTION a.b(x INT) RETURNS INT AS RETURN x + 1
CREATE FUNCTION a.b(x TEXT) RETURNS TEXT CONTAINS SQL AS RETURN x
CREATE FUNCTION a.b(x TEXT) RETURNS TEXT LANGUAGE SQL MODIFIES SQL DATA AS RETURN x
CREATE FUNCTION a.b(x TEXT) LANGUAGE SQL READS SQL DATA RETURNS TEXT AS RETURN x
CREATE FUNCTION a.b.c()
CREATE INDEX abc ON t(a)
CREATE INDEX "abc" ON t(a)
@@ -635,6 +643,7 @@ CREATE DATABASE x
CREATE DATABASE IF NOT EXISTS y
CREATE PROCEDURE IF NOT EXISTS a.b.c() AS 'DECLARE BEGIN; END'
CREATE OR REPLACE STAGE
CREATE TABLE T3 AS (SELECT DISTINCT A FROM T1 EXCEPT (SELECT A FROM T2) LIMIT 1)
DESCRIBE x
DROP INDEX a.b.c
DROP FUNCTION a.b.c (INT)
@@ -741,6 +750,7 @@ SELECT (WITH x AS (SELECT 1 AS y) SELECT * FROM x) AS z
SELECT ((SELECT 1) + 1)
SELECT * FROM project.dataset.INFORMATION_SCHEMA.TABLES
SELECT CAST(x AS INT) /* comment */ FROM foo
SELECT c /* c1 */ AS alias /* c2 */
SELECT a /* x */, b /* x */
SELECT a /* x */ /* y */ /* z */, b /* k */ /* m */
SELECT * FROM foo /* x */, bla /* x */
@@ -756,9 +766,6 @@ INSERT INTO foo SELECT * FROM bar /* comment */
/* c */ WITH x AS (SELECT 1) SELECT * FROM x
SELECT a FROM x WHERE a COLLATE 'utf8_general_ci' = 'b'
SELECT x AS INTO FROM bla
SELECT * INTO newevent FROM event
SELECT * INTO TEMPORARY newevent FROM event
SELECT * INTO UNLOGGED newevent FROM event
ALTER TABLE integers ADD COLUMN k INT
ALTER TABLE integers ADD COLUMN k INT FIRST
ALTER TABLE integers ADD COLUMN k INT AFTER m

View file

@@ -16,8 +16,8 @@ SELECT '1' + 1 AS "col";
SELECT '1' + '1' AS "col";
SELECT CONCAT('1', '1') AS "col";
SELECT CAST('2022-01-01' AS DATE) + INTERVAL '1' day;
SELECT CAST('2022-01-01' AS DATE) + INTERVAL '1' day AS "_col_0";
SELECT CAST('2022-01-01' AS DATE) + INTERVAL '1' DAY;
SELECT CAST('2022-01-01' AS DATE) + INTERVAL '1' DAY AS "_col_0";
SELECT CAST('2022-01-01' AS DATE) IS NULL AS "a";
SELECT CAST('2022-01-01' AS DATE) IS NULL AS "a";

View file

@@ -626,8 +626,7 @@ SELECT
"_q_0"."G" AS "G",
"_q_0"."'x'" AS "'x'",
"_q_0"."'y'" AS "'y'"
FROM "U" AS "U" PIVOT(SUM("U"."F") FOR "U"."H" IN ('x', 'y')) AS "_q_0"
;
FROM "U" AS "U" PIVOT(SUM("U"."F") FOR "U"."H" IN ('x', 'y')) AS "_q_0";
# title: selecting all columns from a pivoted source and generating spark
# note: spark doesn't allow pivot aliases or qualified columns for the pivot's "field" (`h`)
@@ -644,6 +643,110 @@ FROM (
FROM `u` AS `u` PIVOT(SUM(`u`.`f`) FOR `h` IN ('x', 'y'))
) AS `_q_0`;
# title: selecting all columns from a pivoted source, pivot has column aliases
# execute: false
# dialect: snowflake
WITH source AS (
SELECT
id,
key,
value,
timestamp_1,
timestamp_2
FROM DB_NAME.SCHEMA_NAME.TABLE_NAME
),
enriched AS (
SELECT * FROM source
PIVOT(MAX(value) FOR key IN ('a', 'b', 'c'))
AS final (id, timestamp_1, timestamp_2, col_1, col_2, col_3)
)
SELECT id, timestamp_1 FROM enriched;
WITH "SOURCE" AS (
SELECT
"TABLE_NAME"."ID" AS "ID",
"TABLE_NAME"."KEY" AS "KEY",
"TABLE_NAME"."VALUE" AS "VALUE",
"TABLE_NAME"."TIMESTAMP_1" AS "TIMESTAMP_1",
"TABLE_NAME"."TIMESTAMP_2" AS "TIMESTAMP_2"
FROM "DB_NAME"."SCHEMA_NAME"."TABLE_NAME" AS "TABLE_NAME"
)
SELECT
"FINAL"."ID" AS "ID",
"FINAL"."TIMESTAMP_1" AS "TIMESTAMP_1"
FROM "SOURCE" AS "SOURCE" PIVOT(MAX("SOURCE"."VALUE") FOR "SOURCE"."KEY" IN ('a', 'b', 'c')) AS "FINAL"("ID", "TIMESTAMP_1", "TIMESTAMP_2", "COL_1", "COL_2", "COL_3");
# title: unpivoted table source with a single value column, unpivot columns can't be qualified
# execute: false
# dialect: snowflake
SELECT * FROM m_sales AS m_sales(empid, dept, jan, feb) UNPIVOT(sales FOR month IN (jan, feb)) ORDER BY empid;
SELECT
"_q_0"."EMPID" AS "EMPID",
"_q_0"."DEPT" AS "DEPT",
"_q_0"."MONTH" AS "MONTH",
"_q_0"."SALES" AS "SALES"
FROM "M_SALES" AS "M_SALES"("EMPID", "DEPT", "JAN", "FEB") UNPIVOT("SALES" FOR "MONTH" IN ("JAN", "FEB")) AS "_q_0"
ORDER BY
"_q_0"."EMPID";
# title: unpivoted table source, unpivot has column aliases
# execute: false
SELECT * FROM (SELECT * FROM m_sales) AS m_sales(empid, dept, jan, feb) UNPIVOT(sales FOR month IN (jan, feb)) AS unpiv(a, b, c, d);
SELECT
"unpiv"."a" AS "a",
"unpiv"."b" AS "b",
"unpiv"."c" AS "c",
"unpiv"."d" AS "d"
FROM (
SELECT
"m_sales"."empid" AS "empid",
"m_sales"."dept" AS "dept",
"m_sales"."jan" AS "jan",
"m_sales"."feb" AS "feb"
FROM "m_sales" AS "m_sales"
) AS "m_sales" UNPIVOT("sales" FOR "month" IN ("m_sales"."jan", "m_sales"."feb")) AS "unpiv"("a", "b", "c", "d");
# title: unpivoted derived table source with a single value column
# execute: false
# dialect: snowflake
SELECT * FROM (SELECT * FROM m_sales) AS m_sales(empid, dept, jan, feb) UNPIVOT(sales FOR month IN (jan, feb)) ORDER BY empid;
SELECT
"_q_0"."EMPID" AS "EMPID",
"_q_0"."DEPT" AS "DEPT",
"_q_0"."MONTH" AS "MONTH",
"_q_0"."SALES" AS "SALES"
FROM (
SELECT
"M_SALES"."EMPID" AS "EMPID",
"M_SALES"."DEPT" AS "DEPT",
"M_SALES"."JAN" AS "JAN",
"M_SALES"."FEB" AS "FEB"
FROM "M_SALES" AS "M_SALES"
) AS "M_SALES" UNPIVOT("SALES" FOR "MONTH" IN ("JAN", "FEB")) AS "_q_0"
ORDER BY
"_q_0"."EMPID";
# title: unpivoted table source with a single value column, unpivot columns can be qualified
# execute: false
# dialect: bigquery
# note: the named columns aren't supported by BQ but we add them here to avoid defining a schema
SELECT * FROM produce AS produce(product, q1, q2, q3, q4) UNPIVOT(sales FOR quarter IN (q1, q2, q3, q4));
SELECT
`_q_0`.`product` AS `product`,
`_q_0`.`quarter` AS `quarter`,
`_q_0`.`sales` AS `sales`
FROM `produce` AS `produce` UNPIVOT(`sales` FOR `quarter` IN (`produce`.`q1`, `produce`.`q2`, `produce`.`q3`, `produce`.`q4`)) AS `_q_0`;
# title: unpivoted table source with multiple value columns
# execute: false
# dialect: bigquery
SELECT * FROM produce AS produce(product, q1, q2, q3, q4) UNPIVOT((first_half_sales, second_half_sales) FOR semesters IN ((Q1, Q2) AS 'semester_1', (Q3, Q4) AS 'semester_2'));
SELECT
`_q_0`.`product` AS `product`,
`_q_0`.`semesters` AS `semesters`,
`_q_0`.`first_half_sales` AS `first_half_sales`,
`_q_0`.`second_half_sales` AS `second_half_sales`
FROM `produce` AS `produce` UNPIVOT((`first_half_sales`, `second_half_sales`) FOR `semesters` IN ((`produce`.`q1`, `produce`.`q2`) AS 'semester_1', (`produce`.`q3`, `produce`.`q4`) AS 'semester_2')) AS `_q_0`;
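Fixtures like these are produced by running the optimizer without a schema, leaning on the alias column list (hence the # execute: false markers). A minimal sketch for the single-value-column case:

from sqlglot import parse_one
from sqlglot.optimizer import optimize

sql = (
    "SELECT * FROM produce AS produce(product, q1, q2, q3, q4) "
    "UNPIVOT(sales FOR quarter IN (q1, q2, q3, q4))"
)
# The star expands into the surviving columns plus the unpivot's
# name ('quarter') and value ('sales') columns.
print(optimize(parse_one(sql, read="bigquery"), dialect="bigquery").sql(dialect="bigquery"))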
# title: quoting is maintained
# dialect: snowflake
with cte1("id", foo) as (select 1, 2) select "id" from cte1;
@@ -1144,3 +1247,29 @@ WITH "x" AS (
LIMIT 10
)
LIMIT 10;
# title: avoid producing DAG cycle when pushing down predicate to join
# execute: false
SELECT
a.company,
b.num
FROM route AS a(num, company, pos, stop)
JOIN route AS b(num, company, pos, stop) ON (a.num = b.num)
JOIN stops AS c(id, name) ON (c.id = b.stop)
JOIN stops AS d(id, name) ON (d.id = c.id)
WHERE
c.name = 'Craiglockhart'
OR d.name = 'Tollcross';
SELECT
"a"."company" AS "company",
"b"."num" AS "num"
FROM "route" AS "a"("num", "company", "pos", "stop")
JOIN "route" AS "b"("num", "company", "pos", "stop")
ON "a"."num" = "b"."num"
JOIN "stops" AS "c"("id", "name")
ON "b"."stop" = "c"."id"
JOIN "stops" AS "d"("id", "name")
ON "c"."id" = "d"."id"
AND (
"c"."name" = 'Craiglockhart' OR "d"."name" = 'Tollcross'
);
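The fixture's point, per its title: the OR predicate mentions both c and d, so pushing it onto an earlier join edge would introduce a cycle in the join DAG; it lands on the last join instead, where both sources are in scope. A sketch with the fixture's own tables, no schema:

from sqlglot import parse_one
from sqlglot.optimizer import optimize

sql = (
    "SELECT a.company, b.num FROM route AS a(num, company, pos, stop) "
    "JOIN route AS b(num, company, pos, stop) ON a.num = b.num "
    "JOIN stops AS c(id, name) ON c.id = b.stop "
    "JOIN stops AS d(id, name) ON d.id = c.id "
    "WHERE c.name = 'Craiglockhart' OR d.name = 'Tollcross'"
)
print(optimize(parse_one(sql)).sql(pretty=True))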

View file

@@ -0,0 +1,20 @@
WITH y(c) AS (SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0) SELECT c FROM y;
WITH y(c) AS (SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0) SELECT c FROM y;
WITH y(c) AS (SELECT SUM(a) as d FROM (SELECT 1 a) AS x HAVING c > 0) SELECT c FROM y;
WITH y(c) AS (SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0) SELECT c FROM y;
WITH x(c) AS (SELECT SUM(1) a HAVING c > 0 LIMIT 1) SELECT * FROM x;
WITH x(c) AS (SELECT SUM(1) AS c HAVING c > 0 LIMIT 1) SELECT * FROM x;
-- Invalid statement in Snowflake but checking more complex structures
WITH x(c) AS ((SELECT 1 a) HAVING c > 0) SELECT * FROM x;
WITH x(c) AS ((SELECT 1 AS a) HAVING c > 0) SELECT * FROM x;
-- Invalid statement in Snowflake but checking more complex structures
WITH x(c) AS ((SELECT SUM(1) a) HAVING c > 0 LIMIT 1) SELECT * FROM x;
WITH x(c) AS ((SELECT SUM(1) AS a) HAVING c > 0 LIMIT 1) SELECT * FROM x;
-- Invalid statement in Snowflake but checking that we don't fail
WITH x(c) AS (SELECT SUM(a) FROM x HAVING c > 0 UNION ALL SELECT SUM(a) FROM y HAVING c > 0) SELECT * FROM x;
WITH x(c) AS (SELECT SUM(a) FROM x HAVING c > 0 UNION ALL SELECT SUM(a) FROM y HAVING c > 0) SELECT * FROM x;
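These fixtures exercise the new pushdown_cte_alias_columns pass. A minimal sketch, assuming the helper added in this version:

from sqlglot import parse_one
from sqlglot.optimizer.qualify_columns import pushdown_cte_alias_columns

# The CTE's declared output name c is pushed onto the unaliased
# projection so that HAVING c resolves.
ast = parse_one("WITH y(c) AS (SELECT SUM(a) FROM x HAVING c > 0) SELECT c FROM y")
print(pushdown_cte_alias_columns(ast).sql())
# WITH y(c) AS (SELECT SUM(a) AS c FROM x HAVING c > 0) SELECT c FROM y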

View file

@@ -11,7 +11,7 @@ SELECT x.a FROM (SELECT * FROM x) AS x JOIN y WHERE (x.a = y.a AND x.a = 1 AND x
SELECT x.a FROM (SELECT * FROM x) AS x JOIN y ON x.a = y.a WHERE TRUE;
SELECT x.a FROM (SELECT * FROM x) AS x JOIN y WHERE (x.a = y.a AND x.a = 1 AND x.b = 1) OR x.a = y.b;
SELECT x.a FROM (SELECT * FROM x) AS x JOIN y ON x.a = y.a OR x.a = y.b WHERE (x.a = 1 AND x.a = y.a AND x.b = 1) OR x.a = y.b;
SELECT x.a FROM (SELECT * FROM x) AS x JOIN y ON (x.a = 1 AND x.a = y.a AND x.b = 1) OR x.a = y.b WHERE (x.a = 1 AND x.a = y.a AND x.b = 1) OR x.a = y.b;
SELECT x.a FROM (SELECT x.a AS a, x.b * 1 AS c FROM x) AS x WHERE x.c = 1;
SELECT x.a FROM (SELECT x.a AS a, x.b * 1 AS c FROM x WHERE x.b * 1 = 1) AS x WHERE TRUE;

View file

@@ -153,7 +153,7 @@ SELECT AGGREGATE(ARRAY(x.a, x.b), 0, (x, acc) -> x + acc + x.a) AS sum_agg FROM
# dialect: starrocks
# execute: false
SELECT DATE_TRUNC('week', a) AS a FROM x;
SELECT DATE_TRUNC('week', x.a) AS a FROM x AS x;
SELECT DATE_TRUNC('WEEK', x.a) AS a FROM x AS x;
# dialect: bigquery
# execute: false
@@ -276,6 +287,17 @@ SELECT _q_0.a AS a FROM (SELECT x.a AS a FROM x AS x) AS _q_0;
SELECT * FROM x GROUP BY 1, 2;
SELECT x.a AS a, x.b AS b FROM x AS x GROUP BY x.a, x.b;
SELECT * FROM (SELECT * FROM x) AS s(a, b);
SELECT s.a AS a, s.b AS b FROM (SELECT x.a AS a, x.b AS b FROM x AS x) AS s;
# execute: false
SELECT * FROM (SELECT * FROM t) AS s(a, b);
SELECT s.a AS a, s.b AS b FROM (SELECT t.a AS a, t.b AS b FROM t AS t) AS s;
# execute: false
SELECT * FROM (SELECT * FROM t1 UNION ALL SELECT * FROM t2) AS s(b);
SELECT s.b AS b FROM (SELECT t1.b AS b FROM t1 AS t1 UNION ALL SELECT t2.b AS b FROM t2 AS t2) AS s;
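The new cases above check star expansion through an alias's declared column list when the underlying table has no known schema. A sketch via the qualify entry point (identify=False to match the fixtures' unquoted output):

from sqlglot import parse_one
from sqlglot.optimizer.qualify import qualify

ast = parse_one("SELECT * FROM (SELECT * FROM t) AS s(a, b)")
print(qualify(ast, identify=False, validate_qualify_columns=False).sql())
# SELECT s.a AS a, s.b AS b FROM (SELECT t.a AS a, t.b AS b FROM t AS t) AS s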
--------------------------------------
-- CTEs
--------------------------------------
@@ -430,6 +441,16 @@ SELECT t.aa AS aa FROM x AS x, UNNEST(x.a) AS t(aa);
SELECT aa FROM x, UNNEST(a) AS aa;
SELECT aa AS aa FROM x AS x, UNNEST(x.a) AS aa;
# dialect: bigquery
# execute: false
select * from unnest ([1, 2]) as x with offset;
SELECT x AS x, offset AS offset FROM UNNEST([1, 2]) AS x WITH OFFSET AS offset;
# dialect: bigquery
# execute: false
select * from unnest ([1, 2]) as x with offset as y;
SELECT x AS x, y AS y FROM UNNEST([1, 2]) AS x WITH OFFSET AS y;
# dialect: presto
SELECT x.a, i.b FROM x CROSS JOIN UNNEST(SPLIT(b, ',')) AS i(b);
SELECT x.a AS a, i.b AS b FROM x AS x CROSS JOIN UNNEST(SPLIT(x.b, ',')) AS i(b);
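The BigQuery fixtures above cover the implicit offset column of UNNEST ... WITH OFFSET during star expansion. A sketch under the same assumptions as the fixture run:

from sqlglot import parse_one
from sqlglot.optimizer.qualify import qualify

ast = parse_one("select * from unnest([1, 2]) as x with offset", read="bigquery")
print(qualify(ast, dialect="bigquery", identify=False, validate_qualify_columns=False).sql(dialect="bigquery"))
# SELECT x AS x, offset AS offset FROM UNNEST([1, 2]) AS x WITH OFFSET AS offset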

View file

@@ -14,6 +14,11 @@ SELECT 1 FROM x.y.z AS z;
SELECT 1 FROM x.y.z AS z;
SELECT 1 FROM x.y.z AS z;
# title: redshift unnest syntax, z.a should be a column, not a table
# dialect: redshift
SELECT 1 FROM y.z AS z, z.a;
SELECT 1 FROM c.y.z AS z, z.a;
# title: cte can't be qualified
WITH a AS (SELECT 1 FROM z) SELECT 1 FROM a;
WITH a AS (SELECT 1 FROM c.db.z AS z) SELECT 1 FROM a;
@@ -26,6 +31,10 @@ SELECT (SELECT y.c FROM c.db.y AS y) FROM c.db.x AS x;
SELECT * FROM x PIVOT (SUM(a) FOR b IN ('a', 'b'));
SELECT * FROM c.db.x AS x PIVOT(SUM(a) FOR b IN ('a', 'b')) AS _q_0;
# title: pivoted table, pivot has alias
SELECT * FROM x PIVOT (SUM(a) FOR b IN ('a', 'b')) AS piv;
SELECT * FROM c.db.x AS x PIVOT(SUM(a) FOR b IN ('a', 'b')) AS piv;
# title: wrapped table without alias
SELECT * FROM (tbl);
SELECT * FROM (c.db.tbl AS tbl);
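A sketch of the qualify_tables behavior pinned down here, including the newly covered wrapped-table case (c and db are the fixtures' default catalog and db):

from sqlglot import parse_one
from sqlglot.optimizer.qualify_tables import qualify_tables

# Unqualified tables pick up the default catalog/db and get an alias.
print(qualify_tables(parse_one("SELECT * FROM (tbl)"), catalog="c", db="db").sql())
# SELECT * FROM (c.db.tbl AS tbl)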

View file

@@ -427,7 +427,7 @@ FALSE;
TRUE;
date '1998-12-01' - interval x day;
CAST('1998-12-01' AS DATE) - INTERVAL x day;
CAST('1998-12-01' AS DATE) - INTERVAL x DAY;
date '1998-12-01' - interval '90' day;
CAST('1998-09-02' AS DATE);
@@ -442,13 +442,13 @@ interval '1' year + date '1998-01-01' + 3 * 7 * 4;
CAST('1999-01-01' AS DATE) + 84;
date '1998-12-01' - interval '90' foo;
CAST('1998-12-01' AS DATE) - INTERVAL '90' foo;
CAST('1998-12-01' AS DATE) - INTERVAL '90' FOO;
date '1998-12-01' + interval '90' foo;
CAST('1998-12-01' AS DATE) + INTERVAL '90' foo;
CAST('1998-12-01' AS DATE) + INTERVAL '90' FOO;
CAST(x AS DATE) + interval '1' week;
CAST(x AS DATE) + INTERVAL '1' week;
CAST(x AS DATE) + INTERVAL '1' WEEK;
CAST('2008-11-11' AS DATETIME) + INTERVAL '5' MONTH;
CAST('2009-04-11 00:00:00' AS DATETIME);
@@ -456,8 +456,8 @@ CAST('2009-04-11 00:00:00' AS DATETIME);
datetime '1998-12-01' - interval '90' day;
CAST('1998-09-02 00:00:00' AS DATETIME);
CAST(x AS DATETIME) + interval '1' week;
CAST(x AS DATETIME) + INTERVAL '1' week;
CAST(x AS DATETIME) + interval '1' WEEK;
CAST(x AS DATETIME) + INTERVAL '1' WEEK;
TS_OR_DS_TO_DATE('1998-12-01 00:00:01') - interval '90' day;
CAST('1998-09-02' AS DATE);
@@ -844,14 +844,14 @@ x < CAST('2022-01-01' AS DATE) AND x >= CAST('2021-01-01' AS DATE);
-- Always false, except for nulls
DATE_TRUNC('quarter', x) = CAST('2021-01-02' AS DATE);
DATE_TRUNC('quarter', x) = CAST('2021-01-02' AS DATE);
DATE_TRUNC('QUARTER', x) = CAST('2021-01-02' AS DATE);
DATE_TRUNC('year', x) <> CAST('2021-01-01' AS DATE);
FALSE;
-- Always true, except for nulls
DATE_TRUNC('year', x) <> CAST('2021-01-02' AS DATE);
DATE_TRUNC('year', x) <> CAST('2021-01-02' AS DATE);
DATE_TRUNC('YEAR', x) <> CAST('2021-01-02' AS DATE);
DATE_TRUNC('year', x) <= CAST('2021-01-01' AS DATE);
x < CAST('2022-01-01' AS DATE);
@@ -884,11 +884,11 @@ DATE_TRUNC('year', x) > TS_OR_DS_TO_DATE(TS_OR_DS_TO_DATE('2021-01-02'));
x >= CAST('2022-01-01' AS DATE);
DATE_TRUNC('year', x) > TS_OR_DS_TO_DATE(TS_OR_DS_TO_DATE('2021-01-02', '%Y'));
DATE_TRUNC('year', x) > TS_OR_DS_TO_DATE(TS_OR_DS_TO_DATE('2021-01-02', '%Y'));
DATE_TRUNC('YEAR', x) > CAST(STR_TO_TIME('2021-01-02', '%Y') AS DATE);
-- right is not a date
DATE_TRUNC('year', x) <> '2021-01-02';
DATE_TRUNC('year', x) <> '2021-01-02';
DATE_TRUNC('YEAR', x) <> '2021-01-02';
DATE_TRUNC('year', x) IN (CAST('2021-01-01' AS DATE), CAST('2023-01-01' AS DATE));
(x < CAST('2022-01-01' AS DATE) AND x >= CAST('2021-01-01' AS DATE)) OR (x < CAST('2024-01-01' AS DATE) AND x >= CAST('2023-01-01' AS DATE));
@@ -906,7 +906,7 @@ x < CAST('2022-01-01 00:00:00' AS DATETIME) AND x >= CAST('2021-01-01 00:00:00'
-- right side is not a date literal
DATE_TRUNC('day', x) = CAST(y AS DATE);
CAST(y AS DATE) = DATE_TRUNC('day', x);
CAST(y AS DATE) = DATE_TRUNC('DAY', x);
-- nested cast
DATE_TRUNC('day', x) = CAST(CAST('2021-01-01 01:02:03' AS DATETIME) AS DATE);
@@ -973,7 +973,7 @@ DATE_ADD(DATE_ADD(DATE_TRUNC('week', DATE_SUB(x, 1, DAY)), 1, DAY), 1, YEAR) < C
x < CAST('2020-01-14' AS DATE);
x - INTERVAL '1' day = CAST(y AS DATE);
CAST(y AS DATE) = x - INTERVAL '1' day;
CAST(y AS DATE) = x - INTERVAL '1' DAY;
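The DATE_TRUNC rewrites above come from simplify's date-range logic: equality against a date literal collapses to a half-open range on the raw column. Minimal sketch:

from sqlglot import parse_one
from sqlglot.optimizer.simplify import simplify

print(simplify(parse_one("DATE_TRUNC('year', x) = CAST('2021-01-01' AS DATE)")).sql())
# x < CAST('2022-01-01' AS DATE) AND x >= CAST('2021-01-01' AS DATE)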
--------------------------------------
-- Constant Propagation
@@ -1094,3 +1094,33 @@ CASE WHEN x = y THEN z END;
CASE x1 + x2 WHEN x3 THEN x4 WHEN x5 + x6 THEN x7 ELSE x8 END;
CASE WHEN x3 = (x1 + x2) THEN x4 WHEN (x1 + x2) = (x5 + x6) THEN x7 ELSE x8 END;
--------------------------------------
-- Simplify STARTSWITH
--------------------------------------
STARTS_WITH('foo', 'f');
TRUE;
STARTS_WITH('foo', 'g');
FALSE;
STARTS_WITH('', 'f');
FALSE;
STARTS_WITH('', '');
TRUE;
STARTS_WITH('foo', '');
TRUE;
STARTS_WITH(NULL, y);
STARTS_WITH(NULL, y);
STARTS_WITH(x, y);
STARTS_WITH(x, y);
STARTS_WITH('x', y);
STARTS_WITH('x', y);
STARTS_WITH(x, 'y');
STARTS_WITH(x, 'y');
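A sketch of the new STARTS_WITH folding, assuming the rule added here: literal-only calls are evaluated to booleans, while anything involving a column or NULL is left untouched:

from sqlglot import parse_one
from sqlglot.optimizer.simplify import simplify

print(simplify(parse_one("STARTS_WITH('foo', 'f')")).sql())  # TRUE
print(simplify(parse_one("STARTS_WITH(x, 'y')")).sql())      # STARTS_WITH(x, 'y')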

View file

@@ -527,10 +527,11 @@ SELECT
"t_s_secyear"."customer_last_name" AS "customer_last_name",
"t_s_secyear"."customer_preferred_cust_flag" AS "customer_preferred_cust_flag"
FROM "year_total" AS "t_s_firstyear"
JOIN "year_total" AS "t_c_secyear"
ON "t_c_secyear"."customer_id" = "t_s_firstyear"."customer_id"
AND "t_c_secyear"."dyear" = 2002
AND "t_c_secyear"."sale_type" = 'c'
JOIN "year_total" AS "t_c_firstyear"
ON "t_c_firstyear"."customer_id" = "t_s_firstyear"."customer_id"
AND "t_c_firstyear"."dyear" = 2001
AND "t_c_firstyear"."sale_type" = 'c'
AND "t_c_firstyear"."year_total" > 0
JOIN "year_total" AS "t_s_secyear"
ON "t_s_firstyear"."customer_id" = "t_s_secyear"."customer_id"
AND "t_s_secyear"."dyear" = 2002
@@ -540,15 +541,10 @@ JOIN "year_total" AS "t_w_firstyear"
AND "t_w_firstyear"."dyear" = 2001
AND "t_w_firstyear"."sale_type" = 'w'
AND "t_w_firstyear"."year_total" > 0
JOIN "year_total" AS "t_w_secyear"
ON "t_s_firstyear"."customer_id" = "t_w_secyear"."customer_id"
AND "t_w_secyear"."dyear" = 2002
AND "t_w_secyear"."sale_type" = 'w'
JOIN "year_total" AS "t_c_firstyear"
ON "t_c_firstyear"."customer_id" = "t_s_firstyear"."customer_id"
AND "t_c_firstyear"."dyear" = 2001
AND "t_c_firstyear"."sale_type" = 'c'
AND "t_c_firstyear"."year_total" > 0
JOIN "year_total" AS "t_c_secyear"
ON "t_c_secyear"."customer_id" = "t_s_firstyear"."customer_id"
AND "t_c_secyear"."dyear" = 2002
AND "t_c_secyear"."sale_type" = 'c'
AND CASE
WHEN "t_c_firstyear"."year_total" > 0
THEN "t_c_secyear"."year_total" / "t_c_firstyear"."year_total"
@@ -558,6 +554,10 @@ JOIN "year_total" AS "t_c_firstyear"
THEN "t_s_secyear"."year_total" / "t_s_firstyear"."year_total"
ELSE NULL
END
JOIN "year_total" AS "t_w_secyear"
ON "t_s_firstyear"."customer_id" = "t_w_secyear"."customer_id"
AND "t_w_secyear"."dyear" = 2002
AND "t_w_secyear"."sale_type" = 'w'
AND CASE
WHEN "t_c_firstyear"."year_total" > 0
THEN "t_c_secyear"."year_total" / "t_c_firstyear"."year_total"
@@ -1787,6 +1787,10 @@ SELECT
"t_s_secyear"."customer_last_name" AS "customer_last_name",
"t_s_secyear"."customer_birth_country" AS "customer_birth_country"
FROM "year_total" AS "t_s_firstyear"
JOIN "year_total" AS "t_s_secyear"
ON "t_s_firstyear"."customer_id" = "t_s_secyear"."customer_id"
AND "t_s_secyear"."dyear" = 2002
AND "t_s_secyear"."sale_type" = 's'
JOIN "year_total" AS "t_w_firstyear"
ON "t_s_firstyear"."customer_id" = "t_w_firstyear"."customer_id"
AND "t_w_firstyear"."dyear" = 2001
@@ -1796,10 +1800,6 @@ JOIN "year_total" AS "t_w_secyear"
ON "t_s_firstyear"."customer_id" = "t_w_secyear"."customer_id"
AND "t_w_secyear"."dyear" = 2002
AND "t_w_secyear"."sale_type" = 'w'
JOIN "year_total" AS "t_s_secyear"
ON "t_s_firstyear"."customer_id" = "t_s_secyear"."customer_id"
AND "t_s_secyear"."dyear" = 2002
AND "t_s_secyear"."sale_type" = 's'
AND CASE
WHEN "t_s_firstyear"."year_total" > 0
THEN "t_s_secyear"."year_total" / "t_s_firstyear"."year_total"
@@ -1934,7 +1934,7 @@ SELECT
AVG("store_sales"."ss_ext_wholesale_cost") AS "_col_2",
SUM("store_sales"."ss_ext_wholesale_cost") AS "_col_3"
FROM "store_sales" AS "store_sales"
CROSS JOIN "customer_demographics" AS "customer_demographics"
CROSS JOIN "household_demographics" AS "household_demographics"
JOIN "customer_address" AS "customer_address"
ON (
"customer_address"."ca_address_sk" = "store_sales"."ss_addr_sk"
@@ -1957,10 +1957,7 @@ JOIN "customer_address" AS "customer_address"
AND "store_sales"."ss_net_profit" <= 300
AND "store_sales"."ss_net_profit" >= 150
)
JOIN "date_dim" AS "date_dim"
ON "date_dim"."d_date_sk" = "store_sales"."ss_sold_date_sk"
AND "date_dim"."d_year" = 2001
JOIN "household_demographics" AS "household_demographics"
JOIN "customer_demographics" AS "customer_demographics"
ON (
"customer_demographics"."cd_demo_sk" = "store_sales"."ss_cdemo_sk"
AND "customer_demographics"."cd_education_status" = 'Advanced Degree'
@@ -1988,6 +1985,9 @@ JOIN "household_demographics" AS "household_demographics"
AND "store_sales"."ss_sales_price" <= 200.00
AND "store_sales"."ss_sales_price" >= 150.00
)
JOIN "date_dim" AS "date_dim"
ON "date_dim"."d_date_sk" = "store_sales"."ss_sold_date_sk"
AND "date_dim"."d_year" = 2001
JOIN "store" AS "store"
ON "store"."s_store_sk" = "store_sales"."ss_store_sk";
@@ -2343,16 +2343,18 @@ SELECT
"customer_address"."ca_zip" AS "ca_zip",
SUM("catalog_sales"."cs_sales_price") AS "_col_1"
FROM "catalog_sales" AS "catalog_sales"
JOIN "customer_address" AS "customer_address"
ON "catalog_sales"."cs_sales_price" > 500
OR "customer_address"."ca_state" IN ('CA', 'WA', 'GA')
OR SUBSTR("customer_address"."ca_zip", 1, 5) IN ('85669', '86197', '88274', '83405', '86475', '85392', '85460', '80348', '81792')
JOIN "customer" AS "customer"
ON "catalog_sales"."cs_bill_customer_sk" = "customer"."c_customer_sk"
JOIN "date_dim" AS "date_dim"
ON "catalog_sales"."cs_sold_date_sk" = "date_dim"."d_date_sk"
AND "date_dim"."d_qoy" = 1
AND "date_dim"."d_year" = 1998
JOIN "customer" AS "customer"
ON "catalog_sales"."cs_bill_customer_sk" = "customer"."c_customer_sk"
JOIN "customer_address" AS "customer_address"
ON (
"catalog_sales"."cs_sales_price" > 500
OR "customer_address"."ca_state" IN ('CA', 'WA', 'GA')
OR SUBSTR("customer_address"."ca_zip", 1, 5) IN ('85669', '86197', '88274', '83405', '86475', '85392', '85460', '80348', '81792')
)
AND "customer"."c_current_addr_sk" = "customer_address"."ca_address_sk"
GROUP BY
"customer_address"."ca_zip"
@@ -2428,7 +2430,7 @@ JOIN "date_dim" AS "date_dim"
ON "cs1"."cs_ship_date_sk" = "date_dim"."d_date_sk"
AND "date_dim"."d_date" >= '2002-3-01'
AND (
CAST('2002-3-01' AS DATE) + INTERVAL '60' day
CAST('2002-3-01' AS DATE) + INTERVAL '60' DAY
) >= CAST("date_dim"."d_date" AS DATE)
WHERE
"_u_3"."_u_4" IS NULL
@@ -2669,15 +2671,15 @@ SELECT
FROM "date_dim" AS "date_dim"
JOIN "store_sales" AS "store_sales"
ON "date_dim"."d_date_sk" = "store_sales"."ss_sold_date_sk"
JOIN "customer" AS "customer"
ON "customer"."c_customer_sk" = "store_sales"."ss_customer_sk"
JOIN "item" AS "item"
ON "item"."i_item_sk" = "store_sales"."ss_item_sk" AND "item"."i_manager_id" = 38
JOIN "customer_address" AS "customer_address"
ON "customer"."c_current_addr_sk" = "customer_address"."ca_address_sk"
JOIN "store" AS "store"
ON "store"."s_store_sk" = "store_sales"."ss_store_sk"
JOIN "customer_address" AS "customer_address"
ON SUBSTR("customer_address"."ca_zip", 1, 5) <> SUBSTR("store"."s_zip", 1, 5)
JOIN "customer" AS "customer"
ON "customer"."c_current_addr_sk" = "customer_address"."ca_address_sk"
AND "customer"."c_customer_sk" = "store_sales"."ss_customer_sk"
AND SUBSTR("customer_address"."ca_zip", 1, 5) <> SUBSTR("store"."s_zip", 1, 5)
WHERE
"date_dim"."d_moy" = 12 AND "date_dim"."d_year" = 1998
GROUP BY
@@ -3109,6 +3111,8 @@ WITH "ssales" AS (
"item"."i_color" AS "i_color",
SUM("store_sales"."ss_net_profit") AS "netpaid"
FROM "store_sales" AS "store_sales"
JOIN "customer" AS "customer"
ON "customer"."c_customer_sk" = "store_sales"."ss_customer_sk"
JOIN "item" AS "item"
ON "item"."i_item_sk" = "store_sales"."ss_item_sk"
JOIN "store" AS "store"
@@ -3117,10 +3121,8 @@ WITH "ssales" AS (
ON "store_returns"."sr_item_sk" = "store_sales"."ss_item_sk"
AND "store_returns"."sr_ticket_number" = "store_sales"."ss_ticket_number"
JOIN "customer_address" AS "customer_address"
ON "customer_address"."ca_zip" = "store"."s_zip"
JOIN "customer" AS "customer"
ON "customer"."c_birth_country" = UPPER("customer_address"."ca_country")
AND "customer"."c_customer_sk" = "store_sales"."ss_customer_sk"
AND "customer_address"."ca_zip" = "store"."s_zip"
GROUP BY
"customer"."c_last_name",
"customer"."c_first_name",
@@ -3850,6 +3852,8 @@ JOIN "ss" AS "ss2"
ON "ss1"."ca_county" = "ss2"."ca_county" AND "ss2"."d_qoy" = 2 AND "ss2"."d_year" = 2001
JOIN "ws" AS "ws1"
ON "ss1"."ca_county" = "ws1"."ca_county" AND "ws1"."d_qoy" = 1 AND "ws1"."d_year" = 2001
JOIN "ss" AS "ss3"
ON "ss2"."ca_county" = "ss3"."ca_county" AND "ss3"."d_qoy" = 3 AND "ss3"."d_year" = 2001
JOIN "ws" AS "ws2"
ON "ws1"."ca_county" = "ws2"."ca_county"
AND "ws2"."d_qoy" = 2
@@ -3864,11 +3868,9 @@ JOIN "ws" AS "ws2"
ELSE NULL
END
JOIN "ws" AS "ws3"
ON "ws1"."ca_county" = "ws3"."ca_county" AND "ws3"."d_qoy" = 3 AND "ws3"."d_year" = 2001
JOIN "ss" AS "ss3"
ON "ss2"."ca_county" = "ss3"."ca_county"
AND "ss3"."d_qoy" = 3
AND "ss3"."d_year" = 2001
ON "ws1"."ca_county" = "ws3"."ca_county"
AND "ws3"."d_qoy" = 3
AND "ws3"."d_year" = 2001
AND CASE
WHEN "ss2"."store_sales" > 0
THEN "ss3"."store_sales" / "ss2"."store_sales"
@@ -7013,22 +7015,22 @@ SELECT
"ss_items"."ss_item_rev" + "cs_items"."cs_item_rev" + "ws_items"."ws_item_rev"
) / 3 AS "average"
FROM "ss_items"
JOIN "ws_items"
ON "ss_items"."item_id" = "ws_items"."item_id"
AND "ss_items"."ss_item_rev" <= 1.1 * "ws_items"."ws_item_rev"
AND "ss_items"."ss_item_rev" >= 0.9 * "ws_items"."ws_item_rev"
AND "ws_items"."ws_item_rev" <= 1.1 * "ss_items"."ss_item_rev"
AND "ws_items"."ws_item_rev" >= 0.9 * "ss_items"."ss_item_rev"
JOIN "cs_items"
ON "cs_items"."cs_item_rev" <= 1.1 * "ss_items"."ss_item_rev"
AND "cs_items"."cs_item_rev" <= 1.1 * "ws_items"."ws_item_rev"
AND "cs_items"."cs_item_rev" >= 0.9 * "ss_items"."ss_item_rev"
AND "cs_items"."cs_item_rev" >= 0.9 * "ws_items"."ws_item_rev"
AND "cs_items"."item_id" = "ss_items"."item_id"
AND "ss_items"."ss_item_rev" <= 1.1 * "cs_items"."cs_item_rev"
AND "ss_items"."ss_item_rev" >= 0.9 * "cs_items"."cs_item_rev"
JOIN "ws_items"
ON "cs_items"."cs_item_rev" <= 1.1 * "ws_items"."ws_item_rev"
AND "cs_items"."cs_item_rev" >= 0.9 * "ws_items"."ws_item_rev"
AND "ss_items"."item_id" = "ws_items"."item_id"
AND "ss_items"."ss_item_rev" <= 1.1 * "ws_items"."ws_item_rev"
AND "ss_items"."ss_item_rev" >= 0.9 * "ws_items"."ws_item_rev"
AND "ws_items"."ws_item_rev" <= 1.1 * "cs_items"."cs_item_rev"
AND "ws_items"."ws_item_rev" <= 1.1 * "ss_items"."ss_item_rev"
AND "ws_items"."ws_item_rev" >= 0.9 * "cs_items"."cs_item_rev"
AND "ws_items"."ws_item_rev" >= 0.9 * "ss_items"."ss_item_rev"
ORDER BY
"item_id",
"ss_item_rev"
@@ -7915,17 +7917,18 @@ WITH "cs_ui" AS (
SUM("store_sales"."ss_list_price") AS "s2",
SUM("store_sales"."ss_coupon_amt") AS "s3"
FROM "store_sales" AS "store_sales"
CROSS JOIN "income_band" AS "ib2"
JOIN "customer_address" AS "ad1"
ON "ad1"."ca_address_sk" = "store_sales"."ss_addr_sk"
JOIN "customer_demographics" AS "cd1"
ON "cd1"."cd_demo_sk" = "store_sales"."ss_cdemo_sk"
JOIN "cs_ui"
ON "cs_ui"."cs_item_sk" = "store_sales"."ss_item_sk"
JOIN "customer" AS "customer"
ON "customer"."c_customer_sk" = "store_sales"."ss_customer_sk"
JOIN "date_dim" AS "d1"
ON "d1"."d_date_sk" = "store_sales"."ss_sold_date_sk"
JOIN "household_demographics" AS "hd1"
ON "hd1"."hd_demo_sk" = "store_sales"."ss_hdemo_sk"
JOIN "household_demographics" AS "hd2"
ON "hd2"."hd_income_band_sk" = "ib2"."ib_income_band_sk"
JOIN "item" AS "item"
ON "item"."i_color" IN ('cyan', 'peach', 'blush', 'frosted', 'powder', 'orange')
AND "item"."i_current_price" <= 68
@@ -7938,22 +7941,21 @@ WITH "cs_ui" AS (
JOIN "store_returns" AS "store_returns"
ON "store_returns"."sr_item_sk" = "store_sales"."ss_item_sk"
AND "store_returns"."sr_ticket_number" = "store_sales"."ss_ticket_number"
JOIN "customer" AS "customer"
ON "customer"."c_current_hdemo_sk" = "hd2"."hd_demo_sk"
AND "customer"."c_customer_sk" = "store_sales"."ss_customer_sk"
JOIN "income_band" AS "ib1"
ON "hd1"."hd_income_band_sk" = "ib1"."ib_income_band_sk"
JOIN "customer_address" AS "ad2"
ON "ad2"."ca_address_sk" = "customer"."c_current_addr_sk"
JOIN "customer_demographics" AS "cd2"
ON "cd2"."cd_demo_sk" = "customer"."c_current_cdemo_sk"
ON "cd1"."cd_marital_status" <> "cd2"."cd_marital_status"
AND "cd2"."cd_demo_sk" = "customer"."c_current_cdemo_sk"
JOIN "date_dim" AS "d2"
ON "customer"."c_first_sales_date_sk" = "d2"."d_date_sk"
JOIN "date_dim" AS "d3"
ON "customer"."c_first_shipto_date_sk" = "d3"."d_date_sk"
JOIN "customer_demographics" AS "cd1"
ON "cd1"."cd_demo_sk" = "store_sales"."ss_cdemo_sk"
AND "cd1"."cd_marital_status" <> "cd2"."cd_marital_status"
JOIN "household_demographics" AS "hd2"
ON "customer"."c_current_hdemo_sk" = "hd2"."hd_demo_sk"
JOIN "income_band" AS "ib1"
ON "hd1"."hd_income_band_sk" = "ib1"."ib_income_band_sk"
JOIN "income_band" AS "ib2"
ON "hd2"."hd_income_band_sk" = "ib2"."ib_income_band_sk"
GROUP BY
"item"."i_product_name",
"item"."i_item_sk",
@@ -9523,8 +9525,8 @@ LEFT JOIN "catalog_returns" AS "catalog_returns"
JOIN "customer_demographics" AS "customer_demographics"
ON "catalog_sales"."cs_bill_cdemo_sk" = "customer_demographics"."cd_demo_sk"
AND "customer_demographics"."cd_marital_status" = 'M'
JOIN "date_dim" AS "d3"
ON "catalog_sales"."cs_ship_date_sk" = "d3"."d_date_sk"
JOIN "date_dim" AS "d1"
ON "catalog_sales"."cs_sold_date_sk" = "d1"."d_date_sk" AND "d1"."d_year" = 2002
JOIN "household_demographics" AS "household_demographics"
ON "catalog_sales"."cs_bill_hdemo_sk" = "household_demographics"."hd_demo_sk"
AND "household_demographics"."hd_buy_potential" = '501-1000'
@@ -9536,14 +9538,13 @@ JOIN "item" AS "item"
LEFT JOIN "promotion" AS "promotion"
ON "catalog_sales"."cs_promo_sk" = "promotion"."p_promo_sk"
JOIN "date_dim" AS "d2"
ON "d2"."d_date_sk" = "inventory"."inv_date_sk"
ON "d1"."d_week_seq" = "d2"."d_week_seq"
AND "d2"."d_date_sk" = "inventory"."inv_date_sk"
JOIN "date_dim" AS "d3"
ON "catalog_sales"."cs_ship_date_sk" = "d3"."d_date_sk"
AND "d3"."d_date" > "d1"."d_date" + INTERVAL '5' DAY
JOIN "warehouse" AS "warehouse"
ON "inventory"."inv_warehouse_sk" = "warehouse"."w_warehouse_sk"
JOIN "date_dim" AS "d1"
ON "catalog_sales"."cs_sold_date_sk" = "d1"."d_date_sk"
AND "d1"."d_week_seq" = "d2"."d_week_seq"
AND "d1"."d_year" = 2002
AND "d3"."d_date" > "d1"."d_date" + INTERVAL '5' day
GROUP BY
"item"."i_item_desc",
"warehouse"."w_warehouse_name",
@@ -9769,6 +9770,10 @@ SELECT
"t_s_secyear"."customer_first_name" AS "customer_first_name",
"t_s_secyear"."customer_last_name" AS "customer_last_name"
FROM "year_total" AS "t_s_firstyear"
JOIN "year_total" AS "t_s_secyear"
ON "t_s_firstyear"."customer_id" = "t_s_secyear"."customer_id"
AND "t_s_secyear"."sale_type" = 's'
AND "t_s_secyear"."year1" = 2000
JOIN "year_total" AS "t_w_firstyear"
ON "t_s_firstyear"."customer_id" = "t_w_firstyear"."customer_id"
AND "t_w_firstyear"."sale_type" = 'w'
@@ -9778,10 +9783,6 @@ JOIN "year_total" AS "t_w_secyear"
ON "t_s_firstyear"."customer_id" = "t_w_secyear"."customer_id"
AND "t_w_secyear"."sale_type" = 'w'
AND "t_w_secyear"."year1" = 2000
JOIN "year_total" AS "t_s_secyear"
ON "t_s_firstyear"."customer_id" = "t_s_secyear"."customer_id"
AND "t_s_secyear"."sale_type" = 's'
AND "t_s_secyear"."year1" = 2000
AND CASE
WHEN "t_s_firstyear"."year_total" > 0
THEN "t_s_secyear"."year_total" / "t_s_firstyear"."year_total"
@@ -11369,8 +11370,8 @@ JOIN "web_page" AS "web_page"
JOIN "web_returns" AS "web_returns"
ON "web_returns"."wr_item_sk" = "web_sales"."ws_item_sk"
AND "web_returns"."wr_order_number" = "web_sales"."ws_order_number"
JOIN "customer_demographics" AS "cd2"
ON "cd2"."cd_demo_sk" = "web_returns"."wr_returning_cdemo_sk"
JOIN "customer_demographics" AS "cd1"
ON "cd1"."cd_demo_sk" = "web_returns"."wr_refunded_cdemo_sk"
JOIN "customer_address" AS "customer_address"
ON "customer_address"."ca_address_sk" = "web_returns"."wr_refunded_addr_sk"
AND (
@@ -11395,8 +11396,8 @@ JOIN "customer_address" AS "customer_address"
)
JOIN "reason" AS "reason"
ON "reason"."r_reason_sk" = "web_returns"."wr_reason_sk"
JOIN "customer_demographics" AS "cd1"
ON "cd1"."cd_demo_sk" = "web_returns"."wr_refunded_cdemo_sk"
JOIN "customer_demographics" AS "cd2"
ON "cd2"."cd_demo_sk" = "web_returns"."wr_returning_cdemo_sk"
AND (
(
"cd1"."cd_education_status" = "cd2"."cd_education_status"
@@ -12032,13 +12033,14 @@ SELECT
"call_center"."cc_manager" AS "manager",
SUM("catalog_returns"."cr_net_loss") AS "returns_loss"
FROM "call_center" AS "call_center"
JOIN "household_demographics" AS "household_demographics"
ON "household_demographics"."hd_buy_potential" LIKE 'Unknown%'
JOIN "customer" AS "customer"
ON "customer"."c_current_hdemo_sk" = "household_demographics"."hd_demo_sk"
JOIN "catalog_returns" AS "catalog_returns"
ON "call_center"."cc_call_center_sk" = "catalog_returns"."cr_call_center_sk"
AND "catalog_returns"."cr_returning_customer_sk" = "customer"."c_customer_sk"
JOIN "customer" AS "customer"
ON "catalog_returns"."cr_returning_customer_sk" = "customer"."c_customer_sk"
JOIN "date_dim" AS "date_dim"
ON "catalog_returns"."cr_returned_date_sk" = "date_dim"."d_date_sk"
AND "date_dim"."d_moy" = 12
AND "date_dim"."d_year" = 1999
JOIN "customer_address" AS "customer_address"
ON "customer"."c_current_addr_sk" = "customer_address"."ca_address_sk"
AND "customer_address"."ca_gmt_offset" = -7
@@ -12060,10 +12062,9 @@ JOIN "customer_demographics" AS "customer_demographics"
"customer_demographics"."cd_marital_status" = 'M'
OR "customer_demographics"."cd_marital_status" = 'W'
)
JOIN "date_dim" AS "date_dim"
ON "catalog_returns"."cr_returned_date_sk" = "date_dim"."d_date_sk"
AND "date_dim"."d_moy" = 12
AND "date_dim"."d_year" = 1999
JOIN "household_demographics" AS "household_demographics"
ON "customer"."c_current_hdemo_sk" = "household_demographics"."hd_demo_sk"
AND "household_demographics"."hd_buy_potential" LIKE 'Unknown%'
GROUP BY
"call_center"."cc_call_center_id",
"call_center"."cc_name",
@@ -12249,7 +12250,7 @@ JOIN "date_dim" AS "date_dim"
ON "date_dim"."d_date" >= '2000-3-01'
AND "date_dim"."d_date_sk" = "ws1"."ws_ship_date_sk"
AND (
CAST('2000-3-01' AS DATE) + INTERVAL '60' day
CAST('2000-3-01' AS DATE) + INTERVAL '60' DAY
) >= CAST("date_dim"."d_date" AS DATE)
JOIN "web_site" AS "web_site"
ON "web_site"."web_company_name" = 'pri'
@@ -12339,7 +12340,7 @@ JOIN "date_dim" AS "date_dim"
ON "date_dim"."d_date" >= '2000-4-01'
AND "date_dim"."d_date_sk" = "ws1"."ws_ship_date_sk"
AND (
CAST('2000-4-01' AS DATE) + INTERVAL '60' day
CAST('2000-4-01' AS DATE) + INTERVAL '60' DAY
) >= CAST("date_dim"."d_date" AS DATE)
JOIN "web_site" AS "web_site"
ON "web_site"."web_company_name" = 'pri'

View file

@@ -99,19 +99,19 @@ order by
p_partkey
limit
100;
WITH "region_2" AS (
WITH "partsupp_2" AS (
SELECT
"partsupp"."ps_partkey" AS "ps_partkey",
"partsupp"."ps_suppkey" AS "ps_suppkey",
"partsupp"."ps_supplycost" AS "ps_supplycost"
FROM "partsupp" AS "partsupp"
), "region_2" AS (
SELECT
"region"."r_regionkey" AS "r_regionkey",
"region"."r_name" AS "r_name"
FROM "region" AS "region"
WHERE
"region"."r_name" = 'EUROPE'
), "partsupp_2" AS (
SELECT
"partsupp"."ps_partkey" AS "ps_partkey",
"partsupp"."ps_suppkey" AS "ps_suppkey",
"partsupp"."ps_supplycost" AS "ps_supplycost"
FROM "partsupp" AS "partsupp"
), "_u_0" AS (
SELECT
MIN("partsupp"."ps_supplycost") AS "_col_0",
@@ -136,16 +136,16 @@ SELECT
"supplier"."s_phone" AS "s_phone",
"supplier"."s_comment" AS "s_comment"
FROM "part" AS "part"
CROSS JOIN "region_2" AS "region"
LEFT JOIN "_u_0" AS "_u_0"
ON "_u_0"."_u_1" = "part"."p_partkey"
JOIN "nation" AS "nation"
ON "nation"."n_regionkey" = "region"."r_regionkey"
JOIN "partsupp_2" AS "partsupp"
ON "part"."p_partkey" = "partsupp"."ps_partkey"
JOIN "supplier" AS "supplier"
ON "partsupp"."ps_suppkey" = "supplier"."s_suppkey"
JOIN "nation" AS "nation"
ON "nation"."n_nationkey" = "supplier"."s_nationkey"
AND "partsupp"."ps_suppkey" = "supplier"."s_suppkey"
JOIN "region_2" AS "region"
ON "nation"."n_regionkey" = "region"."r_regionkey"
WHERE
"_u_0"."_col_0" = "partsupp"."ps_supplycost"
AND "part"."p_size" = 15
@@ -294,10 +294,10 @@ JOIN "orders" AS "orders"
ON "customer"."c_custkey" = "orders"."o_custkey"
AND CAST("orders"."o_orderdate" AS DATE) < CAST('1995-01-01' AS DATE)
AND CAST("orders"."o_orderdate" AS DATE) >= CAST('1994-01-01' AS DATE)
JOIN "supplier" AS "supplier"
ON "customer"."c_nationkey" = "supplier"."s_nationkey"
JOIN "lineitem" AS "lineitem"
ON "lineitem"."l_orderkey" = "orders"."o_orderkey"
JOIN "supplier" AS "supplier"
ON "customer"."c_nationkey" = "supplier"."s_nationkey"
AND "lineitem"."l_suppkey" = "supplier"."s_suppkey"
JOIN "nation" AS "nation"
ON "nation"."n_nationkey" = "supplier"."s_nationkey"
@@ -389,8 +389,13 @@ JOIN "nation" AS "n1"
"n1"."n_name" = 'FRANCE' OR "n1"."n_name" = 'GERMANY'
)
AND "n1"."n_nationkey" = "supplier"."s_nationkey"
JOIN "orders" AS "orders"
ON "lineitem"."l_orderkey" = "orders"."o_orderkey"
JOIN "customer" AS "customer"
ON "customer"."c_custkey" = "orders"."o_custkey"
JOIN "nation" AS "n2"
ON (
ON "customer"."c_nationkey" = "n2"."n_nationkey"
AND (
"n1"."n_name" = 'FRANCE' OR "n2"."n_name" = 'FRANCE'
)
AND (
@@ -399,11 +404,6 @@ JOIN "nation" AS "n2"
AND (
"n2"."n_name" = 'FRANCE' OR "n2"."n_name" = 'GERMANY'
)
JOIN "customer" AS "customer"
ON "customer"."c_nationkey" = "n2"."n_nationkey"
JOIN "orders" AS "orders"
ON "customer"."c_custkey" = "orders"."o_custkey"
AND "lineitem"."l_orderkey" = "orders"."o_orderkey"
GROUP BY
"n1"."n_name",
"n2"."n_name",
@@ -467,23 +467,22 @@ SELECT
1 - "lineitem"."l_discount"
)) AS "mkt_share"
FROM "part" AS "part"
JOIN "region" AS "region"
ON "region"."r_name" = 'AMERICA'
JOIN "lineitem" AS "lineitem"
ON "lineitem"."l_partkey" = "part"."p_partkey"
JOIN "nation" AS "n1"
ON "n1"."n_regionkey" = "region"."r_regionkey"
JOIN "customer" AS "customer"
ON "customer"."c_nationkey" = "n1"."n_nationkey"
JOIN "supplier" AS "supplier"
ON "lineitem"."l_suppkey" = "supplier"."s_suppkey"
JOIN "nation" AS "n2"
ON "n2"."n_nationkey" = "supplier"."s_nationkey"
JOIN "orders" AS "orders"
ON "customer"."c_custkey" = "orders"."o_custkey"
AND "lineitem"."l_orderkey" = "orders"."o_orderkey"
ON "lineitem"."l_orderkey" = "orders"."o_orderkey"
AND CAST("orders"."o_orderdate" AS DATE) <= CAST('1996-12-31' AS DATE)
AND CAST("orders"."o_orderdate" AS DATE) >= CAST('1995-01-01' AS DATE)
JOIN "supplier" AS "supplier"
ON "lineitem"."l_suppkey" = "supplier"."s_suppkey"
JOIN "customer" AS "customer"
ON "customer"."c_custkey" = "orders"."o_custkey"
JOIN "nation" AS "n2"
ON "n2"."n_nationkey" = "supplier"."s_nationkey"
JOIN "nation" AS "n1"
ON "customer"."c_nationkey" = "n1"."n_nationkey"
JOIN "region" AS "region"
ON "n1"."n_regionkey" = "region"."r_regionkey" AND "region"."r_name" = 'AMERICA'
WHERE
"part"."p_type" = 'ECONOMY ANODIZED STEEL'
GROUP BY
@@ -1126,6 +1125,10 @@ FROM "lineitem" AS "lineitem"
JOIN "part" AS "part"
ON (
"lineitem"."l_partkey" = "part"."p_partkey"
AND "lineitem"."l_quantity" <= 11
AND "lineitem"."l_quantity" >= 1
AND "lineitem"."l_shipinstruct" = 'DELIVER IN PERSON'
AND "lineitem"."l_shipmode" IN ('AIR', 'AIR REG')
AND "part"."p_brand" = 'Brand#12'
AND "part"."p_container" IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG')
AND "part"."p_size" <= 5
@@ -1133,6 +1136,10 @@ JOIN "part" AS "part"
)
OR (
"lineitem"."l_partkey" = "part"."p_partkey"
AND "lineitem"."l_quantity" <= 20
AND "lineitem"."l_quantity" >= 10
AND "lineitem"."l_shipinstruct" = 'DELIVER IN PERSON'
AND "lineitem"."l_shipmode" IN ('AIR', 'AIR REG')
AND "part"."p_brand" = 'Brand#23'
AND "part"."p_container" IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK')
AND "part"."p_size" <= 10
@@ -1140,6 +1147,10 @@ JOIN "part" AS "part"
)
OR (
"lineitem"."l_partkey" = "part"."p_partkey"
AND "lineitem"."l_quantity" <= 30
AND "lineitem"."l_quantity" >= 20
AND "lineitem"."l_shipinstruct" = 'DELIVER IN PERSON'
AND "lineitem"."l_shipmode" IN ('AIR', 'AIR REG')
AND "part"."p_brand" = 'Brand#34'
AND "part"."p_container" IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG')
AND "part"."p_size" <= 15

View file

@@ -86,6 +86,8 @@ class TestBuild(unittest.TestCase):
"CASE x WHEN 1 THEN x ELSE bar END",
),
(lambda: exp.func("COALESCE", "x", 1), "COALESCE(x, 1)"),
(lambda: exp.column("x").desc(), "x DESC"),
(lambda: exp.column("x").desc(nulls_first=True), "x DESC NULLS FIRST"),
(lambda: select("x"), "SELECT x"),
(lambda: select("x"), "SELECT x"),
(lambda: select("x", "y"), "SELECT x, y"),
@@ -630,6 +632,10 @@ class TestBuild(unittest.TestCase):
lambda: exp.insert("VALUES (1, 2), (3, 4)", "tbl", columns=["cola", "colb"]),
"INSERT INTO tbl (cola, colb) VALUES (1, 2), (3, 4)",
),
(
lambda: exp.insert("VALUES (1), (2)", "tbl", columns=["col a"]),
'INSERT INTO tbl ("col a") VALUES (1), (2)',
),
(
lambda: exp.insert("SELECT * FROM cte", "t").with_("cte", as_="SELECT x FROM tbl"),
"WITH cte AS (SELECT x FROM tbl) INSERT INTO t SELECT * FROM cte",

View file

@@ -671,6 +671,10 @@ class TestExpressions(unittest.TestCase):
self.assertIsInstance(parse_one("*"), exp.Star)
self.assertEqual(exp.column("a", table="b", db="c", catalog="d"), exp.to_column("d.c.b.a"))
dot = exp.column("d", "c", "b", "a", fields=["e", "f"])
self.assertIsInstance(dot, exp.Dot)
self.assertEqual(dot.sql(), "a.b.c.d.e.f")
def test_text(self):
column = parse_one("a.b.c.d.e")
self.assertEqual(column.text("expression"), "e")
@@ -811,14 +815,14 @@ FROM foo""",
)
def test_to_interval(self):
self.assertEqual(exp.to_interval("1day").sql(), "INTERVAL '1' day")
self.assertEqual(exp.to_interval(" 5 months").sql(), "INTERVAL '5' months")
self.assertEqual(exp.to_interval("1day").sql(), "INTERVAL '1' DAY")
self.assertEqual(exp.to_interval(" 5 months").sql(), "INTERVAL '5' MONTHS")
with self.assertRaises(ValueError):
exp.to_interval("bla")
self.assertEqual(exp.to_interval(exp.Literal.string("1day")).sql(), "INTERVAL '1' day")
self.assertEqual(exp.to_interval(exp.Literal.string("1day")).sql(), "INTERVAL '1' DAY")
self.assertEqual(
exp.to_interval(exp.Literal.string(" 5 months")).sql(), "INTERVAL '5' months"
exp.to_interval(exp.Literal.string(" 5 months")).sql(), "INTERVAL '5' MONTHS"
)
with self.assertRaises(ValueError):
exp.to_interval(exp.Literal.string("bla"))

View file

@@ -32,7 +32,7 @@ class TestGenerator(unittest.TestCase):
self.assertEqual(
exp.DateTrunc(this=exp.to_column("event_date"), unit=exp.var("MONTH")).sql(),
"DATE_TRUNC(MONTH, event_date)",
"DATE_TRUNC('MONTH', event_date)",
)
def test_identify(self):

View file

@@ -296,3 +296,10 @@ class TestLineage(unittest.TestCase):
downstream = downstream.downstream[0]
self.assertEqual(downstream.name, "*")
self.assertEqual(downstream.source.sql(), "table_a AS table_a")
def test_unnest(self) -> None:
node = lineage(
"b",
"with _data as (select [struct(1 as a, 2 as b)] as col) select b from _data cross join unnest(col)",
)
self.assertEqual(node.name, "b")

View file

@@ -227,6 +227,15 @@ class TestOptimizer(unittest.TestCase):
@patch("sqlglot.generator.logger")
def test_qualify_columns(self, logger):
self.assertEqual(
optimizer.qualify_columns.qualify_columns(
parse_one("WITH x AS (SELECT a FROM db.y) SELECT * FROM db.x"),
schema={"db": {"x": {"z": "int"}, "y": {"a": "int"}}},
expand_stars=False,
).sql(),
"WITH x AS (SELECT y.a AS a FROM db.y) SELECT * FROM db.x",
)
self.assertEqual(
optimizer.qualify_columns.qualify_columns(
parse_one("WITH x AS (SELECT a FROM db.y) SELECT z FROM db.x"),
@@ -285,6 +294,11 @@ class TestOptimizer(unittest.TestCase):
schema = MappingSchema(self.schema, {"x": {"a"}, "y": {"b"}, "z": {"b"}})
self.check_file("qualify_columns__with_invisible", qualify_columns, schema=schema)
def test_pushdown_cte_alias_columns(self):
self.check_file(
"pushdown_cte_alias_columns", optimizer.qualify_columns.pushdown_cte_alias_columns
)
def test_qualify_columns__invalid(self):
for sql in load_sql_fixtures("optimizer/qualify_columns__invalid.sql"):
with self.subTest(sql):
@@ -534,6 +548,22 @@ FROM READ_CSV('tests/fixtures/optimizer/tpc-h/nation.csv.gz', 'delimiter', '|')
level="warning",
)
def test_struct_type_annotation(self):
tests = {
"SELECT STRUCT(1 AS col)": "STRUCT<col INT>",
"SELECT STRUCT(1 AS col, 2.5 AS row)": "STRUCT<col INT, row DOUBLE>",
"SELECT STRUCT(1)": "STRUCT<INT>",
"SELECT STRUCT(1 AS col, 2.5 AS row, struct(3.5 AS inner_col, 4 AS inner_row) AS nested_struct)": "STRUCT<col INT, row DOUBLE, nested_struct STRUCT<inner_col DOUBLE, inner_row INT>>",
"SELECT STRUCT(1 AS col, 2.5, ARRAY[1, 2, 3] AS nested_array, 'foo')": "STRUCT<col INT, DOUBLE, nested_array ARRAY<INT>, VARCHAR>",
"SELECT STRUCT(1, 2.5, 'bar')": "STRUCT<INT, DOUBLE, VARCHAR>",
'SELECT STRUCT(1 AS "CaseSensitive")': 'STRUCT<"CaseSensitive" INT>',
}
for sql, target_type in tests.items():
with self.subTest(sql):
expression = annotate_types(parse_one(sql))
assert expression.expressions[0].is_type(target_type)
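A sketch of the struct annotation outside the test harness, reusing one of the inputs above:

from sqlglot import parse_one
from sqlglot.optimizer.annotate_types import annotate_types

# Field types are inferred literal by literal; nested structs recurse.
ast = annotate_types(parse_one("SELECT STRUCT(1 AS col, 2.5 AS row)"))
assert ast.expressions[0].is_type("STRUCT<col INT, row DOUBLE>")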
def test_literal_type_annotation(self):
tests = {
"SELECT 5": exp.DataType.Type.INT,

View file

@@ -86,6 +86,15 @@ class TestParser(unittest.TestCase):
self.assertIsNotNone(parse_one("date").find(exp.Column))
def test_structs(self):
cast = parse_one("cast(x as struct<int>)")
self.assertIsInstance(cast.to.expressions[0], exp.DataType)
self.assertEqual(cast.sql(), "CAST(x AS STRUCT<INT>)")
cast = parse_one("cast(x as struct<varchar(10)>)")
self.assertIsInstance(cast.to.expressions[0], exp.DataType)
self.assertEqual(cast.sql(), "CAST(x AS STRUCT<VARCHAR(10)>)")
def test_float(self):
self.assertEqual(parse_one(".2"), parse_one("0.2"))
@@ -772,7 +781,7 @@ class TestParser(unittest.TestCase):
)
self.assertEqual(ast.find(exp.Interval).this.sql(), "'71'")
self.assertEqual(ast.find(exp.Interval).unit.assert_is(exp.Var).sql(), "days")
self.assertEqual(ast.find(exp.Interval).unit.assert_is(exp.Var).sql(), "DAYS")
def test_parse_concat_ws(self):
ast = parse_one("CONCAT_WS(' ', 'John', 'Doe')")

View file

@@ -166,32 +166,30 @@ class TestSchema(unittest.TestCase):
schema = MappingSchema({"A": {"b": "varchar"}})
self.assertEqual(schema.get_column_type("a", "B").this, exp.DataType.Type.VARCHAR)
self.assertEqual(
schema.get_column_type(exp.Table(this="a"), exp.Column(this="b")).this,
schema.get_column_type(exp.table_("a"), exp.column("b")).this,
exp.DataType.Type.VARCHAR,
)
self.assertEqual(
schema.get_column_type("a", exp.Column(this="b")).this, exp.DataType.Type.VARCHAR
schema.get_column_type("a", exp.column("b")).this, exp.DataType.Type.VARCHAR
)
self.assertEqual(
schema.get_column_type(exp.Table(this="a"), "b").this, exp.DataType.Type.VARCHAR
schema.get_column_type(exp.table_("a"), "b").this, exp.DataType.Type.VARCHAR
)
schema = MappingSchema({"a": {"b": {"c": "varchar"}}})
self.assertEqual(
schema.get_column_type(exp.Table(this="b", db="a"), exp.Column(this="c")).this,
schema.get_column_type(exp.table_("b", db="a"), exp.column("c")).this,
exp.DataType.Type.VARCHAR,
)
self.assertEqual(
schema.get_column_type(exp.Table(this="b", db="a"), "c").this, exp.DataType.Type.VARCHAR
schema.get_column_type(exp.table_("b", db="a"), "c").this, exp.DataType.Type.VARCHAR
)
schema = MappingSchema({"a": {"b": {"c": {"d": "varchar"}}}})
self.assertEqual(
schema.get_column_type(
exp.Table(this="c", db="b", catalog="a"), exp.Column(this="d")
).this,
schema.get_column_type(exp.table_("c", db="b", catalog="a"), exp.column("d")).this,
exp.DataType.Type.VARCHAR,
)
self.assertEqual(
schema.get_column_type(exp.Table(this="c", db="b", catalog="a"), "d").this,
schema.get_column_type(exp.table_("c", db="b", catalog="a"), "d").this,
exp.DataType.Type.VARCHAR,
)
@@ -204,14 +202,14 @@ class TestSchema(unittest.TestCase):
dialect="clickhouse",
)
table_z = exp.Table(this="z", db="y", catalog="x")
table_w = exp.Table(this="w", db="y", catalog="x")
table_z = exp.table_("z", db="y", catalog="x")
table_w = exp.table_("w", db="y", catalog="x")
self.assertEqual(schema.column_names(table_z), ["a", "B"])
self.assertEqual(schema.column_names(table_w), ["c"])
schema = MappingSchema(schema={"x": {"`y`": "INT"}}, dialect="clickhouse")
self.assertEqual(schema.column_names(exp.Table(this="x")), ["y"])
self.assertEqual(schema.column_names(exp.table_("x")), ["y"])
# Check that add_table normalizes both the table and the column names to be added / updated
schema = MappingSchema()
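The refactor above swaps raw exp.Table/exp.Column constructors for the exp.table_ and exp.column builders, which produce equivalent nodes. A minimal sketch:

from sqlglot import exp
from sqlglot.schema import MappingSchema

schema = MappingSchema({"a": {"b": {"c": "varchar"}}})
# table_ and column accept the same parts (name, db, catalog) as the constructors.
print(schema.get_column_type(exp.table_("b", db="a"), exp.column("c")).sql())  # VARCHAR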

View file

@@ -92,6 +92,10 @@ class TestTranspile(unittest.TestCase):
self.validate("SELECT a\r\nFROM b", "SELECT a FROM b")
def test_comments(self):
self.validate(
"SELECT c /* foo */ AS alias",
"SELECT c AS alias /* foo */",
)
self.validate(
"SELECT c AS /* foo */ (a, b, c) FROM t",
"SELECT c AS (a, b, c) /* foo */ FROM t",
@@ -366,7 +370,7 @@ WHERE
""",
"""SELECT
col, /* This is testing comments */
CASE WHEN a THEN b ELSE c END /* 2nd testing comments */ AS d
CASE WHEN a THEN b ELSE c END AS d /* 2nd testing comments */
FROM t""",
pretty=True,
)
@@ -571,11 +575,11 @@ FROM base""",
)
def test_time(self):
self.validate("INTERVAL '1 day'", "INTERVAL '1' day")
self.validate("INTERVAL '1 days' * 5", "INTERVAL '1' days * 5")
self.validate("5 * INTERVAL '1 day'", "5 * INTERVAL '1' day")
self.validate("INTERVAL 1 day", "INTERVAL '1' day")
self.validate("INTERVAL 2 months", "INTERVAL '2' months")
self.validate("INTERVAL '1 day'", "INTERVAL '1' DAY")
self.validate("INTERVAL '1 days' * 5", "INTERVAL '1' DAYS * 5")
self.validate("5 * INTERVAL '1 day'", "5 * INTERVAL '1' DAY")
self.validate("INTERVAL 1 day", "INTERVAL '1' DAY")
self.validate("INTERVAL 2 months", "INTERVAL '2' MONTHS")
self.validate("TIMESTAMP '2020-01-01'", "CAST('2020-01-01' AS TIMESTAMP)")
self.validate("TIMESTAMP WITH TIME ZONE '2020-01-01'", "CAST('2020-01-01' AS TIMESTAMPTZ)")
self.validate(