1
0
Fork 0

Merging upstream version 17.7.0.

Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
Daniel Baumann 2025-02-13 20:46:55 +01:00
parent 87252470ef
commit 137902868c
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
93 changed files with 41580 additions and 39040 deletions

View file

@ -851,9 +851,9 @@ class TestFunctions(unittest.TestCase):
def test_to_timestamp(self):
col_str = SF.to_timestamp("cola")
self.assertEqual("TO_TIMESTAMP(cola)", col_str.sql())
self.assertEqual("CAST(cola AS TIMESTAMP)", col_str.sql())
col = SF.to_timestamp(SF.col("cola"))
self.assertEqual("TO_TIMESTAMP(cola)", col.sql())
self.assertEqual("CAST(cola AS TIMESTAMP)", col.sql())
col_with_format = SF.to_timestamp("cola", "yyyy-MM-dd")
self.assertEqual("TO_TIMESTAMP(cola, 'yyyy-MM-dd')", col_with_format.sql())

View file

@ -1,6 +1,6 @@
from unittest import mock
from sqlglot import ErrorLevel, ParseError, UnsupportedError, transpile
from sqlglot import ErrorLevel, ParseError, TokenError, UnsupportedError, transpile
from tests.dialects.test_dialect import Validator
@ -8,7 +8,7 @@ class TestBigQuery(Validator):
dialect = "bigquery"
def test_bigquery(self):
with self.assertRaises(ValueError):
with self.assertRaises(TokenError):
transpile("'\\'", read="bigquery")
# Reference: https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#set_operators
@ -520,6 +520,12 @@ class TestBigQuery(Validator):
"bigquery": "SELECT cola, colb FROM UNNEST([STRUCT(1 AS _c0, 'test' AS _c1)])",
},
)
self.validate_all(
"SELECT * FROM UNNEST([STRUCT(1 AS id)]) CROSS JOIN UNNEST([STRUCT(1 AS id)])",
read={
"postgres": "SELECT * FROM (VALUES (1)) AS t1(id) CROSS JOIN (VALUES (1)) AS t2(id)",
},
)
self.validate_all(
"SELECT cola, colb, colc FROM (VALUES (1, 'test', NULL)) AS tab(cola, colb, colc)",
write={

View file

@ -6,6 +6,7 @@ class TestClickhouse(Validator):
dialect = "clickhouse"
def test_clickhouse(self):
self.validate_identity("SELECT xor(TRUE, FALSE)")
self.validate_identity("ATTACH DATABASE DEFAULT ENGINE = ORDINARY")
self.validate_identity("CAST(['hello'], 'Array(Enum8(''hello'' = 1))')")
self.validate_identity("SELECT x, COUNT() FROM y GROUP BY x WITH TOTALS")
@ -52,6 +53,34 @@ class TestClickhouse(Validator):
"CREATE MATERIALIZED VIEW test_view (id UInt8) TO db.table1 AS SELECT * FROM test_data"
)
self.validate_all(
"SELECT xor(1, 0)",
read={
"clickhouse": "SELECT xor(1, 0)",
"mysql": "SELECT 1 XOR 0",
},
write={
"mysql": "SELECT 1 XOR 0",
},
)
self.validate_all(
"SELECT xor(0, 1, xor(1, 0, 0))",
write={
"clickhouse": "SELECT xor(0, 1, xor(1, 0, 0))",
"mysql": "SELECT 0 XOR 1 XOR 1 XOR 0 XOR 0",
},
)
self.validate_all(
"SELECT xor(xor(1, 0), 1)",
read={
"clickhouse": "SELECT xor(xor(1, 0), 1)",
"mysql": "SELECT 1 XOR 0 XOR 1",
},
write={
"clickhouse": "SELECT xor(xor(1, 0), 1)",
"mysql": "SELECT 1 XOR 0 XOR 1",
},
)
self.validate_all(
"CONCAT(CASE WHEN COALESCE(CAST(a AS TEXT), '') IS NULL THEN COALESCE(CAST(a AS TEXT), '') ELSE CAST(COALESCE(CAST(a AS TEXT), '') AS TEXT) END, CASE WHEN COALESCE(CAST(b AS TEXT), '') IS NULL THEN COALESCE(CAST(b AS TEXT), '') ELSE CAST(COALESCE(CAST(b AS TEXT), '') AS TEXT) END)",
read={"postgres": "CONCAT(a, b)"},
@ -137,6 +166,11 @@ class TestClickhouse(Validator):
+ " GROUP BY loyalty ORDER BY loyalty"
},
)
self.validate_identity("SELECT s, arr FROM arrays_test ARRAY JOIN arr")
self.validate_identity("SELECT s, arr, a FROM arrays_test LEFT ARRAY JOIN arr AS a")
self.validate_identity(
"SELECT s, arr_external FROM arrays_test ARRAY JOIN [1, 2, 3] AS arr_external"
)
def test_cte(self):
self.validate_identity("WITH 'x' AS foo SELECT foo")

View file

@ -81,7 +81,7 @@ class TestDialect(Validator):
"clickhouse": "CAST(a AS TEXT)",
"drill": "CAST(a AS VARCHAR)",
"duckdb": "CAST(a AS TEXT)",
"mysql": "CAST(a AS TEXT)",
"mysql": "CAST(a AS CHAR)",
"hive": "CAST(a AS STRING)",
"oracle": "CAST(a AS CLOB)",
"postgres": "CAST(a AS TEXT)",
@ -160,7 +160,7 @@ class TestDialect(Validator):
"bigquery": "CAST(a AS STRING)",
"drill": "CAST(a AS VARCHAR)",
"duckdb": "CAST(a AS TEXT)",
"mysql": "CAST(a AS TEXT)",
"mysql": "CAST(a AS CHAR)",
"hive": "CAST(a AS STRING)",
"oracle": "CAST(a AS CLOB)",
"postgres": "CAST(a AS TEXT)",
@ -177,7 +177,7 @@ class TestDialect(Validator):
"bigquery": "CAST(a AS STRING)",
"drill": "CAST(a AS VARCHAR)",
"duckdb": "CAST(a AS TEXT)",
"mysql": "CAST(a AS VARCHAR)",
"mysql": "CAST(a AS CHAR)",
"hive": "CAST(a AS STRING)",
"oracle": "CAST(a AS VARCHAR2)",
"postgres": "CAST(a AS VARCHAR)",
@ -194,7 +194,7 @@ class TestDialect(Validator):
"bigquery": "CAST(a AS STRING)",
"drill": "CAST(a AS VARCHAR(3))",
"duckdb": "CAST(a AS TEXT(3))",
"mysql": "CAST(a AS VARCHAR(3))",
"mysql": "CAST(a AS CHAR(3))",
"hive": "CAST(a AS VARCHAR(3))",
"oracle": "CAST(a AS VARCHAR2(3))",
"postgres": "CAST(a AS VARCHAR(3))",
@ -224,13 +224,13 @@ class TestDialect(Validator):
},
)
self.validate_all(
"TRY_CAST(a AS DOUBLE)",
"CAST(a AS DOUBLE)",
read={
"postgres": "CAST(a AS DOUBLE PRECISION)",
"redshift": "CAST(a AS DOUBLE PRECISION)",
},
write={
"duckdb": "TRY_CAST(a AS DOUBLE)",
"duckdb": "CAST(a AS DOUBLE)",
"drill": "CAST(a AS DOUBLE)",
"postgres": "CAST(a AS DOUBLE PRECISION)",
"redshift": "CAST(a AS DOUBLE PRECISION)",
@ -634,7 +634,7 @@ class TestDialect(Validator):
},
)
self.validate_all(
"TIMESTAMP_TRUNC(TRY_CAST(x AS DATE), day)",
"TIMESTAMP_TRUNC(CAST(x AS DATE), day)",
read={"postgres": "DATE_TRUNC('day', x::DATE)"},
)
self.validate_all(
@ -1512,6 +1512,7 @@ SELECT
"redshift": "BEGIN",
"snowflake": "BEGIN",
"sqlite": "BEGIN TRANSACTION",
"tsql": "BEGIN TRANSACTION",
},
)
self.validate_all(

View file

@ -5,156 +5,9 @@ from tests.dialects.test_dialect import Validator
class TestDuckDB(Validator):
dialect = "duckdb"
def test_time(self):
self.validate_identity("SELECT CURRENT_DATE")
self.validate_identity("SELECT CURRENT_TIMESTAMP")
self.validate_all(
"SELECT MAKE_DATE(2016, 12, 25)", read={"bigquery": "SELECT DATE(2016, 12, 25)"}
)
self.validate_all(
"SELECT CAST(CAST('2016-12-25 23:59:59' AS DATETIME) AS DATE)",
read={"bigquery": "SELECT DATE(DATETIME '2016-12-25 23:59:59')"},
)
self.validate_all(
"SELECT STRPTIME(STRFTIME(CAST(CAST('2016-12-25' AS TIMESTAMPTZ) AS DATE), '%d/%m/%Y') || ' ' || 'America/Los_Angeles', '%d/%m/%Y %Z')",
read={
"bigquery": "SELECT DATE(TIMESTAMP '2016-12-25', 'America/Los_Angeles')",
},
)
self.validate_all(
"SELECT CAST(CAST(STRPTIME('05/06/2020', '%m/%d/%Y') AS DATE) AS DATE)",
read={"bigquery": "SELECT DATE(PARSE_DATE('%m/%d/%Y', '05/06/2020'))"},
)
self.validate_all(
"SELECT CAST('2020-01-01' AS DATE) + INTERVAL (-1) DAY",
read={"mysql": "SELECT DATE '2020-01-01' + INTERVAL -1 DAY"},
)
self.validate_all(
"SELECT INTERVAL '1 quarter'",
write={"duckdb": "SELECT (90 * INTERVAL '1' day)"},
)
self.validate_all(
"SELECT ((DATE_TRUNC('DAY', CAST(CAST(DATE_TRUNC('DAY', CURRENT_TIMESTAMP) AS DATE) AS TIMESTAMP) + INTERVAL (0 - MOD((DAYOFWEEK(CAST(CAST(DATE_TRUNC('DAY', CURRENT_TIMESTAMP) AS DATE) AS TIMESTAMP)) % 7) - 1 + 7, 7)) day) + (7 * INTERVAL (-5) day))) AS t1",
read={
"presto": "SELECT ((DATE_ADD('week', -5, DATE_TRUNC('DAY', DATE_ADD('day', (0 - MOD((DAY_OF_WEEK(CAST(CAST(DATE_TRUNC('DAY', NOW()) AS DATE) AS TIMESTAMP)) % 7) - 1 + 7, 7)), CAST(CAST(DATE_TRUNC('DAY', NOW()) AS DATE) AS TIMESTAMP)))))) AS t1",
},
)
self.validate_all(
"EPOCH(x)",
read={
"presto": "TO_UNIXTIME(x)",
},
write={
"bigquery": "TIME_TO_UNIX(x)",
"duckdb": "EPOCH(x)",
"presto": "TO_UNIXTIME(x)",
"spark": "UNIX_TIMESTAMP(x)",
},
)
self.validate_all(
"EPOCH_MS(x)",
write={
"bigquery": "UNIX_TO_TIME(x / 1000)",
"duckdb": "TO_TIMESTAMP(x / 1000)",
"presto": "FROM_UNIXTIME(x / 1000)",
"spark": "CAST(FROM_UNIXTIME(x / 1000) AS TIMESTAMP)",
},
)
self.validate_all(
"STRFTIME(x, '%y-%-m-%S')",
write={
"bigquery": "TIME_TO_STR(x, '%y-%-m-%S')",
"duckdb": "STRFTIME(x, '%y-%-m-%S')",
"postgres": "TO_CHAR(x, 'YY-FMMM-SS')",
"presto": "DATE_FORMAT(x, '%y-%c-%S')",
"spark": "DATE_FORMAT(x, 'yy-M-ss')",
},
)
self.validate_all(
"STRFTIME(x, '%Y-%m-%d %H:%M:%S')",
write={
"duckdb": "STRFTIME(x, '%Y-%m-%d %H:%M:%S')",
"presto": "DATE_FORMAT(x, '%Y-%m-%d %T')",
"hive": "DATE_FORMAT(x, 'yyyy-MM-dd HH:mm:ss')",
},
)
self.validate_all(
"STRPTIME(x, '%y-%-m')",
write={
"bigquery": "PARSE_TIMESTAMP('%y-%-m', x)",
"duckdb": "STRPTIME(x, '%y-%-m')",
"presto": "DATE_PARSE(x, '%y-%c')",
"hive": "CAST(FROM_UNIXTIME(UNIX_TIMESTAMP(x, 'yy-M')) AS TIMESTAMP)",
"spark": "TO_TIMESTAMP(x, 'yy-M')",
},
)
self.validate_all(
"TO_TIMESTAMP(x)",
write={
"bigquery": "UNIX_TO_TIME(x)",
"duckdb": "TO_TIMESTAMP(x)",
"presto": "FROM_UNIXTIME(x)",
"hive": "FROM_UNIXTIME(x)",
},
)
self.validate_all(
"STRPTIME(x, '%-m/%-d/%y %-I:%M %p')",
write={
"bigquery": "PARSE_TIMESTAMP('%-m/%-d/%y %-I:%M %p', x)",
"duckdb": "STRPTIME(x, '%-m/%-d/%y %-I:%M %p')",
"presto": "DATE_PARSE(x, '%c/%e/%y %l:%i %p')",
"hive": "CAST(FROM_UNIXTIME(UNIX_TIMESTAMP(x, 'M/d/yy h:mm a')) AS TIMESTAMP)",
"spark": "TO_TIMESTAMP(x, 'M/d/yy h:mm a')",
},
)
self.validate_all(
"CAST(start AS TIMESTAMPTZ) AT TIME ZONE 'America/New_York'",
read={
"snowflake": "CONVERT_TIMEZONE('America/New_York', CAST(start AS TIMESTAMPTZ))",
},
write={
"bigquery": "TIMESTAMP(DATETIME(CAST(start AS TIMESTAMP), 'America/New_York'))",
"duckdb": "CAST(start AS TIMESTAMPTZ) AT TIME ZONE 'America/New_York'",
"snowflake": "CONVERT_TIMEZONE('America/New_York', CAST(start AS TIMESTAMPTZ))",
},
)
def test_sample(self):
self.validate_all(
"SELECT * FROM tbl USING SAMPLE 5",
write={"duckdb": "SELECT * FROM tbl USING SAMPLE (5)"},
)
self.validate_all(
"SELECT * FROM tbl USING SAMPLE 10%",
write={"duckdb": "SELECT * FROM tbl USING SAMPLE (10 PERCENT)"},
)
self.validate_all(
"SELECT * FROM tbl USING SAMPLE 10 PERCENT (bernoulli)",
write={"duckdb": "SELECT * FROM tbl USING SAMPLE BERNOULLI (10 PERCENT)"},
)
self.validate_all(
"SELECT * FROM tbl USING SAMPLE reservoir(50 ROWS) REPEATABLE (100)",
write={"duckdb": "SELECT * FROM tbl USING SAMPLE RESERVOIR (50 ROWS) REPEATABLE (100)"},
)
self.validate_all(
"SELECT * FROM tbl USING SAMPLE 10% (system, 377)",
write={"duckdb": "SELECT * FROM tbl USING SAMPLE SYSTEM (10 PERCENT) REPEATABLE (377)"},
)
self.validate_all(
"SELECT * FROM tbl TABLESAMPLE RESERVOIR(20%), tbl2 WHERE tbl.i=tbl2.i",
write={
"duckdb": "SELECT * FROM tbl TABLESAMPLE RESERVOIR (20 PERCENT), tbl2 WHERE tbl.i = tbl2.i"
},
)
self.validate_all(
"SELECT * FROM tbl, tbl2 WHERE tbl.i=tbl2.i USING SAMPLE RESERVOIR(20%)",
write={
"duckdb": "SELECT * FROM tbl, tbl2 WHERE tbl.i = tbl2.i USING SAMPLE RESERVOIR (20 PERCENT)"
},
)
def test_duckdb(self):
self.validate_identity("SELECT SUM(x) FILTER (x = 1)", "SELECT SUM(x) FILTER(WHERE x = 1)")
# https://github.com/duckdb/duckdb/releases/tag/v0.8.0
self.assertEqual(
parse_one("a / b", read="duckdb").assert_is(exp.Div).sql(dialect="duckdb"), "a / b"
@ -203,6 +56,31 @@ class TestDuckDB(Validator):
self.validate_all("0x1010", write={"": "0 AS x1010"})
self.validate_all("x ~ y", write={"duckdb": "REGEXP_MATCHES(x, y)"})
self.validate_all("SELECT * FROM 'x.y'", write={"duckdb": 'SELECT * FROM "x.y"'})
self.validate_all(
"DATE_DIFF('day', CAST(b AS DATE), CAST(a AS DATE))",
read={
"duckdb": "DATE_DIFF('day', CAST(b AS DATE), CAST(a AS DATE))",
"hive": "DATEDIFF(a, b)",
"spark": "DATEDIFF(a, b)",
"spark2": "DATEDIFF(a, b)",
},
)
self.validate_all(
"XOR(a, b)",
read={
"": "a ^ b",
"bigquery": "a ^ b",
"presto": "BITWISE_XOR(a, b)",
"postgres": "a # b",
},
write={
"": "a ^ b",
"bigquery": "a ^ b",
"duckdb": "XOR(a, b)",
"presto": "BITWISE_XOR(a, b)",
"postgres": "a # b",
},
)
self.validate_all(
"PIVOT_WIDER Cities ON Year USING SUM(Population)",
write={"duckdb": "PIVOT Cities ON Year USING SUM(Population)"},
@ -459,6 +337,155 @@ class TestDuckDB(Validator):
unsupported_level=ErrorLevel.IMMEDIATE,
)
def test_time(self):
self.validate_identity("SELECT CURRENT_DATE")
self.validate_identity("SELECT CURRENT_TIMESTAMP")
self.validate_all(
"SELECT MAKE_DATE(2016, 12, 25)", read={"bigquery": "SELECT DATE(2016, 12, 25)"}
)
self.validate_all(
"SELECT CAST(CAST('2016-12-25 23:59:59' AS DATETIME) AS DATE)",
read={"bigquery": "SELECT DATE(DATETIME '2016-12-25 23:59:59')"},
)
self.validate_all(
"SELECT STRPTIME(STRFTIME(CAST(CAST('2016-12-25' AS TIMESTAMPTZ) AS DATE), '%d/%m/%Y') || ' ' || 'America/Los_Angeles', '%d/%m/%Y %Z')",
read={
"bigquery": "SELECT DATE(TIMESTAMP '2016-12-25', 'America/Los_Angeles')",
},
)
self.validate_all(
"SELECT CAST(CAST(STRPTIME('05/06/2020', '%m/%d/%Y') AS DATE) AS DATE)",
read={"bigquery": "SELECT DATE(PARSE_DATE('%m/%d/%Y', '05/06/2020'))"},
)
self.validate_all(
"SELECT CAST('2020-01-01' AS DATE) + INTERVAL (-1) DAY",
read={"mysql": "SELECT DATE '2020-01-01' + INTERVAL -1 DAY"},
)
self.validate_all(
"SELECT INTERVAL '1 quarter'",
write={"duckdb": "SELECT (90 * INTERVAL '1' day)"},
)
self.validate_all(
"SELECT ((DATE_TRUNC('DAY', CAST(CAST(DATE_TRUNC('DAY', CURRENT_TIMESTAMP) AS DATE) AS TIMESTAMP) + INTERVAL (0 - MOD((DAYOFWEEK(CAST(CAST(DATE_TRUNC('DAY', CURRENT_TIMESTAMP) AS DATE) AS TIMESTAMP)) % 7) - 1 + 7, 7)) day) + (7 * INTERVAL (-5) day))) AS t1",
read={
"presto": "SELECT ((DATE_ADD('week', -5, DATE_TRUNC('DAY', DATE_ADD('day', (0 - MOD((DAY_OF_WEEK(CAST(CAST(DATE_TRUNC('DAY', NOW()) AS DATE) AS TIMESTAMP)) % 7) - 1 + 7, 7)), CAST(CAST(DATE_TRUNC('DAY', NOW()) AS DATE) AS TIMESTAMP)))))) AS t1",
},
)
self.validate_all(
"EPOCH(x)",
read={
"presto": "TO_UNIXTIME(x)",
},
write={
"bigquery": "TIME_TO_UNIX(x)",
"duckdb": "EPOCH(x)",
"presto": "TO_UNIXTIME(x)",
"spark": "UNIX_TIMESTAMP(x)",
},
)
self.validate_all(
"EPOCH_MS(x)",
write={
"bigquery": "UNIX_TO_TIME(x / 1000)",
"duckdb": "TO_TIMESTAMP(x / 1000)",
"presto": "FROM_UNIXTIME(x / 1000)",
"spark": "CAST(FROM_UNIXTIME(x / 1000) AS TIMESTAMP)",
},
)
self.validate_all(
"STRFTIME(x, '%y-%-m-%S')",
write={
"bigquery": "TIME_TO_STR(x, '%y-%-m-%S')",
"duckdb": "STRFTIME(x, '%y-%-m-%S')",
"postgres": "TO_CHAR(x, 'YY-FMMM-SS')",
"presto": "DATE_FORMAT(x, '%y-%c-%S')",
"spark": "DATE_FORMAT(x, 'yy-M-ss')",
},
)
self.validate_all(
"STRFTIME(x, '%Y-%m-%d %H:%M:%S')",
write={
"duckdb": "STRFTIME(x, '%Y-%m-%d %H:%M:%S')",
"presto": "DATE_FORMAT(x, '%Y-%m-%d %T')",
"hive": "DATE_FORMAT(x, 'yyyy-MM-dd HH:mm:ss')",
},
)
self.validate_all(
"STRPTIME(x, '%y-%-m')",
write={
"bigquery": "PARSE_TIMESTAMP('%y-%-m', x)",
"duckdb": "STRPTIME(x, '%y-%-m')",
"presto": "DATE_PARSE(x, '%y-%c')",
"hive": "CAST(FROM_UNIXTIME(UNIX_TIMESTAMP(x, 'yy-M')) AS TIMESTAMP)",
"spark": "TO_TIMESTAMP(x, 'yy-M')",
},
)
self.validate_all(
"TO_TIMESTAMP(x)",
write={
"bigquery": "UNIX_TO_TIME(x)",
"duckdb": "TO_TIMESTAMP(x)",
"presto": "FROM_UNIXTIME(x)",
"hive": "FROM_UNIXTIME(x)",
},
)
self.validate_all(
"STRPTIME(x, '%-m/%-d/%y %-I:%M %p')",
write={
"bigquery": "PARSE_TIMESTAMP('%-m/%-d/%y %-I:%M %p', x)",
"duckdb": "STRPTIME(x, '%-m/%-d/%y %-I:%M %p')",
"presto": "DATE_PARSE(x, '%c/%e/%y %l:%i %p')",
"hive": "CAST(FROM_UNIXTIME(UNIX_TIMESTAMP(x, 'M/d/yy h:mm a')) AS TIMESTAMP)",
"spark": "TO_TIMESTAMP(x, 'M/d/yy h:mm a')",
},
)
self.validate_all(
"CAST(start AS TIMESTAMPTZ) AT TIME ZONE 'America/New_York'",
read={
"snowflake": "CONVERT_TIMEZONE('America/New_York', CAST(start AS TIMESTAMPTZ))",
},
write={
"bigquery": "TIMESTAMP(DATETIME(CAST(start AS TIMESTAMP), 'America/New_York'))",
"duckdb": "CAST(start AS TIMESTAMPTZ) AT TIME ZONE 'America/New_York'",
"snowflake": "CONVERT_TIMEZONE('America/New_York', CAST(start AS TIMESTAMPTZ))",
},
)
def test_sample(self):
self.validate_all(
"SELECT * FROM tbl USING SAMPLE 5",
write={"duckdb": "SELECT * FROM tbl USING SAMPLE (5)"},
)
self.validate_all(
"SELECT * FROM tbl USING SAMPLE 10%",
write={"duckdb": "SELECT * FROM tbl USING SAMPLE (10 PERCENT)"},
)
self.validate_all(
"SELECT * FROM tbl USING SAMPLE 10 PERCENT (bernoulli)",
write={"duckdb": "SELECT * FROM tbl USING SAMPLE BERNOULLI (10 PERCENT)"},
)
self.validate_all(
"SELECT * FROM tbl USING SAMPLE reservoir(50 ROWS) REPEATABLE (100)",
write={"duckdb": "SELECT * FROM tbl USING SAMPLE RESERVOIR (50 ROWS) REPEATABLE (100)"},
)
self.validate_all(
"SELECT * FROM tbl USING SAMPLE 10% (system, 377)",
write={"duckdb": "SELECT * FROM tbl USING SAMPLE SYSTEM (10 PERCENT) REPEATABLE (377)"},
)
self.validate_all(
"SELECT * FROM tbl TABLESAMPLE RESERVOIR(20%), tbl2 WHERE tbl.i=tbl2.i",
write={
"duckdb": "SELECT * FROM tbl TABLESAMPLE RESERVOIR (20 PERCENT), tbl2 WHERE tbl.i = tbl2.i"
},
)
self.validate_all(
"SELECT * FROM tbl, tbl2 WHERE tbl.i=tbl2.i USING SAMPLE RESERVOIR(20%)",
write={
"duckdb": "SELECT * FROM tbl, tbl2 WHERE tbl.i = tbl2.i USING SAMPLE RESERVOIR (20 PERCENT)"
},
)
def test_array(self):
self.validate_identity("ARRAY(SELECT id FROM t)")
self.validate_identity("ARRAY((SELECT id FROM t))")

View file

@ -7,28 +7,57 @@ class TestHive(Validator):
def test_bits(self):
self.validate_all(
"x & 1",
write={
read={
"duckdb": "x & 1",
"presto": "BITWISE_AND(x, 1)",
"spark": "x & 1",
},
write={
"duckdb": "x & 1",
"hive": "x & 1",
"presto": "BITWISE_AND(x, 1)",
"spark": "x & 1",
},
)
self.validate_all(
"~x",
"x & 1 > 0",
read={
"duckdb": "x & 1 > 0",
"presto": "BITWISE_AND(x, 1) > 0",
"spark": "x & 1 > 0",
},
write={
"duckdb": "x & 1 > 0",
"presto": "BITWISE_AND(x, 1) > 0",
"hive": "x & 1 > 0",
"spark": "x & 1 > 0",
},
)
self.validate_all(
"~x",
read={
"duckdb": "~x",
"presto": "BITWISE_NOT(x)",
"spark": "~x",
},
write={
"duckdb": "~x",
"hive": "~x",
"presto": "BITWISE_NOT(x)",
"spark": "~x",
},
)
self.validate_all(
"x | 1",
write={
read={
"duckdb": "x | 1",
"presto": "BITWISE_OR(x, 1)",
"spark": "x | 1",
},
write={
"duckdb": "x | 1",
"hive": "x | 1",
"presto": "BITWISE_OR(x, 1)",
"spark": "x | 1",
},
)
@ -56,15 +85,6 @@ class TestHive(Validator):
"spark": "SHIFTRIGHT(x, 1)",
},
)
self.validate_all(
"x & 1 > 0",
write={
"duckdb": "x & 1 > 0",
"presto": "BITWISE_AND(x, 1) > 0",
"hive": "x & 1 > 0",
"spark": "x & 1 > 0",
},
)
def test_cast(self):
self.validate_all(

View file

@ -83,7 +83,7 @@ class TestMySQL(Validator):
self.validate_identity("CAST(x AS SET('a', 'b'))")
self.validate_identity("SELECT CURRENT_TIMESTAMP(6)")
self.validate_identity("x ->> '$.name'")
self.validate_identity("SELECT CAST(`a`.`b` AS INT) FROM foo")
self.validate_identity("SELECT CAST(`a`.`b` AS CHAR) FROM foo")
self.validate_identity("SELECT TRIM(LEADING 'bla' FROM ' XXX ')")
self.validate_identity("SELECT TRIM(TRAILING 'bla' FROM ' XXX ')")
self.validate_identity("SELECT TRIM(BOTH 'bla' FROM ' XXX ')")
@ -436,6 +436,14 @@ class TestMySQL(Validator):
self.validate_identity("TIME_STR_TO_UNIX(x)", "UNIX_TIMESTAMP(x)")
def test_mysql(self):
self.validate_all(
"CAST(x AS TEXT)",
write={
"mysql": "CAST(x AS CHAR)",
"presto": "CAST(x AS VARCHAR)",
"starrocks": "CAST(x AS STRING)",
},
)
self.validate_all("CAST(x AS SIGNED)", write={"mysql": "CAST(x AS SIGNED)"})
self.validate_all("CAST(x AS SIGNED INTEGER)", write={"mysql": "CAST(x AS SIGNED)"})
self.validate_all("CAST(x AS UNSIGNED)", write={"mysql": "CAST(x AS UNSIGNED)"})

View file

@ -255,8 +255,8 @@ class TestPostgres(Validator):
"GENERATE_SERIES('2019-01-01'::TIMESTAMP, NOW(), '1day')",
write={
"postgres": "GENERATE_SERIES(CAST('2019-01-01' AS TIMESTAMP), CURRENT_TIMESTAMP, INTERVAL '1 day')",
"presto": "SEQUENCE(TRY_CAST('2019-01-01' AS TIMESTAMP), CAST(CURRENT_TIMESTAMP AS TIMESTAMP), INTERVAL '1' day)",
"trino": "SEQUENCE(TRY_CAST('2019-01-01' AS TIMESTAMP), CAST(CURRENT_TIMESTAMP AS TIMESTAMP), INTERVAL '1' day)",
"presto": "SEQUENCE(CAST('2019-01-01' AS TIMESTAMP), CAST(CURRENT_TIMESTAMP AS TIMESTAMP), INTERVAL '1' day)",
"trino": "SEQUENCE(CAST('2019-01-01' AS TIMESTAMP), CAST(CURRENT_TIMESTAMP AS TIMESTAMP), INTERVAL '1' day)",
},
)
self.validate_all(
@ -470,7 +470,7 @@ class TestPostgres(Validator):
"""SELECT JSON_ARRAY_ELEMENTS((foo->'sections')::JSON) AS sections""",
write={
"postgres": """SELECT JSON_ARRAY_ELEMENTS(CAST((foo -> 'sections') AS JSON)) AS sections""",
"presto": """SELECT JSON_ARRAY_ELEMENTS(TRY_CAST((JSON_EXTRACT(foo, 'sections')) AS JSON)) AS sections""",
"presto": """SELECT JSON_ARRAY_ELEMENTS(CAST((JSON_EXTRACT(foo, 'sections')) AS JSON)) AS sections""",
},
)
self.validate_all(
@ -570,7 +570,7 @@ class TestPostgres(Validator):
},
)
self.assertIsInstance(parse_one("id::UUID", read="postgres"), exp.TryCast)
self.assertIsInstance(parse_one("id::UUID", read="postgres"), exp.Cast)
def test_bool_or(self):
self.validate_all(

View file

@ -8,6 +8,14 @@ class TestPresto(Validator):
dialect = "presto"
def test_cast(self):
self.validate_all(
"SELECT CAST('10C' AS INTEGER)",
read={
"postgres": "SELECT CAST('10C' AS INTEGER)",
"presto": "SELECT CAST('10C' AS INTEGER)",
"redshift": "SELECT CAST('10C' AS INTEGER)",
},
)
self.validate_all(
"SELECT DATE_DIFF('week', CAST(CAST('2009-01-01' AS TIMESTAMP) AS DATE), CAST(CAST('2009-12-31' AS TIMESTAMP) AS DATE))",
read={"redshift": "SELECT DATEDIFF(week, '2009-01-01', '2009-12-31')"},
@ -17,7 +25,7 @@ class TestPresto(Validator):
read={"redshift": "SELECT DATEADD(month, 18, '2008-02-28')"},
)
self.validate_all(
"SELECT TRY_CAST('1970-01-01 00:00:00' AS TIMESTAMP)",
"SELECT CAST('1970-01-01 00:00:00' AS TIMESTAMP)",
read={"postgres": "SELECT 'epoch'::TIMESTAMP"},
)
self.validate_all(

View file

@ -10,6 +10,13 @@ class TestRedshift(Validator):
self.validate_identity("foo$")
self.validate_identity("$foo")
self.validate_all(
"SELECT ADD_MONTHS('2008-03-31', 1)",
write={
"redshift": "SELECT DATEADD(month, 1, '2008-03-31')",
"trino": "SELECT DATE_ADD('month', 1, CAST(CAST('2008-03-31' AS TIMESTAMP) AS DATE))",
},
)
self.validate_all(
"SELECT STRTOL('abc', 16)",
read={
@ -104,7 +111,7 @@ class TestRedshift(Validator):
"SELECT ST_AsEWKT(ST_GeomFromEWKT('SRID=4326;POINT(10 20)')::geography)",
write={
"redshift": "SELECT ST_ASEWKT(CAST(ST_GEOMFROMEWKT('SRID=4326;POINT(10 20)') AS GEOGRAPHY))",
"bigquery": "SELECT ST_AsEWKT(SAFE_CAST(ST_GeomFromEWKT('SRID=4326;POINT(10 20)') AS GEOGRAPHY))",
"bigquery": "SELECT ST_AsEWKT(CAST(ST_GeomFromEWKT('SRID=4326;POINT(10 20)') AS GEOGRAPHY))",
},
)
self.validate_all(
@ -222,7 +229,11 @@ class TestRedshift(Validator):
def test_values(self):
self.validate_all(
"SELECT * FROM (VALUES (1, 2)) AS t",
write={"redshift": "SELECT * FROM (SELECT 1, 2) AS t"},
write={
"redshift": "SELECT * FROM (SELECT 1, 2) AS t",
"mysql": "SELECT * FROM (SELECT 1, 2) AS t",
"presto": "SELECT * FROM (VALUES (1, 2)) AS t",
},
)
self.validate_all(
"SELECT * FROM (VALUES (1)) AS t1(id) CROSS JOIN (VALUES (1)) AS t2(id)",

View file

@ -386,7 +386,9 @@ class TestSnowflake(Validator):
self.validate_all(
"SELECT RLIKE(a, b)",
write={
"hive": "SELECT a RLIKE b",
"snowflake": "SELECT REGEXP_LIKE(a, b)",
"spark": "SELECT a RLIKE b",
},
)
self.validate_all(
@ -913,6 +915,58 @@ FROM persons AS p, LATERAL FLATTEN(input => p.c, path => 'contact') AS f, LATERA
},
)
@mock.patch("sqlglot.generator.logger")
def test_regexp_replace(self, logger):
self.validate_all(
"REGEXP_REPLACE(subject, pattern)",
write={
"bigquery": "REGEXP_REPLACE(subject, pattern, '')",
"duckdb": "REGEXP_REPLACE(subject, pattern, '')",
"hive": "REGEXP_REPLACE(subject, pattern, '')",
"snowflake": "REGEXP_REPLACE(subject, pattern, '')",
"spark": "REGEXP_REPLACE(subject, pattern, '')",
},
)
self.validate_all(
"REGEXP_REPLACE(subject, pattern, replacement)",
read={
"bigquery": "REGEXP_REPLACE(subject, pattern, replacement)",
"duckdb": "REGEXP_REPLACE(subject, pattern, replacement)",
"hive": "REGEXP_REPLACE(subject, pattern, replacement)",
"spark": "REGEXP_REPLACE(subject, pattern, replacement)",
},
write={
"bigquery": "REGEXP_REPLACE(subject, pattern, replacement)",
"duckdb": "REGEXP_REPLACE(subject, pattern, replacement)",
"hive": "REGEXP_REPLACE(subject, pattern, replacement)",
"snowflake": "REGEXP_REPLACE(subject, pattern, replacement)",
"spark": "REGEXP_REPLACE(subject, pattern, replacement)",
},
)
self.validate_all(
"REGEXP_REPLACE(subject, pattern, replacement, position)",
read={
"spark": "REGEXP_REPLACE(subject, pattern, replacement, position)",
},
write={
"bigquery": "REGEXP_REPLACE(subject, pattern, replacement)",
"duckdb": "REGEXP_REPLACE(subject, pattern, replacement)",
"hive": "REGEXP_REPLACE(subject, pattern, replacement)",
"snowflake": "REGEXP_REPLACE(subject, pattern, replacement, position)",
"spark": "REGEXP_REPLACE(subject, pattern, replacement, position)",
},
)
self.validate_all(
"REGEXP_REPLACE(subject, pattern, replacement, position, occurrence, parameters)",
write={
"bigquery": "REGEXP_REPLACE(subject, pattern, replacement)",
"duckdb": "REGEXP_REPLACE(subject, pattern, replacement)",
"hive": "REGEXP_REPLACE(subject, pattern, replacement)",
"snowflake": "REGEXP_REPLACE(subject, pattern, replacement, position, occurrence, parameters)",
"spark": "REGEXP_REPLACE(subject, pattern, replacement, position)",
},
)
def test_match_recognize(self):
for row in (
"ONE ROW PER MATCH",

View file

@ -224,6 +224,9 @@ TBLPROPERTIES (
)
def test_spark(self):
self.validate_identity("SELECT TRANSFORM(ARRAY(1, 2, 3), x -> x + 1)")
self.validate_identity("SELECT TRANSFORM(ARRAY(1, 2, 3), (x, i) -> x + i)")
self.validate_identity("REFRESH table a.b.c")
self.validate_identity("INTERVAL -86 days")
self.validate_identity("SELECT UNIX_TIMESTAMP()")
self.validate_identity("TRIM(' SparkSQL ')")
@ -232,6 +235,61 @@ TBLPROPERTIES (
self.validate_identity("TRIM(TRAILING 'SL' FROM 'SSparkSQLS')")
self.validate_identity("SPLIT(str, pattern, lim)")
self.validate_all(
"SELECT DATEDIFF(month, CAST('1996-10-30' AS TIMESTAMP), CAST('1997-02-28 10:30:00' AS TIMESTAMP))",
read={
"duckdb": "SELECT DATEDIFF('month', CAST('1996-10-30' AS TIMESTAMP), CAST('1997-02-28 10:30:00' AS TIMESTAMP))",
},
write={
"spark": "SELECT DATEDIFF(month, TO_DATE(CAST('1996-10-30' AS TIMESTAMP)), TO_DATE(CAST('1997-02-28 10:30:00' AS TIMESTAMP)))",
"spark2": "SELECT MONTHS_BETWEEN(TO_DATE(CAST('1997-02-28 10:30:00' AS TIMESTAMP)), TO_DATE(CAST('1996-10-30' AS TIMESTAMP)))",
},
)
self.validate_all(
"SELECT MONTHS_BETWEEN('1997-02-28 10:30:00', '1996-10-30')",
write={
"duckdb": "SELECT DATEDIFF('month', CAST('1996-10-30' AS TIMESTAMP), CAST('1997-02-28 10:30:00' AS TIMESTAMP))",
"hive": "SELECT MONTHS_BETWEEN('1997-02-28 10:30:00', '1996-10-30')",
"spark": "SELECT MONTHS_BETWEEN('1997-02-28 10:30:00', '1996-10-30')",
},
)
self.validate_all(
"SELECT MONTHS_BETWEEN('1997-02-28 10:30:00', '1996-10-30', FALSE)",
write={
"duckdb": "SELECT DATEDIFF('month', CAST('1996-10-30' AS TIMESTAMP), CAST('1997-02-28 10:30:00' AS TIMESTAMP))",
"hive": "SELECT MONTHS_BETWEEN('1997-02-28 10:30:00', '1996-10-30')",
"spark": "SELECT MONTHS_BETWEEN('1997-02-28 10:30:00', '1996-10-30', FALSE)",
},
)
self.validate_all(
"SELECT TO_TIMESTAMP('2016-12-31 00:12:00')",
write={
"": "SELECT CAST('2016-12-31 00:12:00' AS TIMESTAMP)",
"duckdb": "SELECT CAST('2016-12-31 00:12:00' AS TIMESTAMP)",
"spark": "SELECT CAST('2016-12-31 00:12:00' AS TIMESTAMP)",
},
)
self.validate_all(
"SELECT TO_TIMESTAMP('2016-12-31', 'yyyy-MM-dd')",
read={
"duckdb": "SELECT STRPTIME('2016-12-31', '%Y-%m-%d')",
},
write={
"": "SELECT STR_TO_TIME('2016-12-31', '%Y-%m-%d')",
"duckdb": "SELECT STRPTIME('2016-12-31', '%Y-%m-%d')",
"spark": "SELECT TO_TIMESTAMP('2016-12-31', 'yyyy-MM-dd')",
},
)
self.validate_all(
"SELECT RLIKE('John Doe', 'John.*')",
write={
"bigquery": "SELECT REGEXP_CONTAINS('John Doe', 'John.*')",
"hive": "SELECT 'John Doe' RLIKE 'John.*'",
"postgres": "SELECT 'John Doe' ~ 'John.*'",
"snowflake": "SELECT REGEXP_LIKE('John Doe', 'John.*')",
"spark": "SELECT 'John Doe' RLIKE 'John.*'",
},
)
self.validate_all(
"UNHEX(MD5(x))",
write={
@ -446,3 +504,21 @@ TBLPROPERTIES (
"CURRENT_USER()",
write={"spark": "CURRENT_USER()"},
)
def test_transform_query(self):
self.validate_identity("SELECT TRANSFORM(x) USING 'x' AS (x INT) FROM t")
self.validate_identity(
"SELECT TRANSFORM(zip_code, name, age) USING 'cat' AS (a, b, c) FROM person WHERE zip_code > 94511"
)
self.validate_identity(
"SELECT TRANSFORM(zip_code, name, age) USING 'cat' AS (a STRING, b STRING, c STRING) FROM person WHERE zip_code > 94511"
)
self.validate_identity(
"SELECT TRANSFORM(name, age) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n' NULL DEFINED AS 'NULL' USING 'cat' AS (name_age STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '@' LINES TERMINATED BY '\n' NULL DEFINED AS 'NULL' FROM person"
)
self.validate_identity(
"SELECT TRANSFORM(zip_code, name, age) ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' WITH SERDEPROPERTIES ('field.delim'='\t') USING 'cat' AS (a STRING, b STRING, c STRING) ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' WITH SERDEPROPERTIES ('field.delim'='\t') FROM person WHERE zip_code > 94511"
)
self.validate_identity(
"SELECT TRANSFORM(zip_code, name, age) USING 'cat' FROM person WHERE zip_code > 94500"
)

View file

@ -61,6 +61,9 @@ class TestTeradata(Validator):
self.validate_identity(
"CREATE VOLATILE MULTISET TABLE a, NOT LOCAL AFTER JOURNAL, FREESPACE=1 PERCENT, DATABLOCKSIZE=10 BYTES, WITH NO CONCURRENT ISOLATED LOADING FOR ALL (a INT)"
)
self.validate_identity(
"CREATE VOLATILE SET TABLE example1 AS (SELECT col1, col2, col3 FROM table1) WITH DATA PRIMARY INDEX (col1) ON COMMIT PRESERVE ROWS"
)
self.validate_all(
"""

View file

@ -33,15 +33,15 @@ class TestTSQL(Validator):
)
self.validate_all(
"SELECT DATEPART(year, TRY_CAST('2017-01-01' AS DATE))",
"SELECT DATEPART(year, CAST('2017-01-01' AS DATE))",
read={"postgres": "SELECT DATE_PART('year', '2017-01-01'::DATE)"},
)
self.validate_all(
"SELECT DATEPART(month, TRY_CAST('2017-03-01' AS DATE))",
"SELECT DATEPART(month, CAST('2017-03-01' AS DATE))",
read={"postgres": "SELECT DATE_PART('month', '2017-03-01'::DATE)"},
)
self.validate_all(
"SELECT DATEPART(day, TRY_CAST('2017-01-02' AS DATE))",
"SELECT DATEPART(day, CAST('2017-01-02' AS DATE))",
read={"postgres": "SELECT DATE_PART('day', '2017-01-02'::DATE)"},
)
self.validate_all(
@ -54,7 +54,7 @@ class TestTSQL(Validator):
self.validate_all(
"CONVERT(INT, CONVERT(NUMERIC, '444.75'))",
write={
"mysql": "CAST(CAST('444.75' AS DECIMAL) AS INT)",
"mysql": "CAST(CAST('444.75' AS DECIMAL) AS SIGNED)",
"tsql": "CAST(CAST('444.75' AS NUMERIC) AS INTEGER)",
},
)
@ -389,7 +389,58 @@ class TestTSQL(Validator):
},
)
def test_ddl(self):
self.validate_all(
"CREATE TABLE #mytemp (a INTEGER, b CHAR(2), c TIME(4), d FLOAT(24))",
write={
"tsql": "CREATE TABLE #mytemp (a INTEGER, b CHAR(2), c TIMESTAMP(4), d FLOAT(24))"
},
)
def test_transaction(self):
# BEGIN { TRAN | TRANSACTION }
# [ { transaction_name | @tran_name_variable }
# [ WITH MARK [ 'description' ] ]
# ]
# [ ; ]
self.validate_identity("BEGIN TRANSACTION")
self.validate_all("BEGIN TRAN", write={"tsql": "BEGIN TRANSACTION"})
self.validate_identity("BEGIN TRANSACTION transaction_name")
self.validate_identity("BEGIN TRANSACTION @tran_name_variable")
self.validate_identity("BEGIN TRANSACTION transaction_name WITH MARK 'description'")
def test_commit(self):
# COMMIT [ { TRAN | TRANSACTION } [ transaction_name | @tran_name_variable ] ] [ WITH ( DELAYED_DURABILITY = { OFF | ON } ) ] [ ; ]
self.validate_all("COMMIT", write={"tsql": "COMMIT TRANSACTION"})
self.validate_all("COMMIT TRAN", write={"tsql": "COMMIT TRANSACTION"})
self.validate_identity("COMMIT TRANSACTION")
self.validate_identity("COMMIT TRANSACTION transaction_name")
self.validate_identity("COMMIT TRANSACTION @tran_name_variable")
self.validate_identity(
"COMMIT TRANSACTION @tran_name_variable WITH (DELAYED_DURABILITY = ON)"
)
self.validate_identity(
"COMMIT TRANSACTION transaction_name WITH (DELAYED_DURABILITY = OFF)"
)
def test_rollback(self):
# Applies to SQL Server and Azure SQL Database
# ROLLBACK { TRAN | TRANSACTION }
# [ transaction_name | @tran_name_variable
# | savepoint_name | @savepoint_variable ]
# [ ; ]
self.validate_all("ROLLBACK", write={"tsql": "ROLLBACK TRANSACTION"})
self.validate_all("ROLLBACK TRAN", write={"tsql": "ROLLBACK TRANSACTION"})
self.validate_identity("ROLLBACK TRANSACTION")
self.validate_identity("ROLLBACK TRANSACTION transaction_name")
self.validate_identity("ROLLBACK TRANSACTION @tran_name_variable")
def test_udf(self):
self.validate_identity(
"DECLARE @DWH_DateCreated DATETIME = CONVERT(DATETIME, getdate(), 104)"
)
self.validate_identity(
"CREATE PROCEDURE foo @a INTEGER, @b INTEGER AS SELECT @a = SUM(bla) FROM baz AS bar"
)
@ -446,6 +497,12 @@ WHERE
pretty=True,
)
def test_procedure_keywords(self):
self.validate_identity("BEGIN")
self.validate_identity("END")
self.validate_identity("SET XACT_ABORT ON")
def test_fullproc(self):
sql = """
CREATE procedure [TRANSF].[SP_Merge_Sales_Real]
@Loadid INTEGER
@ -696,14 +753,14 @@ WHERE
self.validate_all(
"SELECT CONVERT(VARCHAR(10), testdb.dbo.test.x, 120) y FROM testdb.dbo.test",
write={
"mysql": "SELECT CAST(DATE_FORMAT(testdb.dbo.test.x, '%Y-%m-%d %T') AS VARCHAR(10)) AS y FROM testdb.dbo.test",
"mysql": "SELECT CAST(DATE_FORMAT(testdb.dbo.test.x, '%Y-%m-%d %T') AS CHAR(10)) AS y FROM testdb.dbo.test",
"spark": "SELECT CAST(DATE_FORMAT(testdb.dbo.test.x, 'yyyy-MM-dd HH:mm:ss') AS VARCHAR(10)) AS y FROM testdb.dbo.test",
},
)
self.validate_all(
"SELECT CONVERT(VARCHAR(10), y.x) z FROM testdb.dbo.test y",
write={
"mysql": "SELECT CAST(y.x AS VARCHAR(10)) AS z FROM testdb.dbo.test AS y",
"mysql": "SELECT CAST(y.x AS CHAR(10)) AS z FROM testdb.dbo.test AS y",
"spark": "SELECT CAST(y.x AS VARCHAR(10)) AS z FROM testdb.dbo.test AS y",
},
)
@ -838,7 +895,8 @@ WHERE
write={"spark": "SELECT FORMAT_NUMBER(1000000.01, '###,###.###')"},
)
self.validate_all(
"SELECT FORMAT(1234567, 'f')", write={"spark": "SELECT FORMAT_NUMBER(1234567, 'f')"}
"SELECT FORMAT(1234567, 'f')",
write={"spark": "SELECT FORMAT_NUMBER(1234567, 'f')"},
)
self.validate_all(
"SELECT FORMAT('01-01-1991', 'dd.mm.yyyy')",
@ -853,7 +911,8 @@ WHERE
write={"spark": "SELECT DATE_FORMAT(date_col, 'MMMM d')"},
)
self.validate_all(
"SELECT FORMAT(num_col, 'c')", write={"spark": "SELECT FORMAT_NUMBER(num_col, 'c')"}
"SELECT FORMAT(num_col, 'c')",
write={"spark": "SELECT FORMAT_NUMBER(num_col, 'c')"},
)
def test_string(self):

View file

@ -76,6 +76,7 @@ map.x
SELECT update
SELECT x.update
SELECT call.x
SELECT end
a.b.INT(1.234)
INT(x / 100)
time * 100
@ -269,6 +270,8 @@ SELECT x LIKE '%x%' FROM test
SELECT * FROM test LIMIT 100
SELECT * FROM test LIMIT 1 + 1
SELECT * FROM test LIMIT 100 OFFSET 200
SELECT * FROM test LIMIT (SELECT 1)
SELECT * FROM test LIMIT (SELECT 1) OFFSET (SELECT 1)
SELECT * FROM test FETCH FIRST ROWS ONLY
SELECT * FROM test FETCH FIRST 1 ROWS ONLY
SELECT * FROM test ORDER BY id DESC FETCH FIRST 10 ROWS WITH TIES

View file

@ -93,3 +93,7 @@ WITH cte2 AS (SELECT a FROM x), cte1 AS (SELECT t.a FROM cte2 AS t) SELECT a FRO
-- Duplicate CTE nested in CTE
WITH cte1 AS (SELECT a FROM x), cte2 AS (WITH cte3 AS (SELECT a FROM x) SELECT a FROM cte3) SELECT a FROM cte2;
WITH cte1 AS (SELECT a FROM x), cte2 AS (SELECT a FROM cte1 AS cte3) SELECT a FROM cte2;
-- Wrapped subquery joined with table
SELECT * FROM ((SELECT c FROM t1) JOIN t2);
WITH cte AS (SELECT c FROM t1) SELECT * FROM (cte AS cte, t2);

View file

@ -372,3 +372,27 @@ FROM x AS x
LEFT JOIN i AS i
ON x.a = i.a;
WITH i AS (SELECT x.a AS a FROM y AS y JOIN x AS x ON y.b = x.b) SELECT x.a AS a FROM x AS x LEFT JOIN i AS i ON x.a = i.a;
# title: Outer scope selects from wrapped table with a join (unknown schema)
# execute: false
WITH _q_0 AS (SELECT t1.c AS c FROM t1 AS t1) SELECT * FROM (_q_0 AS _q_0 CROSS JOIN t2 AS t2);
WITH _q_0 AS (SELECT t1.c AS c FROM t1 AS t1) SELECT * FROM (_q_0 AS _q_0 CROSS JOIN t2 AS t2);
# title: Outer scope selects single column from wrapped table with a join
WITH _q_0 AS (
SELECT
x.a AS a
FROM x AS x
), y_2 AS (
SELECT
y.b AS b
FROM y AS y
)
SELECT
y.b AS b
FROM (
_q_0 AS _q_0
JOIN y_2 AS y
ON _q_0.a = y.b
);
SELECT y.b AS b FROM (x AS x JOIN y AS y ON x.a = y.b);

View file

@ -806,3 +806,87 @@ LEFT JOIN (
ON "y"."c" = "z"."c"
)
ON "x"."b" = "y"."b";
# title: select * from wrapped subquery
# execute: false
SELECT * FROM ((SELECT * FROM tbl));
WITH "_q_0" AS (
SELECT
*
FROM "tbl" AS "tbl"
)
SELECT
*
FROM (
"_q_0" AS "_q_0"
);
# title: select * from wrapped subquery joined to a table (known schema)
SELECT * FROM ((SELECT * FROM x) INNER JOIN y ON a = c);
SELECT
"x"."a" AS "a",
"x"."b" AS "b",
"y"."b" AS "b",
"y"."c" AS "c"
FROM (
"x" AS "x"
JOIN "y" AS "y"
ON "x"."a" = "y"."c"
);
# title: select * from wrapped subquery joined to a table (unknown schema)
# execute: false
SELECT * FROM ((SELECT c FROM t1) JOIN t2);
WITH "_q_0" AS (
SELECT
"t1"."c" AS "c"
FROM "t1" AS "t1"
)
SELECT
*
FROM (
"_q_0" AS "_q_0"
CROSS JOIN "t2" AS "t2"
);
# title: select specific columns from wrapped subquery joined to a table
SELECT b FROM ((SELECT a FROM x) INNER JOIN y ON a = b);
SELECT
"y"."b" AS "b"
FROM (
"x" AS "x"
JOIN "y" AS "y"
ON "x"."a" = "y"."b"
);
# title: select * from wrapped join of subqueries (unknown schema)
# execute: false
SELECT * FROM ((SELECT * FROM t1) JOIN (SELECT * FROM t2));
WITH "_q_0" AS (
SELECT
*
FROM "t1" AS "t1"
), "_q_1" AS (
SELECT
*
FROM "t2" AS "t2"
)
SELECT
*
FROM (
"_q_0" AS "_q_0"
CROSS JOIN "_q_1" AS "_q_1"
);
# title: select * from wrapped join of subqueries (known schema)
SELECT * FROM ((SELECT * FROM x) INNER JOIN (SELECT * FROM y) ON a = c);
SELECT
"x"."a" AS "a",
"x"."b" AS "b",
"y"."b" AS "b",
"y"."c" AS "c"
FROM (
"x" AS "x"
JOIN "y" AS "y"
ON "x"."a" = "y"."c"
);

View file

@ -16,9 +16,6 @@ SELECT x.a AS a FROM x AS x JOIN (SELECT y.b AS b FROM y AS y) AS z ON x.b = z.b
SELECT x1.a FROM (SELECT * FROM x) AS x1, (SELECT * FROM x) AS x2;
SELECT x1.a AS a FROM (SELECT x.a AS a FROM x AS x) AS x1, (SELECT 1 AS _ FROM x AS x) AS x2;
SELECT x1.a FROM (SELECT * FROM x) AS x1, (SELECT * FROM x) AS x2;
SELECT x1.a AS a FROM (SELECT x.a AS a FROM x AS x) AS x1, (SELECT 1 AS _ FROM x AS x) AS x2;
SELECT a FROM (SELECT DISTINCT a, b FROM x);
SELECT _q_0.a AS a FROM (SELECT DISTINCT x.a AS a, x.b AS b FROM x AS x) AS _q_0;
@ -77,9 +74,6 @@ SELECT _q_0.a AS a FROM (SELECT aa.a AS a FROM aa AS aa UNION ALL SELECT bb.a AS
SELECT a FROM (SELECT a FROM aa UNION ALL SELECT * FROM bb UNION ALL SELECT * from cc);
SELECT _q_0.a AS a FROM (SELECT aa.a AS a FROM aa AS aa UNION ALL SELECT bb.a AS a FROM bb AS bb UNION ALL SELECT cc.a AS a FROM cc AS cc) AS _q_0;
SELECT a FROM (SELECT * FROM aa UNION ALL SELECT * FROM bb UNION ALL SELECT * from cc);
SELECT _q_0.a AS a FROM (SELECT aa.a AS a FROM aa AS aa UNION ALL SELECT bb.a AS a FROM bb AS bb UNION ALL SELECT cc.a AS a FROM cc AS cc) AS _q_0;
SELECT a FROM (SELECT * FROM aa CROSS JOIN bb);
SELECT _q_0.a AS a FROM (SELECT a AS a FROM aa AS aa CROSS JOIN bb AS bb) AS _q_0;
@ -91,3 +85,6 @@ SELECT _q_1.a AS a FROM (SELECT _q_0.a AS a FROM (SELECT aa.a AS a FROM aa AS aa
with cte1 as (SELECT cola, colb FROM tb UNION ALL SELECT colc, cold FROM tb2) SELECT cola FROM cte1;
WITH cte1 AS (SELECT tb.cola AS cola FROM tb AS tb UNION ALL SELECT tb2.colc AS colc FROM tb2 AS tb2) SELECT cte1.cola AS cola FROM cte1;
SELECT * FROM ((SELECT c FROM t1) JOIN t2);
SELECT * FROM ((SELECT t1.c AS c FROM t1 AS t1) AS _q_0, t2 AS t2);

View file

@ -307,6 +307,11 @@ SELECT x.b AS b, y.b AS b, y.c AS c FROM x AS x, y AS y;
SELECT * EXCEPT(a) FROM x;
SELECT x.b AS b FROM x AS x;
# execute: false
# note: this query would fail in the engine level because there are 0 selected columns
SELECT * EXCEPT (a, b) FROM x;
SELECT * EXCEPT (x.a, x.b) FROM x AS x;
--------------------------------------
-- Using
--------------------------------------
@ -410,8 +415,7 @@ SELECT _q_0.c AS c FROM (SELECT 1 AS a) AS x LATERAL VIEW EXPLODE(x.a) _q_0 AS c
--------------------------------------
-- Window functions
--------------------------------------
-- ORDER BY in window function
# title: ORDER BY in window function
SELECT a + 1 AS a, ROW_NUMBER() OVER (PARTITION BY b ORDER BY a) AS row_num FROM x;
SELECT x.a + 1 AS a, ROW_NUMBER() OVER (PARTITION BY x.b ORDER BY x.a) AS row_num FROM x AS x;
@ -429,7 +433,6 @@ SELECT x.a AS a, x.b AS b FROM x AS x QUALIFY COUNT(x.a) OVER (PARTITION BY x.b)
--------------------------------------
-- Expand laterals
--------------------------------------
# execute: false
select 2 AS d, d + 1 FROM x WHERE d = 2 GROUP BY d;
SELECT 2 AS d, 2 + 1 AS _col_1 FROM x AS x WHERE 2 = 2 GROUP BY 1;
@ -457,3 +460,38 @@ FROM (
FROM x
);
SELECT _q_0.i AS i, _q_0.j AS j FROM (SELECT x.a + 1 AS i, x.a + 1 + 1 AS j FROM x AS x) AS _q_0;
--------------------------------------
-- Wrapped tables / join constructs
--------------------------------------
# execute: false
SELECT * FROM ((tbl));
SELECT * FROM ((tbl AS tbl));
SELECT a, c FROM (x LEFT JOIN y ON a = c);
SELECT x.a AS a, y.c AS c FROM (x AS x LEFT JOIN y AS y ON x.a = y.c);
# execute: false
SELECT * FROM ((a CROSS JOIN ((b CROSS JOIN c) CROSS JOIN (d CROSS JOIN e))));
SELECT * FROM ((a AS a CROSS JOIN ((b AS b CROSS JOIN c AS c) CROSS JOIN (d AS d CROSS JOIN e AS e))));
# execute: false
SELECT * FROM ((SELECT * FROM tbl));
SELECT * FROM ((SELECT * FROM tbl AS tbl) AS _q_0);
# execute: false
SELECT * FROM ((SELECT c FROM t1) JOIN t2);
SELECT * FROM ((SELECT t1.c AS c FROM t1 AS t1) AS _q_0, t2 AS t2);
# execute: false
SELECT * FROM ((SELECT * FROM x) INNER JOIN y ON a = c);
SELECT y.b AS b, y.c AS c, _q_0.a AS a, _q_0.b AS b FROM ((SELECT x.a AS a, x.b AS b FROM x AS x) AS _q_0 INNER JOIN y AS y ON _q_0.a = y.c);
SELECT x.a, y.b, z.c FROM x LEFT JOIN (y INNER JOIN z ON y.c = z.c) ON x.b = y.b;
SELECT x.a AS a, y.b AS b, z.c AS c FROM x AS x LEFT JOIN (y AS y INNER JOIN z AS z ON y.c = z.c) ON x.b = y.b;
SELECT * FROM ((SELECT * FROM x) INNER JOIN (SELECT * FROM y) ON a = c);
SELECT _q_0.a AS a, _q_0.b AS b, _q_1.b AS b, _q_1.c AS c FROM ((SELECT x.a AS a, x.b AS b FROM x AS x) AS _q_0 INNER JOIN (SELECT y.b AS b, y.c AS c FROM y AS y) AS _q_1 ON _q_0.a = _q_1.c);
SELECT b FROM ((SELECT a FROM x) INNER JOIN y ON a = b);
SELECT y.b AS b FROM ((SELECT x.a AS a FROM x AS x) AS _q_0 INNER JOIN y AS y ON _q_0.a = y.b);

View file

@ -10,4 +10,4 @@ SELECT b FROM x AS a CROSS JOIN y AS b CROSS JOIN y AS c;
SELECT x.a FROM x JOIN y USING (a);
SELECT a, SUM(b) FROM x GROUP BY 3;
SELECT p FROM (SELECT x from xx) y CROSS JOIN yy CROSS JOIN zz
select a from (select * from x cross join y);
SELECT a FROM (SELECT * FROM x CROSS JOIN y);

View file

@ -101,3 +101,11 @@ SELECT * FROM (SELECT * FROM c.db.tbl1 AS tbl1 JOIN c.db.tbl2 AS tbl2 ON id1 = i
# title: join construct within join construct
SELECT * FROM (tbl1 AS tbl1 JOIN (tbl2 AS tbl2 JOIN tbl3 AS tbl3 ON id2 = id3) AS _q_0 ON id1 = id3) AS _q_1;
SELECT * FROM (SELECT * FROM c.db.tbl1 AS tbl1 JOIN (SELECT * FROM c.db.tbl2 AS tbl2 JOIN c.db.tbl3 AS tbl3 ON id2 = id3) AS _q_0 ON id1 = id3) AS _q_1;
# title: wrapped subquery without alias
SELECT * FROM ((SELECT * FROM t));
SELECT * FROM ((SELECT * FROM c.db.t AS t) AS _q_0);
# title: wrapped subquery without alias joined with a table
SELECT * FROM ((SELECT * FROM t1) INNER JOIN t2 ON a = b);
SELECT * FROM ((SELECT * FROM c.db.t1 AS t1) AS _q_0 INNER JOIN c.db.t2 AS t2 ON a = b);

View file

@ -58,6 +58,9 @@ class TestBuild(unittest.TestCase):
(lambda: x.as_("y"), "x AS y"),
(lambda: x.isin(1, "2"), "x IN (1, '2')"),
(lambda: x.isin(query="select 1"), "x IN (SELECT 1)"),
(lambda: x.isin(unnest="x"), "x IN (SELECT UNNEST(x))"),
(lambda: x.isin(unnest="x"), "x IN UNNEST(x)", "bigquery"),
(lambda: x.isin(unnest=["x", "y"]), "x IN (SELECT UNNEST(x, y))"),
(lambda: x.between(1, 2), "x BETWEEN 1 AND 2"),
(lambda: 1 + x + 2 + 3, "1 + x + 2 + 3"),
(lambda: 1 + x * 2 + 3, "1 + (x * 2) + 3"),

View file

@ -556,6 +556,7 @@ class TestExpressions(unittest.TestCase):
self.assertIsInstance(parse_one("HEX(foo)"), exp.Hex)
self.assertIsInstance(parse_one("TO_HEX(foo)", read="bigquery"), exp.Hex)
self.assertIsInstance(parse_one("TO_HEX(MD5(foo))", read="bigquery"), exp.MD5)
self.assertIsInstance(parse_one("TRANSFORM(a, b)", read="spark"), exp.Transform)
def test_column(self):
column = parse_one("a.b.c.d")

View file

@ -186,3 +186,16 @@ class TestLineage(unittest.TestCase):
self.assertEqual(downstream.alias, "")
self.assertEqual(downstream.downstream, [])
def test_lineage_union(self) -> None:
node = lineage(
"x",
"SELECT ax AS x FROM a UNION SELECT bx FROM b UNION SELECT cx FROM c",
)
assert len(node.downstream) == 3
node = lineage(
"x",
"SELECT x FROM (SELECT ax AS x FROM a UNION SELECT bx FROM b UNION SELECT cx FROM c)",
)
assert len(node.downstream) == 3

View file

@ -427,6 +427,27 @@ FROM READ_CSV('tests/fixtures/optimizer/tpc-h/nation.csv.gz', 'delimiter', '|')
{"s.b"},
)
# Check that parentheses don't introduce a new scope unless an alias is attached
sql = "SELECT * FROM (((SELECT * FROM (t1 JOIN t2) AS t3) JOIN (SELECT * FROM t4)))"
expression = parse_one(sql)
for scopes in traverse_scope(expression), list(build_scope(expression).traverse()):
self.assertEqual(len(scopes), 4)
self.assertEqual(scopes[0].expression.sql(), "t1, t2")
self.assertEqual(set(scopes[0].sources), {"t1", "t2"})
self.assertEqual(scopes[1].expression.sql(), "SELECT * FROM (t1, t2) AS t3")
self.assertEqual(set(scopes[1].sources), {"t3"})
self.assertEqual(scopes[2].expression.sql(), "SELECT * FROM t4")
self.assertEqual(set(scopes[2].sources), {"t4"})
self.assertEqual(
scopes[3].expression.sql(),
"SELECT * FROM (((SELECT * FROM (t1, t2) AS t3), (SELECT * FROM t4)))",
)
self.assertEqual(set(scopes[3].sources), {""})
@patch("sqlglot.optimizer.scope.logger")
def test_scope_warning(self, logger):
self.assertEqual(len(traverse_scope(parse_one("WITH q AS (@y) SELECT * FROM q"))), 1)

View file

@ -1,6 +1,7 @@
import unittest
from sqlglot.dialects import BigQuery
from sqlglot.errors import TokenError
from sqlglot.tokens import Tokenizer, TokenType
@ -65,7 +66,7 @@ x"""
self.assertEqual(tokens[3].token_type, TokenType.SEMICOLON)
def test_error_msg(self):
with self.assertRaisesRegex(ValueError, "Error tokenizing 'select /'"):
with self.assertRaisesRegex(TokenError, "Error tokenizing 'select /'"):
Tokenizer().tokenize("select /*")
def test_jinja(self):

View file

@ -186,7 +186,7 @@ WHERE
*/
SELECT
tbl.cola /* comment 1 */ + tbl.colb /* comment 2 */,
CAST(x AS INT), # comment 3
CAST(x AS CHAR), # comment 3
y -- comment 4
FROM
bar /* comment 5 */,
@ -198,7 +198,7 @@ WHERE
*/
SELECT
tbl.cola /* comment 1 */ + tbl.colb /* comment 2 */,
CAST(x AS INT), /* comment 3 */
CAST(x AS CHAR), /* comment 3 */
y /* comment 4 */
FROM bar /* comment 5 */, tbl /* comment 6 */""",
read="mysql",
@ -211,9 +211,12 @@ FROM bar /* comment 5 */, tbl /* comment 6 */""",
-- comment 1
AND bar
-- comment 2
AND bla;
AND bla
-- comment 3
LIMIT 10
;
""",
"SELECT a FROM b WHERE foo AND /* comment 1 */ bar AND /* comment 2 */ bla",
"SELECT a FROM b WHERE foo AND /* comment 1 */ bar AND /* comment 2 */ bla LIMIT 10 /* comment 3 */",
)
self.validate(
"""
@ -285,6 +288,47 @@ FROM v""",
"SELECT 1 /* hi this is a comment */",
read="snowflake",
)
self.validate(
"-- comment\nDROP TABLE IF EXISTS foo",
"/* comment */ DROP TABLE IF EXISTS foo",
)
self.validate(
"""
-- comment1
-- comment2
-- comment3
DROP TABLE IF EXISTS db.tba
""",
"""/* comment1 */
/* comment2 */
/* comment3 */
DROP TABLE IF EXISTS db.tba""",
pretty=True,
)
self.validate(
"""
CREATE TABLE db.tba AS
SELECT a, b, c
FROM tb_01
WHERE
-- comment5
a = 1 AND b = 2 --comment6
-- and c = 1
-- comment7
""",
"""CREATE TABLE db.tba AS
SELECT
a,
b,
c
FROM tb_01
WHERE
a /* comment5 */ = 1 AND b = 2 /* comment6 */
/* and c = 1 */
/* comment7 */""",
pretty=True,
)
def test_types(self):
self.validate("INT 1", "CAST(1 AS INT)")