1
0
Fork 0

Adding upstream version 20.3.0.

Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
Daniel Baumann 2025-02-13 21:17:33 +01:00
parent 5bd573dda1
commit fd9de5e4cb
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
132 changed files with 55125 additions and 51576 deletions

View file

@ -6,6 +6,8 @@ class TestClickhouse(Validator):
dialect = "clickhouse"
def test_clickhouse(self):
self.validate_identity("SELECT * FROM x LIMIT 1 UNION ALL SELECT * FROM y")
string_types = [
"BLOB",
"LONGBLOB",
@ -68,6 +70,18 @@ class TestClickhouse(Validator):
self.validate_identity("CAST(x AS DATETIME)")
self.validate_identity("CAST(x as MEDIUMINT)", "CAST(x AS Int32)")
self.validate_identity("SELECT arrayJoin([1, 2, 3] AS src) AS dst, 'Hello', src")
self.validate_identity(
"SELECT n, source FROM (SELECT toFloat32(number % 10) AS n, 'original' AS source FROM numbers(10) WHERE number % 3 = 1) ORDER BY n WITH FILL"
)
self.validate_identity(
"SELECT n, source FROM (SELECT toFloat32(number % 10) AS n, 'original' AS source FROM numbers(10) WHERE number % 3 = 1) ORDER BY n WITH FILL FROM 0 TO 5.51 STEP 0.5"
)
self.validate_identity(
"SELECT toDate((number * 10) * 86400) AS d1, toDate(number * 86400) AS d2, 'original' AS source FROM numbers(10) WHERE (number % 3) = 1 ORDER BY d2 WITH FILL, d1 WITH FILL STEP 5"
)
self.validate_identity(
"SELECT n, source, inter FROM (SELECT toFloat32(number % 10) AS n, 'original' AS source, number AS inter FROM numbers(10) WHERE number % 3 = 1) ORDER BY n WITH FILL FROM 0 TO 5.51 STEP 0.5 INTERPOLATE (inter AS inter + 1)"
)
self.validate_identity(
"SELECT SUM(1) AS impressions, arrayJoin(cities) AS city, arrayJoin(browsers) AS browser FROM (SELECT ['Istanbul', 'Berlin', 'Bobruisk'] AS cities, ['Firefox', 'Chrome', 'Chrome'] AS browsers) GROUP BY 2, 3"
)

View file

@ -54,6 +54,13 @@ class TestDuckDB(Validator):
},
)
self.validate_all(
"SELECT {'bla': column1, 'foo': column2, 'bar': column3} AS data FROM source_table",
read={
"bigquery": "SELECT STRUCT(column1 AS bla, column2 AS foo, column3 AS bar) AS data FROM source_table",
"duckdb": "SELECT {'bla': column1, 'foo': column2, 'bar': column3} AS data FROM source_table",
},
)
self.validate_all(
"WITH cte(x) AS (SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3) SELECT AVG(x) FILTER (WHERE x > 1) FROM cte",
write={
@ -109,6 +116,8 @@ class TestDuckDB(Validator):
parse_one("a // b", read="duckdb").assert_is(exp.IntDiv).sql(dialect="duckdb"), "a // b"
)
self.validate_identity("MAKE_TIMESTAMP(1992, 9, 20, 13, 34, 27.123456)")
self.validate_identity("MAKE_TIMESTAMP(1667810584123456)")
self.validate_identity("SELECT EPOCH_MS(10) AS t")
self.validate_identity("SELECT MAKE_TIMESTAMP(10) AS t")
self.validate_identity("SELECT TO_TIMESTAMP(10) AS t")

View file

@ -8,6 +8,11 @@ class TestPostgres(Validator):
dialect = "postgres"
def test_ddl(self):
expr = parse_one("CREATE TABLE t (x INTERVAL day)", read="postgres")
cdef = expr.find(exp.ColumnDef)
cdef.args["kind"].assert_is(exp.DataType)
self.assertEqual(expr.sql(dialect="postgres"), "CREATE TABLE t (x INTERVAL day)")
self.validate_identity("CREATE INDEX idx_x ON x USING BTREE(x, y) WHERE (NOT y IS NULL)")
self.validate_identity("CREATE TABLE test (elems JSONB[])")
self.validate_identity("CREATE TABLE public.y (x TSTZRANGE NOT NULL)")
@ -203,6 +208,8 @@ class TestPostgres(Validator):
self.validate_identity("SELECT 1 OPERATOR(pg_catalog.+) 2")
def test_postgres(self):
self.validate_identity("EXEC AS myfunc @id = 123")
expr = parse_one(
"SELECT * FROM r CROSS JOIN LATERAL UNNEST(ARRAY[1]) AS s(location)", read="postgres"
)

View file

@ -379,6 +379,16 @@ class TestPresto(Validator):
"presto": "TIMESTAMP(x, '12:00:00')",
},
)
self.validate_all(
"DATE_ADD('DAY', CAST(x AS BIGINT), y)",
write={
"presto": "DATE_ADD('DAY', CAST(x AS BIGINT), y)",
},
read={
"presto": "DATE_ADD('DAY', x, y)",
},
)
self.validate_identity("DATE_ADD('DAY', 1, y)")
def test_ddl(self):
self.validate_all(
@ -462,10 +472,10 @@ class TestPresto(Validator):
)
self.validate_all(
'CREATE TABLE IF NOT EXISTS x ("cola" INTEGER, "ds" TEXT) WITH (PARTITIONED BY=("ds"))',
"""CREATE TABLE IF NOT EXISTS x ("cola" INTEGER, "ds" TEXT) COMMENT 'comment' WITH (PARTITIONED BY=("ds"))""",
write={
"spark": "CREATE TABLE IF NOT EXISTS x (`cola` INT, `ds` STRING) PARTITIONED BY (`ds`)",
"presto": """CREATE TABLE IF NOT EXISTS x ("cola" INTEGER, "ds" VARCHAR) WITH (PARTITIONED_BY=ARRAY['ds'])""",
"spark": "CREATE TABLE IF NOT EXISTS x (`cola` INT, `ds` STRING) COMMENT 'comment' PARTITIONED BY (`ds`)",
"presto": """CREATE TABLE IF NOT EXISTS x ("cola" INTEGER, "ds" VARCHAR) COMMENT 'comment' WITH (PARTITIONED_BY=ARRAY['ds'])""",
},
)
@ -534,26 +544,18 @@ class TestPresto(Validator):
},
)
def test_unicode_string(self):
for prefix in ("u&", "U&"):
self.validate_identity(
f"{prefix}'Hello winter \\2603 !'",
"U&'Hello winter \\2603 !'",
)
self.validate_identity(
f"{prefix}'Hello winter #2603 !' UESCAPE '#'",
"U&'Hello winter #2603 !' UESCAPE '#'",
)
def test_presto(self):
self.validate_identity("string_agg(x, ',')", "ARRAY_JOIN(ARRAY_AGG(x), ',')")
self.validate_identity(
"SELECT * FROM example.testdb.customer_orders FOR VERSION AS OF 8954597067493422955"
)
self.validate_identity(
"SELECT * FROM example.testdb.customer_orders FOR TIMESTAMP AS OF CAST('2022-03-23 09:59:29.803 Europe/Vienna' AS TIMESTAMP)"
)
self.validate_identity("SELECT * FROM x OFFSET 1 LIMIT 1")
self.validate_identity("SELECT * FROM x OFFSET 1 FETCH FIRST 1 ROWS ONLY")
self.validate_identity("SELECT BOOL_OR(a > 10) FROM asd AS T(a)")
self.validate_identity("SELECT * FROM (VALUES (1))")
self.validate_identity("START TRANSACTION READ WRITE, ISOLATION LEVEL SERIALIZABLE")
self.validate_identity("START TRANSACTION ISOLATION LEVEL REPEATABLE READ")
self.validate_identity("APPROX_PERCENTILE(a, b, c, d)")
self.validate_identity(
"SELECT SPLIT_TO_MAP('a:1;b:2;a:3', ';', ':', (k, v1, v2) -> CONCAT(v1, v2))"
)
with self.assertLogs(helper_logger) as cm:
self.validate_all(
"SELECT COALESCE(ELEMENT_AT(MAP_FROM_ENTRIES(ARRAY[(51, '1')]), id), quantity) FROM my_table",
@ -572,6 +574,24 @@ class TestPresto(Validator):
},
)
self.validate_identity("string_agg(x, ',')", "ARRAY_JOIN(ARRAY_AGG(x), ',')")
self.validate_identity("SELECT * FROM x OFFSET 1 LIMIT 1")
self.validate_identity("SELECT * FROM x OFFSET 1 FETCH FIRST 1 ROWS ONLY")
self.validate_identity("SELECT BOOL_OR(a > 10) FROM asd AS T(a)")
self.validate_identity("SELECT * FROM (VALUES (1))")
self.validate_identity("START TRANSACTION READ WRITE, ISOLATION LEVEL SERIALIZABLE")
self.validate_identity("START TRANSACTION ISOLATION LEVEL REPEATABLE READ")
self.validate_identity("APPROX_PERCENTILE(a, b, c, d)")
self.validate_identity(
"SELECT SPLIT_TO_MAP('a:1;b:2;a:3', ';', ':', (k, v1, v2) -> CONCAT(v1, v2))"
)
self.validate_identity(
"SELECT * FROM example.testdb.customer_orders FOR VERSION AS OF 8954597067493422955"
)
self.validate_identity(
"SELECT * FROM example.testdb.customer_orders FOR TIMESTAMP AS OF CAST('2022-03-23 09:59:29.803 Europe/Vienna' AS TIMESTAMP)"
)
self.validate_all(
"SELECT MAX_BY(a.id, a.timestamp) FROM a",
read={
@ -1044,3 +1064,61 @@ MATCH_RECOGNIZE (
)""",
pretty=True,
)
def test_to_char(self):
self.validate_all(
"TO_CHAR(ts, 'dd')",
write={
"bigquery": "FORMAT_DATE('%d', ts)",
"presto": "DATE_FORMAT(ts, '%d')",
},
)
self.validate_all(
"TO_CHAR(ts, 'hh')",
write={
"bigquery": "FORMAT_DATE('%H', ts)",
"presto": "DATE_FORMAT(ts, '%H')",
},
)
self.validate_all(
"TO_CHAR(ts, 'hh24')",
write={
"bigquery": "FORMAT_DATE('%H', ts)",
"presto": "DATE_FORMAT(ts, '%H')",
},
)
self.validate_all(
"TO_CHAR(ts, 'mi')",
write={
"bigquery": "FORMAT_DATE('%M', ts)",
"presto": "DATE_FORMAT(ts, '%i')",
},
)
self.validate_all(
"TO_CHAR(ts, 'mm')",
write={
"bigquery": "FORMAT_DATE('%m', ts)",
"presto": "DATE_FORMAT(ts, '%m')",
},
)
self.validate_all(
"TO_CHAR(ts, 'ss')",
write={
"bigquery": "FORMAT_DATE('%S', ts)",
"presto": "DATE_FORMAT(ts, '%s')",
},
)
self.validate_all(
"TO_CHAR(ts, 'yyyy')",
write={
"bigquery": "FORMAT_DATE('%Y', ts)",
"presto": "DATE_FORMAT(ts, '%Y')",
},
)
self.validate_all(
"TO_CHAR(ts, 'yy')",
write={
"bigquery": "FORMAT_DATE('%y', ts)",
"presto": "DATE_FORMAT(ts, '%y')",
},
)

View file

@ -36,6 +36,8 @@ WHERE
)""",
)
self.validate_identity("SELECT TO_ARRAY(CAST(x AS ARRAY))")
self.validate_identity("SELECT TO_ARRAY(CAST(['test'] AS VARIANT))")
self.validate_identity("SELECT user_id, value FROM table_name sample ($s) SEED (0)")
self.validate_identity("SELECT ARRAY_UNIQUE_AGG(x)")
self.validate_identity("SELECT OBJECT_CONSTRUCT()")
@ -72,6 +74,18 @@ WHERE
self.validate_identity(
'DESCRIBE TABLE "SNOWFLAKE_SAMPLE_DATA"."TPCDS_SF100TCL"."WEB_SITE" type=stage'
)
self.validate_identity(
"SELECT * FROM foo at",
"SELECT * FROM foo AS at",
)
self.validate_identity(
"SELECT * FROM foo before",
"SELECT * FROM foo AS before",
)
self.validate_identity(
"SELECT * FROM foo at (col)",
"SELECT * FROM foo AS at(col)",
)
self.validate_identity(
"SELECT * FROM unnest(x) with ordinality",
"SELECT * FROM TABLE(FLATTEN(INPUT => x)) AS _u(seq, key, path, index, value, this)",
@ -115,11 +129,37 @@ WHERE
"SELECT TO_TIMESTAMP(x) FROM t",
"SELECT CAST(x AS TIMESTAMPNTZ) FROM t",
)
self.validate_identity(
"CAST(x AS BYTEINT)",
"CAST(x AS INT)",
)
self.validate_identity(
"CAST(x AS CHAR VARYING)",
"CAST(x AS VARCHAR)",
)
self.validate_identity(
"CAST(x AS CHARACTER VARYING)",
"CAST(x AS VARCHAR)",
)
self.validate_identity(
"CAST(x AS NCHAR VARYING)",
"CAST(x AS VARCHAR)",
)
self.validate_all("CAST(x AS BYTEINT)", write={"snowflake": "CAST(x AS INT)"})
self.validate_all("CAST(x AS CHAR VARYING)", write={"snowflake": "CAST(x AS VARCHAR)"})
self.validate_all("CAST(x AS CHARACTER VARYING)", write={"snowflake": "CAST(x AS VARCHAR)"})
self.validate_all("CAST(x AS NCHAR VARYING)", write={"snowflake": "CAST(x AS VARCHAR)"})
self.validate_all(
"SELECT TO_ARRAY(['test'])",
write={
"snowflake": "SELECT TO_ARRAY(['test'])",
"spark": "SELECT ARRAY('test')",
},
)
self.validate_all(
"SELECT TO_ARRAY(['test'])",
write={
"snowflake": "SELECT TO_ARRAY(['test'])",
"spark": "SELECT ARRAY('test')",
},
)
self.validate_all(
# We need to qualify the columns in this query because "value" would be ambiguous
'WITH t(x, "value") AS (SELECT [1, 2, 3], 1) SELECT IFF(_u.pos = _u_2.pos_2, _u_2."value", NULL) AS "value" FROM t, TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (GREATEST(ARRAY_SIZE(t.x)) - 1) + 1))) AS _u(seq, key, path, index, pos, this) CROSS JOIN TABLE(FLATTEN(INPUT => t.x)) AS _u_2(seq, key, path, pos_2, "value", this) WHERE _u.pos = _u_2.pos_2 OR (_u.pos > (ARRAY_SIZE(t.x) - 1) AND _u_2.pos_2 = (ARRAY_SIZE(t.x) - 1))',
@ -489,8 +529,8 @@ WHERE
self.validate_all(
"TO_ARRAY(x)",
write={
"spark": "ARRAY(x)",
"snowflake": "[x]",
"spark": "IF(x IS NULL, NULL, ARRAY(x))",
"snowflake": "TO_ARRAY(x)",
},
)
self.validate_all(
@ -626,6 +666,10 @@ WHERE
"SELECT * FROM @mystage t (c1)",
"SELECT * FROM @mystage AS t(c1)",
)
self.validate_identity(
"SELECT * FROM @foo/bar (PATTERN => 'test', FILE_FORMAT => ds_sandbox.test.my_csv_format) AS bla",
"SELECT * FROM @foo/bar (FILE_FORMAT => ds_sandbox.test.my_csv_format, PATTERN => 'test') AS bla",
)
def test_sample(self):
self.validate_identity("SELECT * FROM testtable TABLESAMPLE BERNOULLI (20.3)")
@ -775,6 +819,53 @@ WHERE
},
)
def test_historical_data(self):
self.validate_identity("SELECT * FROM my_table AT (STATEMENT => $query_id_var)")
self.validate_identity("SELECT * FROM my_table AT (OFFSET => -60 * 5)")
self.validate_identity("SELECT * FROM my_table BEFORE (STATEMENT => $query_id_var)")
self.validate_identity("SELECT * FROM my_table BEFORE (OFFSET => -60 * 5)")
self.validate_identity("CREATE SCHEMA restored_schema CLONE my_schema AT (OFFSET => -3600)")
self.validate_identity(
"CREATE TABLE restored_table CLONE my_table AT (TIMESTAMP => CAST('Sat, 09 May 2015 01:01:00 +0300' AS TIMESTAMPTZ))",
)
self.validate_identity(
"CREATE DATABASE restored_db CLONE my_db BEFORE (STATEMENT => '8e5d0ca9-005e-44e6-b858-a8f5b37c5726')"
)
self.validate_identity(
"SELECT * FROM my_table AT (TIMESTAMP => TO_TIMESTAMP(1432669154242, 3))"
)
self.validate_identity(
"SELECT * FROM my_table AT (OFFSET => -60 * 5) AS T WHERE T.flag = 'valid'"
)
self.validate_identity(
"SELECT * FROM my_table AT (STATEMENT => '8e5d0ca9-005e-44e6-b858-a8f5b37c5726')"
)
self.validate_identity(
"SELECT * FROM my_table BEFORE (STATEMENT => '8e5d0ca9-005e-44e6-b858-a8f5b37c5726')"
)
self.validate_identity(
"SELECT * FROM my_table AT (TIMESTAMP => 'Fri, 01 May 2015 16:20:00 -0700'::timestamp)",
"SELECT * FROM my_table AT (TIMESTAMP => CAST('Fri, 01 May 2015 16:20:00 -0700' AS TIMESTAMPNTZ))",
)
self.validate_identity(
"SELECT * FROM my_table AT(TIMESTAMP => 'Fri, 01 May 2015 16:20:00 -0700'::timestamp_tz)",
"SELECT * FROM my_table AT (TIMESTAMP => CAST('Fri, 01 May 2015 16:20:00 -0700' AS TIMESTAMPTZ))",
)
self.validate_identity(
"SELECT * FROM my_table BEFORE (TIMESTAMP => 'Fri, 01 May 2015 16:20:00 -0700'::timestamp_tz);",
"SELECT * FROM my_table BEFORE (TIMESTAMP => CAST('Fri, 01 May 2015 16:20:00 -0700' AS TIMESTAMPTZ))",
)
self.validate_identity(
"""
SELECT oldt.* , newt.*
FROM my_table BEFORE(STATEMENT => '8e5d0ca9-005e-44e6-b858-a8f5b37c5726') AS oldt
FULL OUTER JOIN my_table AT(STATEMENT => '8e5d0ca9-005e-44e6-b858-a8f5b37c5726') AS newt
ON oldt.id = newt.id
WHERE oldt.id IS NULL OR newt.id IS NULL;
""",
"SELECT oldt.*, newt.* FROM my_table BEFORE (STATEMENT => '8e5d0ca9-005e-44e6-b858-a8f5b37c5726') AS oldt FULL OUTER JOIN my_table AT (STATEMENT => '8e5d0ca9-005e-44e6-b858-a8f5b37c5726') AS newt ON oldt.id = newt.id WHERE oldt.id IS NULL OR newt.id IS NULL",
)
def test_ddl(self):
self.validate_identity(
"""create external table et2(

View file

@ -75,7 +75,7 @@ class TestSpark(Validator):
col_a INTEGER,
date VARCHAR
)
COMMENT='Test comment: blah'
COMMENT 'Test comment: blah'
WITH (
PARTITIONED_BY=ARRAY['date'],
FORMAT='ICEBERG',

View file

@ -20,6 +20,12 @@ class TestTSQL(Validator):
self.validate_identity("1 AND true", "1 <> 0 AND (1 = 1)")
self.validate_identity("CAST(x AS int) OR y", "CAST(x AS INTEGER) <> 0 OR y <> 0")
self.validate_all(
"SELECT TOP 1 * FROM (SELECT x FROM t1 UNION ALL SELECT x FROM t2) AS _l_0",
read={
"": "SELECT x FROM t1 UNION ALL SELECT x FROM t2 LIMIT 1",
},
)
self.validate_all(
"WITH t(c) AS (SELECT 1) SELECT * INTO foo FROM (SELECT c AS c FROM t) AS temp",
read={

View file

@ -865,5 +865,8 @@ KILL CONNECTION 123
KILL QUERY '123'
CHR(97)
SELECT * FROM UNNEST(x) WITH ORDINALITY UNION ALL SELECT * FROM UNNEST(y) WITH ORDINALITY
SELECT x FROM t1 UNION ALL SELECT x FROM t2 LIMIT 1
SELECT x FROM t1 UNION ALL SELECT x FROM t2 UNION ALL SELECT x FROM t3 LIMIT 1
WITH use(use) AS (SELECT 1) SELECT use FROM use
SELECT recursive FROM t
SELECT (ROW_NUMBER() OVER (PARTITION BY user ORDER BY date ASC) - ROW_NUMBER() OVER (PARTITION BY user, segment ORDER BY date ASC)) AS group_id FROM example_table

View file

@ -46,3 +46,71 @@ FROM x;
SELECT
a
FROM x;
# title: CTE reference in subquery where alias matches outer table name
WITH q AS (
SELECT
a
FROM y
)
SELECT
a
FROM x AS q
WHERE
a IN (
SELECT
a
FROM q
);
WITH q AS (
SELECT
a
FROM y
)
SELECT
a
FROM x AS q
WHERE
a IN (
SELECT
a
FROM q
);
# title: CTE reference in subquery where alias matches outer table name and outer alias is also CTE
WITH q AS (
SELECT
a
FROM y
), q2 AS (
SELECT
a
FROM y
)
SELECT
a
FROM q2 AS q
WHERE
a IN (
SELECT
a
FROM q
);
WITH q AS (
SELECT
a
FROM y
), q2 AS (
SELECT
a
FROM y
)
SELECT
a
FROM q2 AS q
WHERE
a IN (
SELECT
a
FROM q
);

View file

@ -42,17 +42,9 @@ WITH y AS (SELECT a FROM x), z AS (SELECT a FROM y AS y) SELECT a FROM z AS z CR
WITH y AS (SELECT a FROM (SELECT a FROM x) AS y) SELECT a FROM y;
WITH y_2 AS (SELECT a FROM x), y AS (SELECT a FROM y_2 AS y) SELECT a FROM y;
-- Union
SELECT 1 AS x, 2 AS y UNION ALL SELECT 1 AS x, 2 AS y;
WITH cte AS (SELECT 1 AS x, 2 AS y) SELECT cte.x AS x, cte.y AS y FROM cte AS cte UNION ALL SELECT cte.x AS x, cte.y AS y FROM cte AS cte;
-- Union of selects with derived tables
(SELECT a FROM (SELECT b FROM x)) UNION (SELECT a FROM (SELECT b FROM y));
WITH cte AS (SELECT b FROM x), cte_2 AS (SELECT a FROM cte AS cte), cte_3 AS (SELECT b FROM y), cte_4 AS (SELECT a FROM cte_3 AS cte_3) (SELECT cte_2.a AS a FROM cte_2 AS cte_2) UNION (SELECT cte_4.a AS a FROM cte_4 AS cte_4);
-- Three unions
SELECT a FROM x UNION ALL SELECT a FROM y UNION ALL SELECT a FROM z;
WITH cte AS (SELECT a FROM x), cte_2 AS (SELECT a FROM y), cte_3 AS (SELECT a FROM z), cte_4 AS (SELECT cte_2.a AS a FROM cte_2 AS cte_2 UNION ALL SELECT cte_3.a AS a FROM cte_3 AS cte_3) SELECT cte.a AS a FROM cte AS cte UNION ALL SELECT cte_4.a AS a FROM cte_4 AS cte_4;
WITH cte AS (SELECT b FROM x), cte_2 AS (SELECT b FROM y) (SELECT a FROM cte AS cte) UNION (SELECT a FROM cte_2 AS cte_2);
-- Subquery
SELECT a FROM x WHERE b = (SELECT y.c FROM y);

View file

@ -411,3 +411,20 @@ FROM (
ON _q_0.a = y.b
);
SELECT y.b AS b FROM (x AS x JOIN y AS y ON x.a = y.b);
# title: merge cte into subquery with overlapping alias
WITH q AS (
SELECT
y.b AS a
FROM y AS y
)
SELECT
q.a AS a
FROM x AS q
WHERE
q.a IN (
SELECT
q.a AS a
FROM q AS q
);
SELECT q.a AS a FROM x AS q WHERE q.a IN (SELECT y.b AS a FROM y AS y);

View file

@ -131,7 +131,7 @@ SELECT
SUM("y"."b") AS "sum_b"
FROM "x" AS "x"
LEFT JOIN "_u_0" AS "_u_0"
ON "x"."b" = "_u_0"."_u_1"
ON "_u_0"."_u_1" = "x"."b"
JOIN "y" AS "y"
ON "x"."b" = "y"."b"
WHERE
@ -522,7 +522,7 @@ OR (
SELECT
*,
IFF(
IFF("unioned"."uploaded_at" >= '2022-06-16', 'workday', 'bamboohr') = "unioned"."source_system",
"unioned"."source_system" = IFF("unioned"."uploaded_at" >= '2022-06-16', 'workday', 'bamboohr'),
1,
0
) AS "sort_order"
@ -950,7 +950,7 @@ SELECT
FROM "y" AS "y"
CROSS JOIN "_u_0" AS "_u_0"
JOIN "x" AS "x"
ON "y"."b" = "x"."b"
ON "x"."b" = "y"."b"
GROUP BY
"x"."a";
@ -989,7 +989,7 @@ SELECT
COALESCE("m"."a", "foo"."a") AS "a"
FROM "m"
JOIN "n" AS "foo"("a")
ON "m"."a" = "foo"."a";
ON "foo"."a" = "m"."a";
# title: reduction of string concatenation that uses CONCAT(..), || and +
# execute: false
@ -1068,7 +1068,7 @@ SELECT
COALESCE("alias3"."c_od", 0) AS "c_od"
FROM "table1" AS "table1"
LEFT JOIN "alias3"
ON "table1"."cid" = "alias3"."cid";
ON "alias3"."cid" = "table1"."cid";
# title: CTE with EXPLODE cannot be merged
# dialect: spark
@ -1115,3 +1115,32 @@ WITH `t` AS (
SELECT
`t`.`CoL` AS `CoL`
FROM `t`;
# title: top-level query is parenthesized
# execute: false
WITH x AS (
SELECT a FROM t
)
(
SELECT * FROM x
UNION ALL
SELECT * FROM x
LIMIT 10
)
LIMIT 10;
WITH "x" AS (
SELECT
"t"."a" AS "a"
FROM "t" AS "t"
)
(
SELECT
"x"."a" AS "a"
FROM "x"
UNION ALL
SELECT
"x"."a" AS "a"
FROM "x"
LIMIT 10
)
LIMIT 10;

View file

@ -26,10 +26,10 @@ SELECT x.a FROM x AS x JOIN (SELECT y.a FROM y AS y) AS y ON y.a = 1 AND x.a = y
SELECT x.a FROM x AS x JOIN (SELECT y.a FROM y AS y WHERE y.a = 1) AS y ON x.a = y.a AND TRUE;
SELECT x.a AS a FROM x AS x JOIN (SELECT * FROM y AS y) AS y ON y.a = 1 WHERE x.a = 1 AND x.b = 1 AND y.a = x.a;
SELECT x.a AS a FROM x AS x JOIN (SELECT * FROM y AS y WHERE y.a = 1) AS y ON TRUE AND y.a = x.a WHERE x.a = 1 AND x.b = 1 AND TRUE;
SELECT x.a AS a FROM x AS x JOIN (SELECT * FROM y AS y WHERE y.a = 1) AS y ON x.a = y.a AND TRUE WHERE x.a = 1 AND TRUE AND x.b = 1;
SELECT x.a AS a FROM x AS x CROSS JOIN (SELECT * FROM y AS y) AS y WHERE x.a = 1 AND x.b = 1 AND y.a = x.a AND y.a = 1;
SELECT x.a AS a FROM x AS x JOIN (SELECT * FROM y AS y WHERE y.a = 1) AS y ON TRUE AND y.a = x.a WHERE x.a = 1 AND x.b = 1 AND TRUE AND TRUE;
SELECT x.a AS a FROM x AS x JOIN (SELECT * FROM y AS y WHERE y.a = 1) AS y ON x.a = y.a AND TRUE WHERE x.a = 1 AND TRUE AND x.b = 1 AND TRUE;
with t1 as (SELECT x.a, x.b, ROW_NUMBER() OVER (PARTITION BY x.a ORDER BY x.a) as row_num FROM x) SELECT t1.a, t1.b FROM t1 WHERE row_num = 1;
WITH t1 AS (SELECT x.a, x.b, ROW_NUMBER() OVER (PARTITION BY x.a ORDER BY x.a) AS row_num FROM x) SELECT t1.a, t1.b FROM t1 WHERE row_num = 1;

View file

@ -645,6 +645,9 @@ x < 5 AND x > 3;
x > 3 AND 5 < x AND x BETWEEN 9 AND 10;
x <= 10 AND x >= 9;
NOT x BETWEEN 0 AND 1;
x < 0 OR x > 1;
1 < x AND 3 < x;
x > 3;
@ -657,6 +660,42 @@ x <> 2018 OR x = 2018;
t0.x = t1.x AND t0.y < t1.y AND t0.y <= t1.y;
t0.x = t1.x AND t0.y < t1.y AND t0.y <= t1.y;
1 < x;
x > 1;
1 <= x;
x >= 1;
1 > x;
x < 1;
1 >= x;
x <= 1;
1 = x;
x = 1;
1 <> x;
x <> 1;
NOT 1 < x;
x <= 1;
NOT 1 <= x;
x < 1;
NOT 1 > x;
x >= 1;
NOT 1 >= x;
x > 1;
NOT 1 = x;
x <> 1;
NOT 1 <> x;
x = 1;
--------------------------------------
-- COALESCE
--------------------------------------
@ -667,7 +706,7 @@ COALESCE(x, 1) = 2;
NOT x IS NULL AND x = 2;
2 = COALESCE(x, 1);
2 = x AND NOT x IS NULL;
NOT x IS NULL AND x = 2;
COALESCE(x, 1, 1) = 1 + 1;
NOT x IS NULL AND x = 2;
@ -759,6 +798,20 @@ CONCAT(a, b) IN (SELECT * FROM foo WHERE cond);
--------------------------------------
-- DATE_TRUNC
--------------------------------------
DATE_TRUNC('week', CAST('2023-12-15' AS DATE));
CAST('2023-12-11' AS DATE);
DATE_TRUNC('week', CAST('2023-12-16' AS DATE));
CAST('2023-12-11' AS DATE);
# dialect: bigquery
DATE_TRUNC(CAST('2023-12-15' AS DATE), WEEK);
CAST('2023-12-10' AS DATE);
# dialect: bigquery
DATE_TRUNC(CAST('2023-12-16' AS DATE), WEEK);
CAST('2023-12-10' AS DATE);
DATE_TRUNC('year', x) = CAST('2021-01-01' AS DATE);
x < CAST('2022-01-01' AS DATE) AND x >= CAST('2021-01-01' AS DATE);
@ -801,7 +854,7 @@ DATE_TRUNC('year', x) < CAST('2021-01-01' AS DATE);
x < CAST('2021-01-01' AS DATE);
DATE_TRUNC('year', x) < CAST('2021-01-02' AS DATE);
x < CAST('2021-01-01' AS DATE);
x < CAST('2022-01-01' AS DATE);
DATE_TRUNC('year', x) >= CAST('2021-01-01' AS DATE);
x >= CAST('2021-01-01' AS DATE);
@ -841,7 +894,7 @@ x < CAST('2022-01-01 00:00:00' AS DATETIME) AND x >= CAST('2021-01-01 00:00:00'
-- right side is not a date literal
DATE_TRUNC('day', x) = CAST(y AS DATE);
DATE_TRUNC('day', x) = CAST(y AS DATE);
CAST(y AS DATE) = DATE_TRUNC('day', x);
-- nested cast
DATE_TRUNC('day', x) = CAST(CAST('2021-01-01 01:02:03' AS DATETIME) AS DATE);
@ -905,10 +958,10 @@ DATE_SUB(x, 1, DAY) <> CAST('2021-01-01' AS DATE);
x <> CAST('2021-01-02' AS DATE);
DATE_ADD(DATE_ADD(DATE_TRUNC('week', DATE_SUB(x, 1, DAY)), 1, DAY), 1, YEAR) < CAST('2021-01-08' AS DATE);
x < CAST('2020-01-07' AS DATE);
x < CAST('2020-01-14' AS DATE);
x - INTERVAL '1' day = CAST(y AS DATE);
x - INTERVAL '1' day = CAST(y AS DATE);
CAST(y AS DATE) = x - INTERVAL '1' day;
--------------------------------------
-- Constant Propagation
@ -917,16 +970,16 @@ x = 5 AND y = x;
x = 5 AND y = 5;
5 = x AND y = x;
5 = x AND y = 5;
x = 5 AND y = 5;
x = 5 OR y = x;
x = 5 OR y = x;
x = 5 OR x = y;
(x = 5 AND y = x) OR y = 1;
(x = 5 AND y = 5) OR y = 1;
t.x = 5 AND y = x;
t.x = 5 AND y = x;
t.x = 5 AND x = y;
t.x = 'a' AND y = CONCAT_WS('-', t.x, 'b');
t.x = 'a' AND y = 'a-b';
@ -938,7 +991,7 @@ x = 5 AND x = 6;
FALSE;
x = 5 AND (y = x OR z = 1);
x = 5 AND (y = x OR z = 1);
x = 5 AND (x = y OR z = 1);
x = 5 AND x + 3 = 8;
x = 5;
@ -950,7 +1003,7 @@ x = 1 AND y > 0 AND (SELECT z = 5 FROM t WHERE y = 1);
(SELECT z = 5 FROM t WHERE y = 1) AND x = 1 AND y > 0;
x = 1 AND x = y AND (SELECT z FROM t WHERE a AND (b OR c));
(SELECT z FROM t WHERE a AND (b OR c)) AND 1 = y AND x = 1;
(SELECT z FROM t WHERE a AND (b OR c)) AND x = 1 AND y = 1;
t1.a = 39 AND t2.b = t1.a AND t3.c = t2.b;
t1.a = 39 AND t2.b = 39 AND t3.c = 39;
@ -968,7 +1021,7 @@ x = y AND CASE WHEN x = 5 THEN FALSE ELSE TRUE END;
CASE WHEN x = 5 THEN FALSE ELSE TRUE END AND x = y;
x = 1 AND CASE WHEN y = 5 THEN x = z END;
CASE WHEN y = 5 THEN 1 = z END AND x = 1;
CASE WHEN y = 5 THEN z = 1 END AND x = 1;
--------------------------------------
-- Simplify Conditionals
@ -1028,4 +1081,4 @@ CASE x WHEN y THEN z END;
CASE WHEN x = y THEN z END;
CASE x1 + x2 WHEN x3 THEN x4 WHEN x5 + x6 THEN x7 ELSE x8 END;
CASE WHEN (x1 + x2) = x3 THEN x4 WHEN (x1 + x2) = (x5 + x6) THEN x7 ELSE x8 END;
CASE WHEN x3 = (x1 + x2) THEN x4 WHEN (x1 + x2) = (x5 + x6) THEN x7 ELSE x8 END;

File diff suppressed because it is too large Load diff

View file

@ -118,9 +118,9 @@ WITH "region_2" AS (
"partsupp"."ps_partkey" AS "_u_1"
FROM "partsupp_2" AS "partsupp"
JOIN "supplier" AS "supplier"
ON "supplier"."s_suppkey" = "partsupp"."ps_suppkey"
ON "partsupp"."ps_suppkey" = "supplier"."s_suppkey"
JOIN "nation" AS "nation"
ON "supplier"."s_nationkey" = "nation"."n_nationkey"
ON "nation"."n_nationkey" = "supplier"."s_nationkey"
JOIN "region_2" AS "region"
ON "nation"."n_regionkey" = "region"."r_regionkey"
GROUP BY
@ -138,18 +138,18 @@ SELECT
FROM "part" AS "part"
CROSS JOIN "region_2" AS "region"
LEFT JOIN "_u_0" AS "_u_0"
ON "part"."p_partkey" = "_u_0"."_u_1"
ON "_u_0"."_u_1" = "part"."p_partkey"
JOIN "nation" AS "nation"
ON "nation"."n_regionkey" = "region"."r_regionkey"
JOIN "partsupp_2" AS "partsupp"
ON "part"."p_partkey" = "partsupp"."ps_partkey"
JOIN "supplier" AS "supplier"
ON "supplier"."s_nationkey" = "nation"."n_nationkey"
AND "supplier"."s_suppkey" = "partsupp"."ps_suppkey"
ON "nation"."n_nationkey" = "supplier"."s_nationkey"
AND "partsupp"."ps_suppkey" = "supplier"."s_suppkey"
WHERE
"part"."p_size" = 15
"_u_0"."_col_0" = "partsupp"."ps_supplycost"
AND "part"."p_size" = 15
AND "part"."p_type" LIKE '%BRASS'
AND "partsupp"."ps_supplycost" = "_u_0"."_col_0"
ORDER BY
"s_acctbal" DESC,
"n_name",
@ -300,7 +300,7 @@ JOIN "lineitem" AS "lineitem"
ON "lineitem"."l_orderkey" = "orders"."o_orderkey"
AND "lineitem"."l_suppkey" = "supplier"."s_suppkey"
JOIN "nation" AS "nation"
ON "supplier"."s_nationkey" = "nation"."n_nationkey"
ON "nation"."n_nationkey" = "supplier"."s_nationkey"
JOIN "region" AS "region"
ON "nation"."n_regionkey" = "region"."r_regionkey" AND "region"."r_name" = 'ASIA'
GROUP BY
@ -381,14 +381,14 @@ SELECT
)) AS "revenue"
FROM "supplier" AS "supplier"
JOIN "lineitem" AS "lineitem"
ON "supplier"."s_suppkey" = "lineitem"."l_suppkey"
ON "lineitem"."l_suppkey" = "supplier"."s_suppkey"
AND CAST("lineitem"."l_shipdate" AS DATE) <= CAST('1996-12-31' AS DATE)
AND CAST("lineitem"."l_shipdate" AS DATE) >= CAST('1995-01-01' AS DATE)
JOIN "nation" AS "n1"
ON (
"n1"."n_name" = 'FRANCE' OR "n1"."n_name" = 'GERMANY'
)
AND "supplier"."s_nationkey" = "n1"."n_nationkey"
AND "n1"."n_nationkey" = "supplier"."s_nationkey"
JOIN "nation" AS "n2"
ON (
"n1"."n_name" = 'FRANCE' OR "n2"."n_name" = 'FRANCE'
@ -403,7 +403,7 @@ JOIN "customer" AS "customer"
ON "customer"."c_nationkey" = "n2"."n_nationkey"
JOIN "orders" AS "orders"
ON "customer"."c_custkey" = "orders"."o_custkey"
AND "orders"."o_orderkey" = "lineitem"."l_orderkey"
AND "lineitem"."l_orderkey" = "orders"."o_orderkey"
GROUP BY
"n1"."n_name",
"n2"."n_name",
@ -470,18 +470,18 @@ FROM "part" AS "part"
JOIN "region" AS "region"
ON "region"."r_name" = 'AMERICA'
JOIN "lineitem" AS "lineitem"
ON "part"."p_partkey" = "lineitem"."l_partkey"
ON "lineitem"."l_partkey" = "part"."p_partkey"
JOIN "nation" AS "n1"
ON "n1"."n_regionkey" = "region"."r_regionkey"
JOIN "customer" AS "customer"
ON "customer"."c_nationkey" = "n1"."n_nationkey"
JOIN "supplier" AS "supplier"
ON "supplier"."s_suppkey" = "lineitem"."l_suppkey"
ON "lineitem"."l_suppkey" = "supplier"."s_suppkey"
JOIN "nation" AS "n2"
ON "supplier"."s_nationkey" = "n2"."n_nationkey"
ON "n2"."n_nationkey" = "supplier"."s_nationkey"
JOIN "orders" AS "orders"
ON "lineitem"."l_orderkey" = "orders"."o_orderkey"
AND "orders"."o_custkey" = "customer"."c_custkey"
ON "customer"."c_custkey" = "orders"."o_custkey"
AND "lineitem"."l_orderkey" = "orders"."o_orderkey"
AND CAST("orders"."o_orderdate" AS DATE) <= CAST('1996-12-31' AS DATE)
AND CAST("orders"."o_orderdate" AS DATE) >= CAST('1995-01-01' AS DATE)
WHERE
@ -536,16 +536,16 @@ SELECT
) AS "sum_profit"
FROM "part" AS "part"
JOIN "lineitem" AS "lineitem"
ON "part"."p_partkey" = "lineitem"."l_partkey"
ON "lineitem"."l_partkey" = "part"."p_partkey"
JOIN "orders" AS "orders"
ON "orders"."o_orderkey" = "lineitem"."l_orderkey"
ON "lineitem"."l_orderkey" = "orders"."o_orderkey"
JOIN "partsupp" AS "partsupp"
ON "partsupp"."ps_partkey" = "lineitem"."l_partkey"
AND "partsupp"."ps_suppkey" = "lineitem"."l_suppkey"
ON "lineitem"."l_partkey" = "partsupp"."ps_partkey"
AND "lineitem"."l_suppkey" = "partsupp"."ps_suppkey"
JOIN "supplier" AS "supplier"
ON "supplier"."s_suppkey" = "lineitem"."l_suppkey"
ON "lineitem"."l_suppkey" = "supplier"."s_suppkey"
JOIN "nation" AS "nation"
ON "supplier"."s_nationkey" = "nation"."n_nationkey"
ON "nation"."n_nationkey" = "supplier"."s_nationkey"
WHERE
"part"."p_name" LIKE '%green%'
GROUP BY
@ -672,7 +672,7 @@ WITH "supplier_2" AS (
JOIN "supplier_2" AS "supplier"
ON "partsupp"."ps_suppkey" = "supplier"."s_suppkey"
JOIN "nation_2" AS "nation"
ON "supplier"."s_nationkey" = "nation"."n_nationkey"
ON "nation"."n_nationkey" = "supplier"."s_nationkey"
)
SELECT
"partsupp"."ps_partkey" AS "ps_partkey",
@ -682,11 +682,11 @@ CROSS JOIN "_u_0" AS "_u_0"
JOIN "supplier_2" AS "supplier"
ON "partsupp"."ps_suppkey" = "supplier"."s_suppkey"
JOIN "nation_2" AS "nation"
ON "supplier"."s_nationkey" = "nation"."n_nationkey"
ON "nation"."n_nationkey" = "supplier"."s_nationkey"
GROUP BY
"partsupp"."ps_partkey"
HAVING
SUM("partsupp"."ps_supplycost" * "partsupp"."ps_availqty") > MAX("_u_0"."_col_0")
MAX("_u_0"."_col_0") < SUM("partsupp"."ps_supplycost" * "partsupp"."ps_availqty")
ORDER BY
"value" DESC;
@ -740,9 +740,9 @@ SELECT
FROM "orders" AS "orders"
JOIN "lineitem" AS "lineitem"
ON "lineitem"."l_commitdate" < "lineitem"."l_receiptdate"
AND "lineitem"."l_shipdate" < "lineitem"."l_commitdate"
AND "lineitem"."l_commitdate" > "lineitem"."l_shipdate"
AND "lineitem"."l_orderkey" = "orders"."o_orderkey"
AND "lineitem"."l_shipmode" IN ('MAIL', 'SHIP')
AND "orders"."o_orderkey" = "lineitem"."l_orderkey"
AND CAST("lineitem"."l_receiptdate" AS DATE) < CAST('1995-01-01' AS DATE)
AND CAST("lineitem"."l_receiptdate" AS DATE) >= CAST('1994-01-01' AS DATE)
GROUP BY
@ -893,9 +893,9 @@ SELECT
"revenue"."total_revenue" AS "total_revenue"
FROM "supplier" AS "supplier"
JOIN "revenue"
ON "supplier"."s_suppkey" = "revenue"."supplier_no"
ON "revenue"."supplier_no" = "supplier"."s_suppkey"
JOIN "_u_0" AS "_u_0"
ON "revenue"."total_revenue" = "_u_0"."_col_0"
ON "_u_0"."_col_0" = "revenue"."total_revenue"
ORDER BY
"s_suppkey";
@ -948,7 +948,7 @@ SELECT
COUNT(DISTINCT "partsupp"."ps_suppkey") AS "supplier_cnt"
FROM "partsupp" AS "partsupp"
LEFT JOIN "_u_0" AS "_u_0"
ON "partsupp"."ps_suppkey" = "_u_0"."s_suppkey"
ON "_u_0"."s_suppkey" = "partsupp"."ps_suppkey"
JOIN "part" AS "part"
ON "part"."p_brand" <> 'Brand#45'
AND "part"."p_partkey" = "partsupp"."ps_partkey"
@ -998,13 +998,13 @@ SELECT
SUM("lineitem"."l_extendedprice") / 7.0 AS "avg_yearly"
FROM "lineitem" AS "lineitem"
JOIN "part" AS "part"
ON "part"."p_brand" = 'Brand#23'
ON "lineitem"."l_partkey" = "part"."p_partkey"
AND "part"."p_brand" = 'Brand#23'
AND "part"."p_container" = 'MED BOX'
AND "part"."p_partkey" = "lineitem"."l_partkey"
LEFT JOIN "_u_0" AS "_u_0"
ON "_u_0"."_u_1" = "part"."p_partkey"
WHERE
"lineitem"."l_quantity" < "_u_0"."_col_0";
"_u_0"."_col_0" > "lineitem"."l_quantity";
--------------------------------------
-- TPC-H 18
@ -1064,9 +1064,9 @@ FROM "customer" AS "customer"
JOIN "orders" AS "orders"
ON "customer"."c_custkey" = "orders"."o_custkey"
LEFT JOIN "_u_0" AS "_u_0"
ON "orders"."o_orderkey" = "_u_0"."l_orderkey"
ON "_u_0"."l_orderkey" = "orders"."o_orderkey"
JOIN "lineitem" AS "lineitem"
ON "orders"."o_orderkey" = "lineitem"."l_orderkey"
ON "lineitem"."l_orderkey" = "orders"."o_orderkey"
WHERE
NOT "_u_0"."l_orderkey" IS NULL
GROUP BY
@ -1125,57 +1125,57 @@ SELECT
FROM "lineitem" AS "lineitem"
JOIN "part" AS "part"
ON (
"part"."p_brand" = 'Brand#12'
"lineitem"."l_partkey" = "part"."p_partkey"
AND "part"."p_brand" = 'Brand#12'
AND "part"."p_container" IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG')
AND "part"."p_partkey" = "lineitem"."l_partkey"
AND "part"."p_size" <= 5
AND "part"."p_size" >= 1
)
OR (
"part"."p_brand" = 'Brand#23'
"lineitem"."l_partkey" = "part"."p_partkey"
AND "part"."p_brand" = 'Brand#23'
AND "part"."p_container" IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK')
AND "part"."p_partkey" = "lineitem"."l_partkey"
AND "part"."p_size" <= 10
AND "part"."p_size" >= 1
)
OR (
"part"."p_brand" = 'Brand#34'
"lineitem"."l_partkey" = "part"."p_partkey"
AND "part"."p_brand" = 'Brand#34'
AND "part"."p_container" IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG')
AND "part"."p_partkey" = "lineitem"."l_partkey"
AND "part"."p_size" <= 15
AND "part"."p_size" >= 1
)
WHERE
(
"lineitem"."l_quantity" <= 11
"lineitem"."l_partkey" = "part"."p_partkey"
AND "lineitem"."l_quantity" <= 11
AND "lineitem"."l_quantity" >= 1
AND "lineitem"."l_shipinstruct" = 'DELIVER IN PERSON'
AND "lineitem"."l_shipmode" IN ('AIR', 'AIR REG')
AND "part"."p_brand" = 'Brand#12'
AND "part"."p_container" IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG')
AND "part"."p_partkey" = "lineitem"."l_partkey"
AND "part"."p_size" <= 5
AND "part"."p_size" >= 1
)
OR (
"lineitem"."l_quantity" <= 20
"lineitem"."l_partkey" = "part"."p_partkey"
AND "lineitem"."l_quantity" <= 20
AND "lineitem"."l_quantity" >= 10
AND "lineitem"."l_shipinstruct" = 'DELIVER IN PERSON'
AND "lineitem"."l_shipmode" IN ('AIR', 'AIR REG')
AND "part"."p_brand" = 'Brand#23'
AND "part"."p_container" IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK')
AND "part"."p_partkey" = "lineitem"."l_partkey"
AND "part"."p_size" <= 10
AND "part"."p_size" >= 1
)
OR (
"lineitem"."l_quantity" <= 30
"lineitem"."l_partkey" = "part"."p_partkey"
AND "lineitem"."l_quantity" <= 30
AND "lineitem"."l_quantity" >= 20
AND "lineitem"."l_shipinstruct" = 'DELIVER IN PERSON'
AND "lineitem"."l_shipmode" IN ('AIR', 'AIR REG')
AND "part"."p_brand" = 'Brand#34'
AND "part"."p_container" IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG')
AND "part"."p_partkey" = "lineitem"."l_partkey"
AND "part"."p_size" <= 15
AND "part"."p_size" >= 1
);
@ -1245,11 +1245,11 @@ WITH "_u_0" AS (
"partsupp"."ps_suppkey" AS "ps_suppkey"
FROM "partsupp" AS "partsupp"
LEFT JOIN "_u_0" AS "_u_0"
ON "partsupp"."ps_partkey" = "_u_0"."p_partkey"
ON "_u_0"."p_partkey" = "partsupp"."ps_partkey"
LEFT JOIN "_u_1" AS "_u_1"
ON "_u_1"."_u_2" = "partsupp"."ps_partkey" AND "_u_1"."_u_3" = "partsupp"."ps_suppkey"
WHERE
"partsupp"."ps_availqty" > "_u_1"."_col_0" AND NOT "_u_0"."p_partkey" IS NULL
"_u_1"."_col_0" < "partsupp"."ps_availqty" AND NOT "_u_0"."p_partkey" IS NULL
GROUP BY
"partsupp"."ps_suppkey"
)
@ -1258,9 +1258,9 @@ SELECT
"supplier"."s_address" AS "s_address"
FROM "supplier" AS "supplier"
LEFT JOIN "_u_4" AS "_u_4"
ON "supplier"."s_suppkey" = "_u_4"."ps_suppkey"
ON "_u_4"."ps_suppkey" = "supplier"."s_suppkey"
JOIN "nation" AS "nation"
ON "nation"."n_name" = 'CANADA' AND "supplier"."s_nationkey" = "nation"."n_nationkey"
ON "nation"."n_name" = 'CANADA' AND "nation"."n_nationkey" = "supplier"."s_nationkey"
WHERE
NOT "_u_4"."ps_suppkey" IS NULL
ORDER BY
@ -1323,7 +1323,7 @@ WITH "_u_0" AS (
ARRAY_AGG("l3"."l_suppkey") AS "_u_3"
FROM "lineitem" AS "l3"
WHERE
"l3"."l_receiptdate" > "l3"."l_commitdate"
"l3"."l_commitdate" < "l3"."l_receiptdate"
GROUP BY
"l3"."l_orderkey"
)
@ -1332,24 +1332,24 @@ SELECT
COUNT(*) AS "numwait"
FROM "supplier" AS "supplier"
JOIN "lineitem" AS "l1"
ON "l1"."l_receiptdate" > "l1"."l_commitdate"
AND "supplier"."s_suppkey" = "l1"."l_suppkey"
ON "l1"."l_commitdate" < "l1"."l_receiptdate"
AND "l1"."l_suppkey" = "supplier"."s_suppkey"
JOIN "nation" AS "nation"
ON "nation"."n_name" = 'SAUDI ARABIA'
AND "supplier"."s_nationkey" = "nation"."n_nationkey"
AND "nation"."n_nationkey" = "supplier"."s_nationkey"
LEFT JOIN "_u_0" AS "_u_0"
ON "_u_0"."l_orderkey" = "l1"."l_orderkey"
LEFT JOIN "_u_2" AS "_u_2"
ON "_u_2"."l_orderkey" = "l1"."l_orderkey"
JOIN "orders" AS "orders"
ON "orders"."o_orderkey" = "l1"."l_orderkey" AND "orders"."o_orderstatus" = 'F'
ON "l1"."l_orderkey" = "orders"."o_orderkey" AND "orders"."o_orderstatus" = 'F'
WHERE
(
"_u_2"."l_orderkey" IS NULL
OR NOT ARRAY_ANY("_u_2"."_u_3", "_x" -> "_x" <> "l1"."l_suppkey")
OR NOT ARRAY_ANY("_u_2"."_u_3", "_x" -> "l1"."l_suppkey" <> "_x")
)
AND NOT "_u_0"."l_orderkey" IS NULL
AND ARRAY_ANY("_u_0"."_u_1", "_x" -> "_x" <> "l1"."l_suppkey")
AND ARRAY_ANY("_u_0"."_u_1", "_x" -> "l1"."l_suppkey" <> "_x")
GROUP BY
"supplier"."s_name"
ORDER BY
@ -1417,7 +1417,7 @@ SELECT
SUM("customer"."c_acctbal") AS "totacctbal"
FROM "customer" AS "customer"
JOIN "_u_0" AS "_u_0"
ON "customer"."c_acctbal" > "_u_0"."_col_0"
ON "_u_0"."_col_0" < "customer"."c_acctbal"
LEFT JOIN "_u_1" AS "_u_1"
ON "_u_1"."_u_2" = "customer"."c_custkey"
WHERE

View file

@ -24,6 +24,7 @@ WHERE
AND x.a = (SELECT SUM(y.c) AS c FROM y WHERE y.a = x.a OFFSET 10)
AND x.a > ALL (SELECT y.c FROM y WHERE y.a = x.a)
AND x.a > (SELECT COUNT(*) as d FROM y WHERE y.a = x.a)
AND x.a = SUM(SELECT 1) -- invalid statement left alone
;
SELECT
*
@ -208,7 +209,10 @@ WHERE
OFFSET 10
)
AND ARRAY_ALL(_u_19."", _x -> _x = x.a)
AND x.a > COALESCE(_u_21.d, 0);
AND x.a > COALESCE(_u_21.d, 0)
AND x.a = SUM(SELECT
1) /* invalid statement left alone */
;
SELECT
CAST((
SELECT

602
tests/gen_fixtures.py Normal file
View file

@ -0,0 +1,602 @@
import time
from sqlglot.optimizer import optimize
# Column types for the TPC-H tables, keyed by table name and then by column
# name; each value is the column's type written as a string. This mapping is
# handed to rewrite_fixtures() as the `schema` argument when the tpc-h fixture
# file is regenerated.
TPCH_SCHEMA = {
"lineitem": {
"l_orderkey": "bigint",
"l_partkey": "bigint",
"l_suppkey": "bigint",
"l_linenumber": "bigint",
"l_quantity": "double",
"l_extendedprice": "double",
"l_discount": "double",
"l_tax": "double",
"l_returnflag": "string",
"l_linestatus": "string",
"l_shipdate": "string",
"l_commitdate": "string",
"l_receiptdate": "string",
"l_shipinstruct": "string",
"l_shipmode": "string",
"l_comment": "string",
},
"orders": {
"o_orderkey": "bigint",
"o_custkey": "bigint",
"o_orderstatus": "string",
"o_totalprice": "double",
"o_orderdate": "string",
"o_orderpriority": "string",
"o_clerk": "string",
"o_shippriority": "int",
"o_comment": "string",
},
"customer": {
"c_custkey": "bigint",
"c_name": "string",
"c_address": "string",
"c_nationkey": "bigint",
"c_phone": "string",
"c_acctbal": "double",
"c_mktsegment": "string",
"c_comment": "string",
},
"part": {
"p_partkey": "bigint",
"p_name": "string",
"p_mfgr": "string",
"p_brand": "string",
"p_type": "string",
"p_size": "int",
"p_container": "string",
"p_retailprice": "double",
"p_comment": "string",
},
"supplier": {
"s_suppkey": "bigint",
"s_name": "string",
"s_address": "string",
"s_nationkey": "bigint",
"s_phone": "string",
"s_acctbal": "double",
"s_comment": "string",
},
"partsupp": {
"ps_partkey": "bigint",
"ps_suppkey": "bigint",
"ps_availqty": "int",
"ps_supplycost": "double",
"ps_comment": "string",
},
"nation": {
"n_nationkey": "bigint",
"n_name": "string",
"n_regionkey": "bigint",
"n_comment": "string",
},
"region": {
"r_regionkey": "bigint",
"r_name": "string",
"r_comment": "string",
},
}
# Column types for the TPC-DS tables, keyed by table name and then by column
# name; each value is the column's type written as a string. This mapping is
# handed to rewrite_fixtures() as the `schema` argument when the tpc-ds fixture
# file is regenerated.
TPCDS_SCHEMA = {
"catalog_sales": {
"cs_sold_date_sk": "bigint",
"cs_sold_time_sk": "bigint",
"cs_ship_date_sk": "bigint",
"cs_bill_customer_sk": "bigint",
"cs_bill_cdemo_sk": "bigint",
"cs_bill_hdemo_sk": "bigint",
"cs_bill_addr_sk": "bigint",
"cs_ship_customer_sk": "bigint",
"cs_ship_cdemo_sk": "bigint",
"cs_ship_hdemo_sk": "bigint",
"cs_ship_addr_sk": "bigint",
"cs_call_center_sk": "bigint",
"cs_catalog_page_sk": "bigint",
"cs_ship_mode_sk": "bigint",
"cs_warehouse_sk": "bigint",
"cs_item_sk": "bigint",
"cs_promo_sk": "bigint",
"cs_order_number": "bigint",
"cs_quantity": "bigint",
"cs_wholesale_cost": "double",
"cs_list_price": "double",
"cs_sales_price": "double",
"cs_ext_discount_amt": "double",
"cs_ext_sales_price": "double",
"cs_ext_wholesale_cost": "double",
"cs_ext_list_price": "double",
"cs_ext_tax": "double",
"cs_coupon_amt": "double",
"cs_ext_ship_cost": "double",
"cs_net_paid": "double",
"cs_net_paid_inc_tax": "double",
"cs_net_paid_inc_ship": "double",
"cs_net_paid_inc_ship_tax": "double",
"cs_net_profit": "double",
},
"catalog_returns": {
"cr_returned_date_sk": "bigint",
"cr_returned_time_sk": "bigint",
"cr_item_sk": "bigint",
"cr_refunded_customer_sk": "bigint",
"cr_refunded_cdemo_sk": "bigint",
"cr_refunded_hdemo_sk": "bigint",
"cr_refunded_addr_sk": "bigint",
"cr_returning_customer_sk": "bigint",
"cr_returning_cdemo_sk": "bigint",
"cr_returning_hdemo_sk": "bigint",
"cr_returning_addr_sk": "bigint",
"cr_call_center_sk": "bigint",
"cr_catalog_page_sk": "bigint",
"cr_ship_mode_sk": "bigint",
"cr_warehouse_sk": "bigint",
"cr_reason_sk": "bigint",
"cr_order_number": "bigint",
"cr_return_quantity": "bigint",
"cr_return_amount": "double",
"cr_return_tax": "double",
"cr_return_amt_inc_tax": "double",
"cr_fee": "double",
"cr_return_ship_cost": "double",
"cr_refunded_cash": "double",
"cr_reversed_charge": "double",
"cr_store_credit": "double",
"cr_net_loss": "double",
},
"inventory": {
"inv_date_sk": "bigint",
"inv_item_sk": "bigint",
"inv_warehouse_sk": "bigint",
"inv_quantity_on_hand": "bigint",
},
"store_sales": {
"ss_sold_date_sk": "bigint",
"ss_sold_time_sk": "bigint",
"ss_item_sk": "bigint",
"ss_customer_sk": "bigint",
"ss_cdemo_sk": "bigint",
"ss_hdemo_sk": "bigint",
"ss_addr_sk": "bigint",
"ss_store_sk": "bigint",
"ss_promo_sk": "bigint",
"ss_ticket_number": "bigint",
"ss_quantity": "bigint",
"ss_wholesale_cost": "double",
"ss_list_price": "double",
"ss_sales_price": "double",
"ss_ext_discount_amt": "double",
"ss_ext_sales_price": "double",
"ss_ext_wholesale_cost": "double",
"ss_ext_list_price": "double",
"ss_ext_tax": "double",
"ss_coupon_amt": "double",
"ss_net_paid": "double",
"ss_net_paid_inc_tax": "double",
"ss_net_profit": "double",
},
"store_returns": {
"sr_returned_date_sk": "bigint",
"sr_return_time_sk": "bigint",
"sr_item_sk": "bigint",
"sr_customer_sk": "bigint",
"sr_cdemo_sk": "bigint",
"sr_hdemo_sk": "bigint",
"sr_addr_sk": "bigint",
"sr_store_sk": "bigint",
"sr_reason_sk": "bigint",
"sr_ticket_number": "bigint",
"sr_return_quantity": "bigint",
"sr_return_amt": "double",
"sr_return_tax": "double",
"sr_return_amt_inc_tax": "double",
"sr_fee": "double",
"sr_return_ship_cost": "double",
"sr_refunded_cash": "double",
"sr_reversed_charge": "double",
"sr_store_credit": "double",
"sr_net_loss": "double",
},
"web_sales": {
"ws_sold_date_sk": "bigint",
"ws_sold_time_sk": "bigint",
"ws_ship_date_sk": "bigint",
"ws_item_sk": "bigint",
"ws_bill_customer_sk": "bigint",
"ws_bill_cdemo_sk": "bigint",
"ws_bill_hdemo_sk": "bigint",
"ws_bill_addr_sk": "bigint",
"ws_ship_customer_sk": "bigint",
"ws_ship_cdemo_sk": "bigint",
"ws_ship_hdemo_sk": "bigint",
"ws_ship_addr_sk": "bigint",
"ws_web_page_sk": "bigint",
"ws_web_site_sk": "bigint",
"ws_ship_mode_sk": "bigint",
"ws_warehouse_sk": "bigint",
"ws_promo_sk": "bigint",
"ws_order_number": "bigint",
"ws_quantity": "bigint",
"ws_wholesale_cost": "double",
"ws_list_price": "double",
"ws_sales_price": "double",
"ws_ext_discount_amt": "double",
"ws_ext_sales_price": "double",
"ws_ext_wholesale_cost": "double",
"ws_ext_list_price": "double",
"ws_ext_tax": "double",
"ws_coupon_amt": "double",
"ws_ext_ship_cost": "double",
"ws_net_paid": "double",
"ws_net_paid_inc_tax": "double",
"ws_net_paid_inc_ship": "double",
"ws_net_paid_inc_ship_tax": "double",
"ws_net_profit": "double",
},
"web_returns": {
"wr_returned_date_sk": "bigint",
"wr_returned_time_sk": "bigint",
"wr_item_sk": "bigint",
"wr_refunded_customer_sk": "bigint",
"wr_refunded_cdemo_sk": "bigint",
"wr_refunded_hdemo_sk": "bigint",
"wr_refunded_addr_sk": "bigint",
"wr_returning_customer_sk": "bigint",
"wr_returning_cdemo_sk": "bigint",
"wr_returning_hdemo_sk": "bigint",
"wr_returning_addr_sk": "bigint",
"wr_web_page_sk": "bigint",
"wr_reason_sk": "bigint",
"wr_order_number": "bigint",
"wr_return_quantity": "bigint",
"wr_return_amt": "double",
"wr_return_tax": "double",
"wr_return_amt_inc_tax": "double",
"wr_fee": "double",
"wr_return_ship_cost": "double",
"wr_refunded_cash": "double",
"wr_reversed_charge": "double",
"wr_account_credit": "double",
"wr_net_loss": "double",
},
"call_center": {
"cc_call_center_sk": "bigint",
"cc_call_center_id": "string",
"cc_rec_start_date": "string",
"cc_rec_end_date": "string",
"cc_closed_date_sk": "bigint",
"cc_open_date_sk": "bigint",
"cc_name": "string",
"cc_class": "string",
"cc_employees": "bigint",
"cc_sq_ft": "bigint",
"cc_hours": "string",
"cc_manager": "string",
"cc_mkt_id": "bigint",
"cc_mkt_class": "string",
"cc_mkt_desc": "string",
"cc_market_manager": "string",
"cc_division": "bigint",
"cc_division_name": "string",
"cc_company": "bigint",
"cc_company_name": "string",
"cc_street_number": "string",
"cc_street_name": "string",
"cc_street_type": "string",
"cc_suite_number": "string",
"cc_city": "string",
"cc_county": "string",
"cc_state": "string",
"cc_zip": "string",
"cc_country": "string",
"cc_gmt_offset": "double",
"cc_tax_percentage": "double",
},
"catalog_page": {
"cp_catalog_page_sk": "bigint",
"cp_catalog_page_id": "string",
"cp_start_date_sk": "bigint",
"cp_end_date_sk": "bigint",
"cp_department": "string",
"cp_catalog_number": "bigint",
"cp_catalog_page_number": "bigint",
"cp_description": "string",
"cp_type": "string",
},
"customer": {
"c_customer_sk": "bigint",
"c_customer_id": "string",
"c_current_cdemo_sk": "bigint",
"c_current_hdemo_sk": "bigint",
"c_current_addr_sk": "bigint",
"c_first_shipto_date_sk": "bigint",
"c_first_sales_date_sk": "bigint",
"c_salutation": "string",
"c_first_name": "string",
"c_last_name": "string",
"c_preferred_cust_flag": "string",
"c_birth_day": "bigint",
"c_birth_month": "bigint",
"c_birth_year": "bigint",
"c_birth_country": "string",
"c_login": "string",
"c_email_address": "string",
"c_last_review_date": "string",
},
"customer_address": {
"ca_address_sk": "bigint",
"ca_address_id": "string",
"ca_street_number": "string",
"ca_street_name": "string",
"ca_street_type": "string",
"ca_suite_number": "string",
"ca_city": "string",
"ca_county": "string",
"ca_state": "string",
"ca_zip": "string",
"ca_country": "string",
"ca_gmt_offset": "double",
"ca_location_type": "string",
},
"customer_demographics": {
"cd_demo_sk": "bigint",
"cd_gender": "string",
"cd_marital_status": "string",
"cd_education_status": "string",
"cd_purchase_estimate": "bigint",
"cd_credit_rating": "string",
"cd_dep_count": "bigint",
"cd_dep_employed_count": "bigint",
"cd_dep_college_count": "bigint",
},
"date_dim": {
"d_date_sk": "bigint",
"d_date_id": "string",
"d_date": "string",
"d_month_seq": "bigint",
"d_week_seq": "bigint",
"d_quarter_seq": "bigint",
"d_year": "bigint",
"d_dow": "bigint",
"d_moy": "bigint",
"d_dom": "bigint",
"d_qoy": "bigint",
"d_fy_year": "bigint",
"d_fy_quarter_seq": "bigint",
"d_fy_week_seq": "bigint",
"d_day_name": "string",
"d_quarter_name": "string",
"d_holiday": "string",
"d_weekend": "string",
"d_following_holiday": "string",
"d_first_dom": "bigint",
"d_last_dom": "bigint",
"d_same_day_ly": "bigint",
"d_same_day_lq": "bigint",
"d_current_day": "string",
"d_current_week": "string",
"d_current_month": "string",
"d_current_quarter": "string",
"d_current_year": "string",
},
"household_demographics": {
"hd_demo_sk": "bigint",
"hd_income_band_sk": "bigint",
"hd_buy_potential": "string",
"hd_dep_count": "bigint",
"hd_vehicle_count": "bigint",
},
"income_band": {
"ib_income_band_sk": "bigint",
"ib_lower_bound": "bigint",
"ib_upper_bound": "bigint",
},
"item": {
"i_item_sk": "bigint",
"i_item_id": "string",
"i_rec_start_date": "string",
"i_rec_end_date": "string",
"i_item_desc": "string",
"i_current_price": "double",
"i_wholesale_cost": "double",
"i_brand_id": "bigint",
"i_brand": "string",
"i_class_id": "bigint",
"i_class": "string",
"i_category_id": "bigint",
"i_category": "string",
"i_manufact_id": "bigint",
"i_manufact": "string",
"i_size": "string",
"i_formulation": "string",
"i_color": "string",
"i_units": "string",
"i_container": "string",
"i_manager_id": "bigint",
"i_product_name": "string",
},
"promotion": {
"p_promo_sk": "bigint",
"p_promo_id": "string",
"p_start_date_sk": "bigint",
"p_end_date_sk": "bigint",
"p_item_sk": "bigint",
"p_cost": "double",
"p_response_target": "bigint",
"p_promo_name": "string",
"p_channel_dmail": "string",
"p_channel_email": "string",
"p_channel_catalog": "string",
"p_channel_tv": "string",
"p_channel_radio": "string",
"p_channel_press": "string",
"p_channel_event": "string",
"p_channel_demo": "string",
"p_channel_details": "string",
"p_purpose": "string",
"p_discount_active": "string",
},
"reason": {"r_reason_sk": "bigint", "r_reason_id": "string", "r_reason_desc": "string"},
"ship_mode": {
"sm_ship_mode_sk": "bigint",
"sm_ship_mode_id": "string",
"sm_type": "string",
"sm_code": "string",
"sm_carrier": "string",
"sm_contract": "string",
},
"store": {
"s_store_sk": "bigint",
"s_store_id": "string",
"s_rec_start_date": "string",
"s_rec_end_date": "string",
"s_closed_date_sk": "bigint",
"s_store_name": "string",
"s_number_employees": "bigint",
"s_floor_space": "bigint",
"s_hours": "string",
"s_manager": "string",
"s_market_id": "bigint",
"s_geography_class": "string",
"s_market_desc": "string",
"s_market_manager": "string",
"s_division_id": "bigint",
"s_division_name": "string",
"s_company_id": "bigint",
"s_company_name": "string",
"s_street_number": "string",
"s_street_name": "string",
"s_street_type": "string",
"s_suite_number": "string",
"s_city": "string",
"s_county": "string",
"s_state": "string",
"s_zip": "string",
"s_country": "string",
"s_gmt_offset": "double",
# NOTE(review): spelled "precentage" here but "percentage" in
# cc_tax_percentage / web_tax_percentage -- presumably this mirrors the
# benchmark's own column name; verify against the query files before renaming.
"s_tax_precentage": "double",
},
"time_dim": {
"t_time_sk": "bigint",
"t_time_id": "string",
"t_time": "bigint",
"t_hour": "bigint",
"t_minute": "bigint",
"t_second": "bigint",
"t_am_pm": "string",
"t_shift": "string",
"t_sub_shift": "string",
"t_meal_time": "string",
},
"warehouse": {
"w_warehouse_sk": "bigint",
"w_warehouse_id": "string",
"w_warehouse_name": "string",
"w_warehouse_sq_ft": "bigint",
"w_street_number": "string",
"w_street_name": "string",
"w_street_type": "string",
"w_suite_number": "string",
"w_city": "string",
"w_county": "string",
"w_state": "string",
"w_zip": "string",
"w_country": "string",
"w_gmt_offset": "double",
},
"web_page": {
"wp_web_page_sk": "bigint",
"wp_web_page_id": "string",
"wp_rec_start_date": "string",
"wp_rec_end_date": "string",
"wp_creation_date_sk": "bigint",
"wp_access_date_sk": "bigint",
"wp_autogen_flag": "string",
"wp_customer_sk": "bigint",
"wp_url": "string",
"wp_type": "string",
"wp_char_count": "bigint",
"wp_link_count": "bigint",
"wp_image_count": "bigint",
"wp_max_ad_count": "bigint",
},
"web_site": {
"web_site_sk": "bigint",
"web_site_id": "string",
"web_rec_start_date": "string",
"web_rec_end_date": "string",
"web_name": "string",
"web_open_date_sk": "bigint",
"web_close_date_sk": "bigint",
"web_class": "string",
"web_manager": "string",
"web_mkt_id": "bigint",
"web_mkt_class": "string",
"web_mkt_desc": "string",
"web_market_manager": "string",
"web_company_id": "bigint",
"web_company_name": "string",
"web_street_number": "string",
"web_street_name": "string",
"web_street_type": "string",
"web_suite_number": "string",
"web_city": "string",
"web_county": "string",
"web_state": "string",
"web_zip": "string",
"web_country": "string",
# NOTE(review): every other *_gmt_offset column in this schema (cc_, ca_, s_,
# w_) is typed "double"; confirm that "string" is intended here. Changing it
# may alter the generated fixture output, so it is left untouched.
"web_gmt_offset": "string",
"web_tax_percentage": "double",
},
}
def rewrite_fixtures(in_path, out_path, schema, num, kind):
    """Regenerate an optimizer fixture file from a set of numbered query files.

    For every query number from 1 to ``num`` (inclusive) the first
    ``;``-terminated statement of ``in_path.format(i=<number>)`` is read, lines
    that start with ``--`` are dropped, backticks are replaced with double
    quotes, and the result is passed to ``optimize``. The cleaned-up original
    and the pretty-printed optimized SQL are then appended to ``out_path``
    under a ``TPC-{kind} {i}`` banner.

    Args:
        in_path: Path template with an ``{i}`` placeholder for the query number.
        out_path: Fixture file to write; it is truncated first.
        schema: Mapping forwarded to ``optimize`` as its ``schema`` argument.
        num: Number of query files to process.
        kind: Label used in the banner written above each query.

    Raises:
        OSError: If the output file or one of the input files cannot be opened.

    A query that fails to optimize is reported on stdout and skipped, so one
    bad query does not abort the whole regeneration run.
    """
    with open(out_path, "w", encoding="UTF-8") as fixture:
        # Query files are numbered from 1, not 0.
        for i in range(1, num + 1):
            # Only the first statement is needed, so the input handle is
            # released before the (potentially slow) optimization starts.
            with open(in_path.format(i=i), encoding="UTF-8") as file:
                first_statement = file.read().split(";")[0]

            original = "\n".join(
                line.rstrip()
                for line in first_statement.split("\n")
                if not line.startswith("--")
            )
            original = original.replace("`", '"').strip()

            # perf_counter is monotonic, unlike the wall clock, so the reported
            # duration cannot be skewed by system clock adjustments.
            started = time.perf_counter()
            try:
                optimized = optimize(original, schema=schema)
            except Exception as e:
                # Deliberate best-effort: report the failing query and move on.
                print("****", i, e, "****")
                continue

            fixture.write(
                f"""--------------------------------------
-- TPC-{kind} {i}
--------------------------------------
{original};
{optimized.sql(pretty=True)};
"""
            )
            print(i, time.perf_counter() - started)
# Regenerate the TPC-H optimizer fixture from its 22 numbered query files.
# NOTE: both calls use absolute paths that are hard-coded to one developer's
# checkout; adjust them before running this script elsewhere.
rewrite_fixtures(
    in_path="/home/toby/dev/tpch/{i}.sql",
    out_path="/home/toby/dev/sqlglot/tests/fixtures/optimizer/tpc-h/tpc-h.sql",
    schema=TPCH_SCHEMA,
    num=22,
    kind="H",
)

# Regenerate the TPC-DS optimizer fixture from its 99 numbered query files.
rewrite_fixtures(
    in_path="/home/toby/dev/tpcds/query{i}.sql",
    out_path="/home/toby/dev/sqlglot/tests/fixtures/optimizer/tpc-ds/tpc-ds.sql",
    schema=TPCDS_SCHEMA,
    num=99,
    kind="DS",
)

View file

@ -289,11 +289,6 @@ class TestExecutor(unittest.TestCase):
["a"],
[(1,), (2,), (3,)],
),
(
"SELECT 1 AS a UNION SELECT 2 AS a UNION SELECT 3 AS a",
["a"],
[(1,), (2,), (3,)],
),
(
"SELECT 1 / 2 AS a",
["a"],
@ -320,6 +315,11 @@ class TestExecutor(unittest.TestCase):
(None,),
],
),
(
"SELECT a FROM x UNION ALL SELECT a FROM x LIMIT 1",
["a"],
[("a",)],
),
]:
with self.subTest(sql):
if isinstance(rows, list):

View file

@ -109,7 +109,9 @@ class TestOptimizer(unittest.TestCase):
},
}
def check_file(self, file, func, pretty=False, execute=False, set_dialect=False, **kwargs):
def check_file(
self, file, func, pretty=False, execute=False, set_dialect=False, only=None, **kwargs
):
with ProcessPoolExecutor() as pool:
results = {}
@ -117,6 +119,8 @@ class TestOptimizer(unittest.TestCase):
load_sql_fixture_pairs(f"optimizer/{file}.sql"), start=1
):
title = meta.get("title") or f"{i}, {sql}"
if only and title != only:
continue
dialect = meta.get("dialect")
leave_tables_isolated = meta.get("leave_tables_isolated")
@ -137,13 +141,14 @@ class TestOptimizer(unittest.TestCase):
)
for future in as_completed(results):
optimized = future.result()
sql, title, expected, dialect, execute = results[future]
with self.subTest(title):
optimized = future.result()
actual = optimized.sql(pretty=pretty, dialect=dialect)
self.assertEqual(
expected,
optimized.sql(pretty=pretty, dialect=dialect),
actual,
)
if string_to_bool(execute):
@ -309,7 +314,7 @@ class TestOptimizer(unittest.TestCase):
self.check_file("pushdown_projections", pushdown_projections, schema=self.schema)
def test_simplify(self):
self.check_file("simplify", simplify)
self.check_file("simplify", simplify, set_dialect=True)
expression = parse_one("TRUE AND TRUE AND TRUE")
self.assertEqual(exp.true(), optimizer.simplify.simplify(expression))

View file

@ -17,6 +17,13 @@ class TestParser(unittest.TestCase):
self.assertIsInstance(parse_one("int", into=exp.DataType), exp.DataType)
self.assertIsInstance(parse_one("array<int>", into=exp.DataType), exp.DataType)
self.assertIsInstance(parse_one("foo", into=exp.Table), exp.Table)
self.assertIsInstance(
parse_one(
"WHEN MATCHED THEN UPDATE SET target.salary = COALESCE(source.salary, target.salary)",
into=exp.When,
),
exp.When,
)
with self.assertRaises(ParseError) as ctx:
parse_one("SELECT * FROM tbl", into=exp.Table)
@ -94,12 +101,31 @@ class TestParser(unittest.TestCase):
tables = [t.sql() for t in parse_one("select * from a, b.c, .d").find_all(exp.Table)]
self.assertEqual(set(tables), {"a", "b.c", "d"})
def test_union_order(self):
def test_union(self):
self.assertIsInstance(parse_one("SELECT * FROM (SELECT 1) UNION SELECT 2"), exp.Union)
self.assertIsInstance(
parse_one("SELECT x FROM y HAVING x > (SELECT 1) UNION SELECT 2"), exp.Union
)
# Check that modifiers are attached to the topmost union node and not the rightmost query
single_union = "SELECT x FROM t1 UNION ALL SELECT x FROM t2 LIMIT 1"
expr = parse_one(single_union)
limit = expr.assert_is(exp.Union).args.get("limit")
self.assertIsInstance(limit, exp.Limit)
self.assertEqual(expr.sql(), single_union)
two_unions = (
"SELECT x FROM t1 UNION ALL SELECT x FROM t2 UNION ALL SELECT x FROM t3 LIMIT 1"
)
expr = parse_one(two_unions)
limit = expr.assert_is(exp.Union).args.get("limit")
self.assertIsInstance(limit, exp.Limit)
self.assertEqual(expr.sql(), two_unions)
expr = parse_one(single_union, read="clickhouse")
self.assertIsNone(expr.args.get("limit"))
self.assertEqual(expr.sql(dialect="clickhouse"), single_union)
def test_select(self):
self.assertIsNotNone(parse_one("select 1 natural"))
self.assertIsNotNone(parse_one("select * from (select 1) x order by x.y").args["order"])

View file

@ -71,6 +71,20 @@ x"""
self.assertEqual(tokens[2].line, 2)
self.assertEqual(tokens[3].line, 3)
def test_crlf(self):
tokens = Tokenizer().tokenize("SELECT a\r\nFROM b")
tokens = [(token.token_type, token.text) for token in tokens]
self.assertEqual(
tokens,
[
(TokenType.SELECT, "SELECT"),
(TokenType.VAR, "a"),
(TokenType.FROM, "FROM"),
(TokenType.VAR, "b"),
],
)
def test_command(self):
tokens = Tokenizer().tokenize("SHOW;")
self.assertEqual(tokens[0].token_type, TokenType.SHOW)

View file

@ -89,6 +89,7 @@ class TestTranspile(unittest.TestCase):
self.validate("SELECT MIN(3)>=MIN(2)", "SELECT MIN(3) >= MIN(2)")
self.validate("SELECT 1>0", "SELECT 1 > 0")
self.validate("SELECT 3>=3", "SELECT 3 >= 3")
self.validate("SELECT a\r\nFROM b", "SELECT a FROM b")
def test_comments(self):
self.validate(

View file

@ -1,115 +0,0 @@
import time
from sqlglot.optimizer import optimize
INPUT = "/home/toby/dev/tpch/{i}.sql"
OUTPUT = "/home/toby/dev/sqlglot/tests/fixtures/optimizer/tpc-h/tpc-h.sql"
NUM = 22
SCHEMA = {
"lineitem": {
"l_orderkey": "bigint",
"l_partkey": "bigint",
"l_suppkey": "bigint",
"l_linenumber": "bigint",
"l_quantity": "double",
"l_extendedprice": "double",
"l_discount": "double",
"l_tax": "double",
"l_returnflag": "string",
"l_linestatus": "string",
"l_shipdate": "string",
"l_commitdate": "string",
"l_receiptdate": "string",
"l_shipinstruct": "string",
"l_shipmode": "string",
"l_comment": "string",
},
"orders": {
"o_orderkey": "bigint",
"o_custkey": "bigint",
"o_orderstatus": "string",
"o_totalprice": "double",
"o_orderdate": "string",
"o_orderpriority": "string",
"o_clerk": "string",
"o_shippriority": "int",
"o_comment": "string",
},
"customer": {
"c_custkey": "bigint",
"c_name": "string",
"c_address": "string",
"c_nationkey": "bigint",
"c_phone": "string",
"c_acctbal": "double",
"c_mktsegment": "string",
"c_comment": "string",
},
"part": {
"p_partkey": "bigint",
"p_name": "string",
"p_mfgr": "string",
"p_brand": "string",
"p_type": "string",
"p_size": "int",
"p_container": "string",
"p_retailprice": "double",
"p_comment": "string",
},
"supplier": {
"s_suppkey": "bigint",
"s_name": "string",
"s_address": "string",
"s_nationkey": "bigint",
"s_phone": "string",
"s_acctbal": "double",
"s_comment": "string",
},
"partsupp": {
"ps_partkey": "bigint",
"ps_suppkey": "bigint",
"ps_availqty": "int",
"ps_supplycost": "double",
"ps_comment": "string",
},
"nation": {
"n_nationkey": "bigint",
"n_name": "string",
"n_regionkey": "bigint",
"n_comment": "string",
},
"region": {
"r_regionkey": "bigint",
"r_name": "string",
"r_comment": "string",
},
}
KIND = "H"
with open(OUTPUT, "w", encoding="UTF-8") as fixture:
for i in range(NUM):
i = i + 1
with open(INPUT.format(i=i), encoding="UTF-8") as file:
original = "\n".join(
line.rstrip()
for line in file.read().split(";")[0].split("\n")
if not line.startswith("--")
)
original = original.replace("`", '"').strip()
now = time.time()
try:
optimized = optimize(original, schema=SCHEMA)
except Exception as e:
print("****", i, e, "****")
continue
fixture.write(
f"""--------------------------------------
-- TPC-{KIND} {i}
--------------------------------------
{original};
{optimized.sql(pretty=True)};
"""
)
print(i, time.time() - now)