1
0
Fork 0

Merging upstream version 11.4.5.

Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
Daniel Baumann 2025-02-13 15:48:10 +01:00
parent 0a06643852
commit 88f99e1c27
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
131 changed files with 53004 additions and 37079 deletions

View file

@ -6,6 +6,8 @@ class TestBigQuery(Validator):
dialect = "bigquery"
def test_bigquery(self):
self.validate_identity("SELECT AS STRUCT 1 AS a, 2 AS b")
self.validate_identity("SELECT AS VALUE STRUCT(1 AS a, 2 AS b)")
self.validate_identity("SELECT STRUCT<ARRAY<STRING>>(['2023-01-17'])")
self.validate_identity("SELECT * FROM q UNPIVOT(values FOR quarter IN (b, c))")
self.validate_identity(
@ -13,6 +15,15 @@ class TestBigQuery(Validator):
)
self.validate_all("LEAST(x, y)", read={"sqlite": "MIN(x, y)"})
self.validate_all("CAST(x AS CHAR)", write={"bigquery": "CAST(x AS STRING)"})
self.validate_all("CAST(x AS NCHAR)", write={"bigquery": "CAST(x AS STRING)"})
self.validate_all("CAST(x AS NVARCHAR)", write={"bigquery": "CAST(x AS STRING)"})
self.validate_all(
"SELECT ARRAY(SELECT AS STRUCT 1 a, 2 b)",
write={
"bigquery": "SELECT ARRAY(SELECT AS STRUCT 1 AS a, 2 AS b)",
},
)
self.validate_all(
"REGEXP_CONTAINS('foo', '.*')",
read={"bigquery": "REGEXP_CONTAINS('foo', '.*')"},

View file

@ -812,11 +812,13 @@ class TestDialect(Validator):
self.validate_all(
"JSON_EXTRACT(x, 'y')",
read={
"mysql": "JSON_EXTRACT(x, 'y')",
"postgres": "x->'y'",
"presto": "JSON_EXTRACT(x, 'y')",
"starrocks": "x -> 'y'",
},
write={
"mysql": "JSON_EXTRACT(x, 'y')",
"oracle": "JSON_EXTRACT(x, 'y')",
"postgres": "x -> 'y'",
"presto": "JSON_EXTRACT(x, 'y')",
@ -834,6 +836,17 @@ class TestDialect(Validator):
"presto": "JSON_EXTRACT_SCALAR(x, 'y')",
},
)
self.validate_all(
"JSON_EXTRACT_SCALAR(stream_data, '$.data.results')",
read={
"hive": "GET_JSON_OBJECT(stream_data, '$.data.results')",
"mysql": "stream_data ->> '$.data.results'",
},
write={
"hive": "GET_JSON_OBJECT(stream_data, '$.data.results')",
"mysql": "stream_data ->> '$.data.results'",
},
)
self.validate_all(
"JSONB_EXTRACT(x, 'y')",
read={
@ -1000,6 +1013,7 @@ class TestDialect(Validator):
self.validate_identity("some.column LIKE 'foo' || another.column || 'bar' || LOWER(x)")
self.validate_identity("some.column LIKE 'foo' + another.column + 'bar'")
self.validate_all("LIKE(x, 'z')", write={"": "'z' LIKE x"})
self.validate_all(
"x ILIKE '%y'",
read={
@ -1196,9 +1210,13 @@ class TestDialect(Validator):
)
self.validate_all(
"SELECT x FROM y LIMIT 10",
read={
"tsql": "SELECT TOP 10 x FROM y",
},
write={
"sqlite": "SELECT x FROM y LIMIT 10",
"oracle": "SELECT x FROM y FETCH FIRST 10 ROWS ONLY",
"tsql": "SELECT x FROM y FETCH FIRST 10 ROWS ONLY",
},
)
self.validate_all(
@ -1493,6 +1511,46 @@ SELECT
},
)
def test_logarithm(self):
self.validate_all(
"LOG(x)",
read={
"duckdb": "LOG(x)",
"postgres": "LOG(x)",
"redshift": "LOG(x)",
"sqlite": "LOG(x)",
"teradata": "LOG(x)",
},
)
self.validate_all(
"LN(x)",
read={
"bigquery": "LOG(x)",
"clickhouse": "LOG(x)",
"databricks": "LOG(x)",
"drill": "LOG(x)",
"hive": "LOG(x)",
"mysql": "LOG(x)",
"tsql": "LOG(x)",
},
)
self.validate_all(
"LOG(b, n)",
read={
"bigquery": "LOG(n, b)",
"databricks": "LOG(b, n)",
"drill": "LOG(b, n)",
"hive": "LOG(b, n)",
"mysql": "LOG(b, n)",
"oracle": "LOG(b, n)",
"postgres": "LOG(b, n)",
"snowflake": "LOG(b, n)",
"spark": "LOG(b, n)",
"sqlite": "LOG(b, n)",
"tsql": "LOG(n, b)",
},
)
def test_count_if(self):
self.validate_identity("COUNT_IF(DISTINCT cond)")

View file

@ -125,6 +125,7 @@ class TestDuckDB(Validator):
"SELECT a['x space'] FROM (SELECT {'x space': 1, 'y': 2, 'z': 3} AS a)"
)
self.validate_all("x ~ y", write={"duckdb": "REGEXP_MATCHES(x, y)"})
self.validate_all("SELECT * FROM 'x.y'", write={"duckdb": 'SELECT * FROM "x.y"'})
self.validate_all(
"WITH 'x' AS (SELECT 1) SELECT * FROM x",

View file

@ -246,6 +246,30 @@ class TestHive(Validator):
)
def test_time(self):
self.validate_all(
"(UNIX_TIMESTAMP(y) - UNIX_TIMESTAMP(x)) * 1000",
read={
"presto": "DATE_DIFF('millisecond', x, y)",
},
)
self.validate_all(
"UNIX_TIMESTAMP(y) - UNIX_TIMESTAMP(x)",
read={
"presto": "DATE_DIFF('second', x, y)",
},
)
self.validate_all(
"(UNIX_TIMESTAMP(y) - UNIX_TIMESTAMP(x)) / 60",
read={
"presto": "DATE_DIFF('minute', x, y)",
},
)
self.validate_all(
"(UNIX_TIMESTAMP(y) - UNIX_TIMESTAMP(x)) / 3600",
read={
"presto": "DATE_DIFF('hour', x, y)",
},
)
self.validate_all(
"DATEDIFF(a, b)",
write={

View file

@ -16,6 +16,7 @@ class TestMySQL(Validator):
)
def test_identity(self):
self.validate_identity("x ->> '$.name'")
self.validate_identity("SELECT CAST(`a`.`b` AS INT) FROM foo")
self.validate_identity("SELECT TRIM(LEADING 'bla' FROM ' XXX ')")
self.validate_identity("SELECT TRIM(TRAILING 'bla' FROM ' XXX ')")
@ -424,6 +425,10 @@ COMMENT='客户账户表'"""
show = self.validate_identity("SHOW INDEX FROM foo FROM bar")
self.assertEqual(show.text("db"), "bar")
self.validate_all(
"SHOW INDEX FROM bar.foo", write={"mysql": "SHOW INDEX FROM foo FROM bar"}
)
def test_show_db_like_or_where_sql(self):
for key in [
"OPEN TABLES",

View file

@ -12,6 +12,24 @@ class TestOracle(Validator):
self.validate_identity("SELECT e1.x, e2.x FROM e e1, e e2 WHERE e1.y = e2.y (+)")
self.validate_identity("SELECT e1.x, e2.x FROM e e1, e e2 WHERE e1.y (+) = e2.y (+)")
def test_hints(self):
self.validate_identity("SELECT /*+ USE_NL(A B) */ A.COL_TEST FROM TABLE_A A, TABLE_B B")
self.validate_identity(
"SELECT /*+ INDEX(v.j jhist_employee_ix (employee_id start_date)) */ * FROM v"
)
self.validate_identity(
"SELECT /*+ USE_NL(A B C) */ A.COL_TEST FROM TABLE_A A, TABLE_B B, TABLE_C C"
)
self.validate_identity(
"SELECT /*+ NO_INDEX(employees emp_empid) */ employee_id FROM employees WHERE employee_id > 200"
)
self.validate_identity(
"SELECT /*+ NO_INDEX_FFS(items item_order_ix) */ order_id FROM order_items items"
)
self.validate_identity(
"SELECT /*+ LEADING(e j) */ * FROM employees e, departments d, job_history j WHERE e.department_id = d.department_id AND e.hire_date = j.start_date"
)
def test_xml_table(self):
self.validate_identity("XMLTABLE('x')")
self.validate_identity("XMLTABLE('x' RETURNING SEQUENCE BY REF)")

View file

@ -194,8 +194,9 @@ class TestPostgres(Validator):
write={
"postgres": "SELECT * FROM x FETCH FIRST 1 ROWS ONLY",
"presto": "SELECT * FROM x FETCH FIRST 1 ROWS ONLY",
"hive": "SELECT * FROM x FETCH FIRST 1 ROWS ONLY",
"spark": "SELECT * FROM x FETCH FIRST 1 ROWS ONLY",
"hive": "SELECT * FROM x LIMIT 1",
"spark": "SELECT * FROM x LIMIT 1",
"sqlite": "SELECT * FROM x LIMIT 1",
},
)
self.validate_all(

View file

@ -369,6 +369,12 @@ class TestPresto(Validator):
self.validate_identity("START TRANSACTION ISOLATION LEVEL REPEATABLE READ")
self.validate_identity("APPROX_PERCENTILE(a, b, c, d)")
self.validate_all(
"SELECT JSON_OBJECT(KEY 'key1' VALUE 1, KEY 'key2' VALUE TRUE)",
write={
"presto": "SELECT JSON_OBJECT('key1': 1, 'key2': TRUE)",
},
)
self.validate_all(
"ARRAY_AGG(x ORDER BY y DESC)",
write={

View file

@ -530,6 +530,7 @@ class TestSnowflake(Validator):
"snowflake": "DATEADD(DAY, 5, CAST('2008-12-25' AS DATE))",
},
)
self.validate_identity("DATEDIFF(DAY, 5, CAST('2008-12-25' AS DATE))")
def test_semi_structured_types(self):
self.validate_identity("SELECT CAST(a AS VARIANT)")
@ -814,6 +815,7 @@ FROM persons AS p, LATERAL FLATTEN(input => p.c, path => 'contact') AS f, LATERA
self.assertIsInstance(like, exp.LikeAny)
self.assertIsInstance(ilike, exp.ILikeAny)
like.sql() # check that this doesn't raise
def test_match_recognize(self):
for row in (

View file

@ -212,6 +212,7 @@ TBLPROPERTIES (
self.validate_identity("TRIM(BOTH 'SL' FROM 'SSparkSQLS')")
self.validate_identity("TRIM(LEADING 'SL' FROM 'SSparkSQLS')")
self.validate_identity("TRIM(TRAILING 'SL' FROM 'SSparkSQLS')")
self.validate_identity("SPLIT(str, pattern, lim)")
self.validate_all(
"CAST(x AS TIMESTAMP)", read={"trino": "CAST(x AS TIMESTAMP(6) WITH TIME ZONE)"}

View file

@ -56,6 +56,11 @@ class TestSQLite(Validator):
)
def test_sqlite(self):
self.validate_all("SELECT LIKE(y, x)", write={"sqlite": "SELECT x LIKE y"})
self.validate_all("SELECT GLOB('*y*', 'xyz')", write={"sqlite": "SELECT 'xyz' GLOB '*y*'"})
self.validate_all(
"SELECT LIKE('%y%', 'xyz', '')", write={"sqlite": "SELECT 'xyz' LIKE '%y%' ESCAPE ''"}
)
self.validate_all(
"CURRENT_DATE",
read={

View file

@ -7,6 +7,7 @@ class TestTSQL(Validator):
def test_tsql(self):
self.validate_identity("SELECT CASE WHEN a > 1 THEN b END")
self.validate_identity("SELECT * FROM taxi ORDER BY 1 OFFSET 0 ROWS FETCH NEXT 3 ROWS ONLY")
self.validate_identity("END")
self.validate_identity("@x")
self.validate_identity("#x")
@ -567,15 +568,21 @@ WHERE
write={"spark": "LAST_DAY(ADD_MONTHS(CURRENT_TIMESTAMP(), -1))"},
)
def test_variables(self):
# In TSQL @, # can be used as a prefix for variables/identifiers
expr = parse_one("@x", read="tsql")
self.assertIsInstance(expr, exp.Column)
self.assertIsInstance(expr.this, exp.Identifier)
def test_identifier_prefixes(self):
expr = parse_one("#x", read="tsql")
self.assertIsInstance(expr, exp.Column)
self.assertIsInstance(expr.this, exp.Identifier)
self.assertEqual(expr.sql("tsql"), "#x")
expr = parse_one("@x", read="tsql")
self.assertIsInstance(expr, exp.Parameter)
self.assertIsInstance(expr.this, exp.Var)
self.assertEqual(expr.sql("tsql"), "@x")
table = parse_one("select * from @x", read="tsql").args["from"].expressions[0]
self.assertIsInstance(table, exp.Table)
self.assertIsInstance(table.this, exp.Parameter)
self.assertIsInstance(table.this.this, exp.Var)
def test_system_time(self):
self.validate_all(

View file

@ -74,6 +74,7 @@ a.b.INT(1.234)
INT(x / 100)
time * 100
int * 100
dec + 1
x IN (-1, 1)
x IN ('a', 'a''a')
x IN ((1))
@ -114,7 +115,6 @@ SPLIT(SPLIT(referrer, 'utm_source=')[OFFSET(1)], "&")[OFFSET(0)]
x[ORDINAL(1)][SAFE_OFFSET(2)]
x GLOB '??-*'
x GLOB y
LIKE(x, 'z')
ILIKE(x, 'z')
x LIKE SUBSTR('abc', 1, 1)
x LIKE y
@ -272,6 +272,7 @@ SELECT a FROM test WHERE TRUE OR NOT EXISTS(SELECT * FROM x)
SELECT a AS any, b AS some, c AS all, d AS exists FROM test WHERE a = ANY (SELECT 1)
SELECT a FROM test WHERE a > ALL (SELECT 1)
SELECT a FROM test WHERE (a, b) IN (SELECT 1, 2)
SELECT X((SELECT 1) UNION (SELECT 2))
SELECT a FROM test ORDER BY a
SELECT a FROM test ORDER BY a, b
SELECT x FROM tests ORDER BY a DESC, b DESC, c
@ -646,6 +647,7 @@ DROP TABLE a.b
DROP TABLE IF EXISTS a
DROP TABLE IF EXISTS a.b
DROP TABLE a CASCADE
DROP TABLE s_hajo CASCADE CONSTRAINTS
DROP VIEW a
DROP VIEW a.b
DROP VIEW IF EXISTS a
@ -697,7 +699,7 @@ WITH a AS (SELECT * FROM b) DELETE FROM a
WITH a AS (SELECT * FROM b) CACHE TABLE a
SELECT ? AS ? FROM x WHERE b BETWEEN ? AND ? GROUP BY ?, 1 LIMIT ?
SELECT :hello, ? FROM x LIMIT :my_limit
SELECT * FROM x FETCH NEXT @take ROWS ONLY OFFSET @skip
SELECT * FROM x OFFSET @skip FETCH NEXT @take ROWS ONLY
WITH a AS ((SELECT b.foo AS foo, b.bar AS bar FROM b) UNION ALL (SELECT c.foo AS foo, c.bar AS bar FROM c)) SELECT * FROM a
WITH a AS ((SELECT 1 AS b) UNION ALL (SELECT 1 AS b)) SELECT * FROM a
SELECT (WITH x AS (SELECT 1 AS y) SELECT * FROM x) AS z
@ -762,11 +764,13 @@ ALTER TABLE pets ADD CONSTRAINT pets_persons_fk FOREIGN KEY (owner_first_name, o
ALTER TABLE pets ADD CONSTRAINT pets_name_not_cute_chk CHECK (LENGTH(name) < 20)
ALTER TABLE people10m ADD CONSTRAINT dateWithinRange CHECK (birthDate > '1900-01-01')
ALTER TABLE people10m ADD CONSTRAINT validIds CHECK (id > 1 AND id < 99999999) ENFORCED
ALTER TABLE s_ut ADD CONSTRAINT s_ut_uq UNIQUE hajo
ALTER TABLE baa ADD CONSTRAINT boo PRIMARY KEY (x, y) NOT ENFORCED DEFERRABLE INITIALLY DEFERRED NORELY
ALTER TABLE baa ADD CONSTRAINT boo PRIMARY KEY (x, y) NOT ENFORCED DEFERRABLE INITIALLY DEFERRED NORELY
ALTER TABLE baa ADD CONSTRAINT boo FOREIGN KEY (x, y) REFERENCES persons ON UPDATE NO ACTION ON DELETE NO ACTION MATCH FULL
ALTER TABLE a ADD PRIMARY KEY (x, y) NOT ENFORCED
ALTER TABLE a ADD FOREIGN KEY (x, y) REFERENCES bla
SELECT partition FROM a
SELECT end FROM a
SELECT id FROM b.a AS a QUALIFY ROW_NUMBER() OVER (PARTITION BY br ORDER BY sadf DESC) = 1
SELECT LEFT.FOO FROM BLA AS LEFT
@ -776,3 +780,23 @@ SELECT * FROM x WHERE name ILIKE ANY XXX('a', 'b')
SELECT * FROM x WHERE name LIKE ANY XXX('a', 'b')
a OVERLAPS b
GRANT INSERT ON foo TO bla
PRAGMA quick_check
PRAGMA QUICK_CHECK(0)
PRAGMA QUICK_CHECK('sqlite_master')
PRAGMA schema.quick_check
PRAGMA schema.QUICK_CHECK(0)
PRAGMA schema.QUICK_CHECK('sqlite_master')
PRAGMA synchronous = 2
PRAGMA synchronous = FULL
PRAGMA memory_limit = '1GB'
PRAGMA schema.synchronous = 2
PRAGMA schema.synchronous = FULL
PRAGMA schema.memory_limit = '1GB'
JSON_OBJECT()
JSON_OBJECT('key1': 1, 'key2': TRUE)
JSON_OBJECT('id': '5', 'fld1': 'bla', 'fld2': 'bar')
JSON_OBJECT('x': NULL, 'y': 1 NULL ON NULL)
JSON_OBJECT('x': NULL, 'y': 1 WITH UNIQUE KEYS)
JSON_OBJECT('x': NULL, 'y': 1 ABSENT ON NULL WITH UNIQUE KEYS)
JSON_OBJECT('x': 1 RETURNING VARCHAR(100))
JSON_OBJECT('x': 1 RETURNING VARBINARY FORMAT JSON ENCODING UTF8)

View file

@ -10,6 +10,9 @@ SELECT CAST(1 AS VARCHAR) AS "a" FROM "w" AS "w";
SELECT CAST(1 + 3.2 AS DOUBLE) AS a FROM w AS w;
SELECT 1 + 3.2 AS "a" FROM "w" AS "w";
SELECT CAST("2022-01-01" AS DATE) + INTERVAL '1' day;
SELECT CAST("2022-01-01" AS DATE) + INTERVAL '1' "day" AS "_col_0";
--------------------------------------
-- Ensure boolean predicates
--------------------------------------

View file

@ -461,3 +461,43 @@ SELECT
*
FROM "db1"."tbl" AS "tbl"
CROSS JOIN "db2"."tbl" AS "tbl_2";
SELECT
*,
IFF(
IFF(
uploaded_at >= '2022-06-16',
'workday',
'bamboohr'
) = source_system,
1,
0
) AS sort_order
FROM
unioned
WHERE
(
source_system = 'workday'
AND '9999-01-01' >= '2022-06-16'
)
OR (
source_system = 'bamboohr'
AND '0001-01-01' < '2022-06-16'
) QUALIFY ROW_NUMBER() OVER (
PARTITION BY unique_filter_key
ORDER BY
sort_order DESC,
1
) = 1;
SELECT
*,
IFF(
IFF("unioned"."uploaded_at" >= '2022-06-16', 'workday', 'bamboohr') = "unioned"."source_system",
1,
0
) AS "sort_order"
FROM "unioned" AS "unioned"
WHERE
"unioned"."source_system" = 'bamboohr' OR "unioned"."source_system" = 'workday'
QUALIFY
ROW_NUMBER() OVER (PARTITION BY "unioned"."unique_filter_key" ORDER BY "unioned"."sort_order" DESC, 1) = 1;

View file

@ -92,6 +92,12 @@ SELECT SUM(x.a) AS c FROM x AS x JOIN y AS y ON x.b = y.b GROUP BY y.c;
SELECT COALESCE(x.a) AS d FROM x JOIN y ON x.b = y.b GROUP BY d;
SELECT COALESCE(x.a) AS d FROM x AS x JOIN y AS y ON x.b = y.b GROUP BY COALESCE(x.a);
SELECT a + 1 AS d FROM x WHERE d > 1;
SELECT x.a + 1 AS d FROM x AS x WHERE x.a + 1 > 1;
SELECT a + 1 AS d, d + 2 FROM x;
SELECT x.a + 1 AS d, x.a + 1 + 2 AS _col_1 FROM x AS x;
SELECT a AS a, b FROM x ORDER BY a;
SELECT x.a AS a, x.b AS b FROM x AS x ORDER BY a;
@ -282,6 +288,30 @@ SELECT COALESCE(x.b, y.b) AS b FROM x AS x JOIN y AS y ON x.b = y.b WHERE COALES
SELECT b FROM x JOIN y USING (b) JOIN z USING (b);
SELECT COALESCE(x.b, y.b, z.b) AS b FROM x AS x JOIN y AS y ON x.b = y.b JOIN z AS z ON x.b = z.b;
SELECT * FROM x JOIN y USING(b);
SELECT x.a AS a, COALESCE(x.b, y.b) AS b, y.c AS c FROM x AS x JOIN y AS y ON x.b = y.b;
SELECT x.* FROM x JOIN y USING(b);
SELECT x.a AS a, COALESCE(x.b, y.b) AS b FROM x AS x JOIN y AS y ON x.b = y.b;
SELECT * FROM x LEFT JOIN y USING(b);
SELECT x.a AS a, COALESCE(x.b, y.b) AS b, y.c AS c FROM x AS x LEFT JOIN y AS y ON x.b = y.b;
SELECT b FROM x JOIN y USING(b);
SELECT COALESCE(x.b, y.b) AS b FROM x AS x JOIN y AS y ON x.b = y.b;
SELECT b, c FROM x JOIN y USING(b);
SELECT COALESCE(x.b, y.b) AS b, y.c AS c FROM x AS x JOIN y AS y ON x.b = y.b;
SELECT b, c FROM y JOIN z USING(b, c);
SELECT COALESCE(y.b, z.b) AS b, COALESCE(y.c, z.c) AS c FROM y AS y JOIN z AS z ON y.b = z.b AND y.c = z.c;
SELECT * FROM y JOIN z USING(b, c);
SELECT COALESCE(y.b, z.b) AS b, COALESCE(y.c, z.c) AS c FROM y AS y JOIN z AS z ON y.b = z.b AND y.c = z.c;
SELECT * FROM y JOIN z USING(b, c) WHERE b = 2 AND c = 3;
SELECT COALESCE(y.b, z.b) AS b, COALESCE(y.c, z.c) AS c FROM y AS y JOIN z AS z ON y.b = z.b AND y.c = z.c WHERE COALESCE(y.b, z.b) = 2 AND COALESCE(y.c, z.c) = 3;
--------------------------------------
-- Hint with table reference
--------------------------------------

View file

@ -141,6 +141,9 @@ A OR C;
(A OR C) AND (A OR B OR C);
A OR C;
A AND (B AND C) AND (D AND E);
A AND B AND C AND D AND E;
--------------------------------------
-- Elimination
--------------------------------------
@ -249,6 +252,12 @@ x = x;
(('a' = 'a') AND TRUE and NOT FALSE);
TRUE;
(x = y) and z;
x = y AND z;
x * (1 - y);
x * (1 - y);
--------------------------------------
-- Literals
--------------------------------------
@ -310,7 +319,7 @@ TRUE;
8.0;
6 - 2 + 4 * 2 + a;
12 + a;
a + 12;
a + 1 + 1 + 2;
a + 4;
@ -367,7 +376,7 @@ interval '1' year + date '1998-01-01';
CAST('1999-01-01' AS DATE);
interval '1' year + date '1998-01-01' + 3 * 7 * 4;
CAST('1999-01-01' AS DATE) + 84;
84 + CAST('1999-01-01' AS DATE);
date '1998-12-01' - interval '90' foo;
CAST('1998-12-01' AS DATE) - INTERVAL '90' foo;
@ -554,3 +563,6 @@ x <= 10 AND x >= 9;
1 < x AND 3 < x;
x > 3;
'a' < 'b';
TRUE;

12613
tests/fixtures/optimizer/tpc-ds/tpc-ds.sql vendored Normal file

File diff suppressed because it is too large Load diff

View file

@ -390,9 +390,9 @@ SELECT
)) AS "revenue"
FROM "supplier" AS "supplier"
JOIN "lineitem" AS "lineitem"
ON CAST("lineitem"."l_shipdate" AS DATE) <= CAST('1996-12-31' AS DATE)
ON "supplier"."s_suppkey" = "lineitem"."l_suppkey"
AND CAST("lineitem"."l_shipdate" AS DATE) <= CAST('1996-12-31' AS DATE)
AND CAST("lineitem"."l_shipdate" AS DATE) >= CAST('1995-01-01' AS DATE)
AND "supplier"."s_suppkey" = "lineitem"."l_suppkey"
JOIN "orders" AS "orders"
ON "orders"."o_orderkey" = "lineitem"."l_orderkey"
JOIN "customer" AS "customer"
@ -743,11 +743,11 @@ SELECT
FROM "orders" AS "orders"
JOIN "lineitem" AS "lineitem"
ON "lineitem"."l_commitdate" < "lineitem"."l_receiptdate"
AND CAST("lineitem"."l_receiptdate" AS DATE) < CAST('1995-01-01' AS DATE)
AND CAST("lineitem"."l_receiptdate" AS DATE) >= CAST('1994-01-01' AS DATE)
AND "lineitem"."l_shipdate" < "lineitem"."l_commitdate"
AND "lineitem"."l_shipmode" IN ('MAIL', 'SHIP')
AND "orders"."o_orderkey" = "lineitem"."l_orderkey"
AND CAST("lineitem"."l_receiptdate" AS DATE) < CAST('1995-01-01' AS DATE)
AND CAST("lineitem"."l_receiptdate" AS DATE) >= CAST('1994-01-01' AS DATE)
GROUP BY
"lineitem"."l_shipmode"
ORDER BY
@ -1224,10 +1224,18 @@ where
order by
s_name;
WITH "_u_0" AS (
SELECT
"part"."p_partkey" AS "p_partkey"
FROM "part" AS "part"
WHERE
"part"."p_name" LIKE 'forest%'
GROUP BY
"part"."p_partkey"
), "_u_1" AS (
SELECT
0.5 * SUM("lineitem"."l_quantity") AS "_col_0",
"lineitem"."l_partkey" AS "_u_1",
"lineitem"."l_suppkey" AS "_u_2"
"lineitem"."l_partkey" AS "_u_2",
"lineitem"."l_suppkey" AS "_u_3"
FROM "lineitem" AS "lineitem"
WHERE
CAST("lineitem"."l_shipdate" AS DATE) < CAST('1995-01-01' AS DATE)
@ -1235,24 +1243,16 @@ WITH "_u_0" AS (
GROUP BY
"lineitem"."l_partkey",
"lineitem"."l_suppkey"
), "_u_3" AS (
SELECT
"part"."p_partkey" AS "p_partkey"
FROM "part" AS "part"
WHERE
"part"."p_name" LIKE 'forest%'
GROUP BY
"part"."p_partkey"
), "_u_4" AS (
SELECT
"partsupp"."ps_suppkey" AS "ps_suppkey"
FROM "partsupp" AS "partsupp"
LEFT JOIN "_u_0" AS "_u_0"
ON "_u_0"."_u_1" = "partsupp"."ps_partkey" AND "_u_0"."_u_2" = "partsupp"."ps_suppkey"
LEFT JOIN "_u_3" AS "_u_3"
ON "partsupp"."ps_partkey" = "_u_3"."p_partkey"
ON "partsupp"."ps_partkey" = "_u_0"."p_partkey"
LEFT JOIN "_u_1" AS "_u_1"
ON "_u_1"."_u_2" = "partsupp"."ps_partkey" AND "_u_1"."_u_3" = "partsupp"."ps_suppkey"
WHERE
"partsupp"."ps_availqty" > "_u_0"."_col_0" AND NOT "_u_3"."p_partkey" IS NULL
"partsupp"."ps_availqty" > "_u_1"."_col_0" AND NOT "_u_0"."p_partkey" IS NULL
GROUP BY
"partsupp"."ps_suppkey"
)

View file

@ -133,3 +133,475 @@ TPCH_SCHEMA = {
"r_comment": "string",
},
}
TPCDS_SCHEMA = {
"catalog_sales": {
"cs_sold_date_sk": "bigint",
"cs_sold_time_sk": "bigint",
"cs_ship_date_sk": "bigint",
"cs_bill_customer_sk": "bigint",
"cs_bill_cdemo_sk": "bigint",
"cs_bill_hdemo_sk": "bigint",
"cs_bill_addr_sk": "bigint",
"cs_ship_customer_sk": "bigint",
"cs_ship_cdemo_sk": "bigint",
"cs_ship_hdemo_sk": "bigint",
"cs_ship_addr_sk": "bigint",
"cs_call_center_sk": "bigint",
"cs_catalog_page_sk": "bigint",
"cs_ship_mode_sk": "bigint",
"cs_warehouse_sk": "bigint",
"cs_item_sk": "bigint",
"cs_promo_sk": "bigint",
"cs_order_number": "bigint",
"cs_quantity": "bigint",
"cs_wholesale_cost": "double",
"cs_list_price": "double",
"cs_sales_price": "double",
"cs_ext_discount_amt": "double",
"cs_ext_sales_price": "double",
"cs_ext_wholesale_cost": "double",
"cs_ext_list_price": "double",
"cs_ext_tax": "double",
"cs_coupon_amt": "double",
"cs_ext_ship_cost": "double",
"cs_net_paid": "double",
"cs_net_paid_inc_tax": "double",
"cs_net_paid_inc_ship": "double",
"cs_net_paid_inc_ship_tax": "double",
"cs_net_profit": "double",
},
"catalog_returns": {
"cr_returned_date_sk": "bigint",
"cr_returned_time_sk": "bigint",
"cr_item_sk": "bigint",
"cr_refunded_customer_sk": "bigint",
"cr_refunded_cdemo_sk": "bigint",
"cr_refunded_hdemo_sk": "bigint",
"cr_refunded_addr_sk": "bigint",
"cr_returning_customer_sk": "bigint",
"cr_returning_cdemo_sk": "bigint",
"cr_returning_hdemo_sk": "bigint",
"cr_returning_addr_sk": "bigint",
"cr_call_center_sk": "bigint",
"cr_catalog_page_sk": "bigint",
"cr_ship_mode_sk": "bigint",
"cr_warehouse_sk": "bigint",
"cr_reason_sk": "bigint",
"cr_order_number": "bigint",
"cr_return_quantity": "bigint",
"cr_return_amount": "double",
"cr_return_tax": "double",
"cr_return_amt_inc_tax": "double",
"cr_fee": "double",
"cr_return_ship_cost": "double",
"cr_refunded_cash": "double",
"cr_reversed_charge": "double",
"cr_store_credit": "double",
"cr_net_loss": "double",
},
"inventory": {
"inv_date_sk": "bigint",
"inv_item_sk": "bigint",
"inv_warehouse_sk": "bigint",
"inv_quantity_on_hand": "bigint",
},
"store_sales": {
"ss_sold_date_sk": "bigint",
"ss_sold_time_sk": "bigint",
"ss_item_sk": "bigint",
"ss_customer_sk": "bigint",
"ss_cdemo_sk": "bigint",
"ss_hdemo_sk": "bigint",
"ss_addr_sk": "bigint",
"ss_store_sk": "bigint",
"ss_promo_sk": "bigint",
"ss_ticket_number": "bigint",
"ss_quantity": "bigint",
"ss_wholesale_cost": "double",
"ss_list_price": "double",
"ss_sales_price": "double",
"ss_ext_discount_amt": "double",
"ss_ext_sales_price": "double",
"ss_ext_wholesale_cost": "double",
"ss_ext_list_price": "double",
"ss_ext_tax": "double",
"ss_coupon_amt": "double",
"ss_net_paid": "double",
"ss_net_paid_inc_tax": "double",
"ss_net_profit": "double",
},
"store_returns": {
"sr_returned_date_sk": "bigint",
"sr_return_time_sk": "bigint",
"sr_item_sk": "bigint",
"sr_customer_sk": "bigint",
"sr_cdemo_sk": "bigint",
"sr_hdemo_sk": "bigint",
"sr_addr_sk": "bigint",
"sr_store_sk": "bigint",
"sr_reason_sk": "bigint",
"sr_ticket_number": "bigint",
"sr_return_quantity": "bigint",
"sr_return_amt": "double",
"sr_return_tax": "double",
"sr_return_amt_inc_tax": "double",
"sr_fee": "double",
"sr_return_ship_cost": "double",
"sr_refunded_cash": "double",
"sr_reversed_charge": "double",
"sr_store_credit": "double",
"sr_net_loss": "double",
},
"web_sales": {
"ws_sold_date_sk": "bigint",
"ws_sold_time_sk": "bigint",
"ws_ship_date_sk": "bigint",
"ws_item_sk": "bigint",
"ws_bill_customer_sk": "bigint",
"ws_bill_cdemo_sk": "bigint",
"ws_bill_hdemo_sk": "bigint",
"ws_bill_addr_sk": "bigint",
"ws_ship_customer_sk": "bigint",
"ws_ship_cdemo_sk": "bigint",
"ws_ship_hdemo_sk": "bigint",
"ws_ship_addr_sk": "bigint",
"ws_web_page_sk": "bigint",
"ws_web_site_sk": "bigint",
"ws_ship_mode_sk": "bigint",
"ws_warehouse_sk": "bigint",
"ws_promo_sk": "bigint",
"ws_order_number": "bigint",
"ws_quantity": "bigint",
"ws_wholesale_cost": "double",
"ws_list_price": "double",
"ws_sales_price": "double",
"ws_ext_discount_amt": "double",
"ws_ext_sales_price": "double",
"ws_ext_wholesale_cost": "double",
"ws_ext_list_price": "double",
"ws_ext_tax": "double",
"ws_coupon_amt": "double",
"ws_ext_ship_cost": "double",
"ws_net_paid": "double",
"ws_net_paid_inc_tax": "double",
"ws_net_paid_inc_ship": "double",
"ws_net_paid_inc_ship_tax": "double",
"ws_net_profit": "double",
},
"web_returns": {
"wr_returned_date_sk": "bigint",
"wr_returned_time_sk": "bigint",
"wr_item_sk": "bigint",
"wr_refunded_customer_sk": "bigint",
"wr_refunded_cdemo_sk": "bigint",
"wr_refunded_hdemo_sk": "bigint",
"wr_refunded_addr_sk": "bigint",
"wr_returning_customer_sk": "bigint",
"wr_returning_cdemo_sk": "bigint",
"wr_returning_hdemo_sk": "bigint",
"wr_returning_addr_sk": "bigint",
"wr_web_page_sk": "bigint",
"wr_reason_sk": "bigint",
"wr_order_number": "bigint",
"wr_return_quantity": "bigint",
"wr_return_amt": "double",
"wr_return_tax": "double",
"wr_return_amt_inc_tax": "double",
"wr_fee": "double",
"wr_return_ship_cost": "double",
"wr_refunded_cash": "double",
"wr_reversed_charge": "double",
"wr_account_credit": "double",
"wr_net_loss": "double",
},
"call_center": {
"cc_call_center_sk": "bigint",
"cc_call_center_id": "string",
"cc_rec_start_date": "string",
"cc_rec_end_date": "string",
"cc_closed_date_sk": "bigint",
"cc_open_date_sk": "bigint",
"cc_name": "string",
"cc_class": "string",
"cc_employees": "bigint",
"cc_sq_ft": "bigint",
"cc_hours": "string",
"cc_manager": "string",
"cc_mkt_id": "bigint",
"cc_mkt_class": "string",
"cc_mkt_desc": "string",
"cc_market_manager": "string",
"cc_division": "bigint",
"cc_division_name": "string",
"cc_company": "bigint",
"cc_company_name": "string",
"cc_street_number": "string",
"cc_street_name": "string",
"cc_street_type": "string",
"cc_suite_number": "string",
"cc_city": "string",
"cc_county": "string",
"cc_state": "string",
"cc_zip": "string",
"cc_country": "string",
"cc_gmt_offset": "double",
"cc_tax_percentage": "double",
},
"catalog_page": {
"cp_catalog_page_sk": "bigint",
"cp_catalog_page_id": "string",
"cp_start_date_sk": "bigint",
"cp_end_date_sk": "bigint",
"cp_department": "string",
"cp_catalog_number": "bigint",
"cp_catalog_page_number": "bigint",
"cp_description": "string",
"cp_type": "string",
},
"customer": {
"c_customer_sk": "bigint",
"c_customer_id": "string",
"c_current_cdemo_sk": "bigint",
"c_current_hdemo_sk": "bigint",
"c_current_addr_sk": "bigint",
"c_first_shipto_date_sk": "bigint",
"c_first_sales_date_sk": "bigint",
"c_salutation": "string",
"c_first_name": "string",
"c_last_name": "string",
"c_preferred_cust_flag": "string",
"c_birth_day": "bigint",
"c_birth_month": "bigint",
"c_birth_year": "bigint",
"c_birth_country": "string",
"c_login": "string",
"c_email_address": "string",
"c_last_review_date": "string",
},
"customer_address": {
"ca_address_sk": "bigint",
"ca_address_id": "string",
"ca_street_number": "string",
"ca_street_name": "string",
"ca_street_type": "string",
"ca_suite_number": "string",
"ca_city": "string",
"ca_county": "string",
"ca_state": "string",
"ca_zip": "string",
"ca_country": "string",
"ca_gmt_offset": "double",
"ca_location_type": "string",
},
"customer_demographics": {
"cd_demo_sk": "bigint",
"cd_gender": "string",
"cd_marital_status": "string",
"cd_education_status": "string",
"cd_purchase_estimate": "bigint",
"cd_credit_rating": "string",
"cd_dep_count": "bigint",
"cd_dep_employed_count": "bigint",
"cd_dep_college_count": "bigint",
},
"date_dim": {
"d_date_sk": "bigint",
"d_date_id": "string",
"d_date": "string",
"d_month_seq": "bigint",
"d_week_seq": "bigint",
"d_quarter_seq": "bigint",
"d_year": "bigint",
"d_dow": "bigint",
"d_moy": "bigint",
"d_dom": "bigint",
"d_qoy": "bigint",
"d_fy_year": "bigint",
"d_fy_quarter_seq": "bigint",
"d_fy_week_seq": "bigint",
"d_day_name": "string",
"d_quarter_name": "string",
"d_holiday": "string",
"d_weekend": "string",
"d_following_holiday": "string",
"d_first_dom": "bigint",
"d_last_dom": "bigint",
"d_same_day_ly": "bigint",
"d_same_day_lq": "bigint",
"d_current_day": "string",
"d_current_week": "string",
"d_current_month": "string",
"d_current_quarter": "string",
"d_current_year": "string",
},
"household_demographics": {
"hd_demo_sk": "bigint",
"hd_income_band_sk": "bigint",
"hd_buy_potential": "string",
"hd_dep_count": "bigint",
"hd_vehicle_count": "bigint",
},
"income_band": {
"ib_income_band_sk": "bigint",
"ib_lower_bound": "bigint",
"ib_upper_bound": "bigint",
},
"item": {
"i_item_sk": "bigint",
"i_item_id": "string",
"i_rec_start_date": "string",
"i_rec_end_date": "string",
"i_item_desc": "string",
"i_current_price": "double",
"i_wholesale_cost": "double",
"i_brand_id": "bigint",
"i_brand": "string",
"i_class_id": "bigint",
"i_class": "string",
"i_category_id": "bigint",
"i_category": "string",
"i_manufact_id": "bigint",
"i_manufact": "string",
"i_size": "string",
"i_formulation": "string",
"i_color": "string",
"i_units": "string",
"i_container": "string",
"i_manager_id": "bigint",
"i_product_name": "string",
},
"promotion": {
"p_promo_sk": "bigint",
"p_promo_id": "string",
"p_start_date_sk": "bigint",
"p_end_date_sk": "bigint",
"p_item_sk": "bigint",
"p_cost": "double",
"p_response_target": "bigint",
"p_promo_name": "string",
"p_channel_dmail": "string",
"p_channel_email": "string",
"p_channel_catalog": "string",
"p_channel_tv": "string",
"p_channel_radio": "string",
"p_channel_press": "string",
"p_channel_event": "string",
"p_channel_demo": "string",
"p_channel_details": "string",
"p_purpose": "string",
"p_discount_active": "string",
},
"reason": {"r_reason_sk": "bigint", "r_reason_id": "string", "r_reason_desc": "string"},
"ship_mode": {
"sm_ship_mode_sk": "bigint",
"sm_ship_mode_id": "string",
"sm_type": "string",
"sm_code": "string",
"sm_carrier": "string",
"sm_contract": "string",
},
"store": {
"s_store_sk": "bigint",
"s_store_id": "string",
"s_rec_start_date": "string",
"s_rec_end_date": "string",
"s_closed_date_sk": "bigint",
"s_store_name": "string",
"s_number_employees": "bigint",
"s_floor_space": "bigint",
"s_hours": "string",
"s_manager": "string",
"s_market_id": "bigint",
"s_geography_class": "string",
"s_market_desc": "string",
"s_market_manager": "string",
"s_division_id": "bigint",
"s_division_name": "string",
"s_company_id": "bigint",
"s_company_name": "string",
"s_street_number": "string",
"s_street_name": "string",
"s_street_type": "string",
"s_suite_number": "string",
"s_city": "string",
"s_county": "string",
"s_state": "string",
"s_zip": "string",
"s_country": "string",
"s_gmt_offset": "double",
"s_tax_precentage": "double",
},
"time_dim": {
"t_time_sk": "bigint",
"t_time_id": "string",
"t_time": "bigint",
"t_hour": "bigint",
"t_minute": "bigint",
"t_second": "bigint",
"t_am_pm": "string",
"t_shift": "string",
"t_sub_shift": "string",
"t_meal_time": "string",
},
"warehouse": {
"w_warehouse_sk": "bigint",
"w_warehouse_id": "string",
"w_warehouse_name": "string",
"w_warehouse_sq_ft": "bigint",
"w_street_number": "string",
"w_street_name": "string",
"w_street_type": "string",
"w_suite_number": "string",
"w_city": "string",
"w_county": "string",
"w_state": "string",
"w_zip": "string",
"w_country": "string",
"w_gmt_offset": "double",
},
"web_page": {
"wp_web_page_sk": "bigint",
"wp_web_page_id": "string",
"wp_rec_start_date": "string",
"wp_rec_end_date": "string",
"wp_creation_date_sk": "bigint",
"wp_access_date_sk": "bigint",
"wp_autogen_flag": "string",
"wp_customer_sk": "bigint",
"wp_url": "string",
"wp_type": "string",
"wp_char_count": "bigint",
"wp_link_count": "bigint",
"wp_image_count": "bigint",
"wp_max_ad_count": "bigint",
},
"web_site": {
"web_site_sk": "bigint",
"web_site_id": "string",
"web_rec_start_date": "string",
"web_rec_end_date": "string",
"web_name": "string",
"web_open_date_sk": "bigint",
"web_close_date_sk": "bigint",
"web_class": "string",
"web_manager": "string",
"web_mkt_id": "bigint",
"web_mkt_class": "string",
"web_mkt_desc": "string",
"web_market_manager": "string",
"web_company_id": "bigint",
"web_company_name": "string",
"web_street_number": "string",
"web_street_name": "string",
"web_street_type": "string",
"web_suite_number": "string",
"web_city": "string",
"web_county": "string",
"web_state": "string",
"web_zip": "string",
"web_country": "string",
"web_gmt_offset": "string",
"web_tax_percentage": "double",
},
}

View file

@ -155,6 +155,19 @@ class TestDiff(unittest.TestCase):
with self.assertRaises(ValueError):
diff(expr_src, expr_tgt, matchings=[(expr_src, expr_tgt), (expr_src, expr_tgt)])
    def test_identifier(self):
        """Diffing a query that gains a table-qualified column reports only inserts."""
        expr_src = parse_one("SELECT a FROM tbl")
        expr_tgt = parse_one("SELECT a, tbl.b from tbl")
        # The delta is the new column plus its component identifiers:
        # the column name ("b") and its table qualifier ("tbl").
        self._validate_delta_only(
            diff(expr_src, expr_tgt),
            [
                Insert(expression=exp.to_identifier("b")),
                Insert(expression=exp.to_column("tbl.b")),
                Insert(expression=exp.to_identifier("tbl")),
            ],
        )
    def _validate_delta_only(self, actual_diff, expected_delta):
        # _delta_only is a module-level helper; presumably it filters the diff
        # down to change entries (dropping unchanged/Keep nodes) — verify there.
        actual_delta = _delta_only(actual_diff)
        # Compare as sets: only membership matters, not edit-script ordering.
        self.assertEqual(set(actual_delta), set(expected_delta))

View file

@ -401,6 +401,16 @@ class TestExecutor(unittest.TestCase):
],
)
table1_view = exp.Select().select("id", "sub_type").from_("table1").subquery()
select_from_sub_query = exp.Select().select("id AS id_alias", "sub_type").from_(table1_view)
expression = exp.Select().select("*").from_("cte1").with_("cte1", as_=select_from_sub_query)
schema = {"table1": {"id": "str", "sub_type": "str"}}
executed = execute(expression, tables={t: [] for t in schema}, schema=schema)
self.assertEqual(executed.rows, [])
self.assertEqual(executed.columns, ("id_alias", "sub_type"))
def test_correlated_count(self):
tables = {
"parts": [{"pnum": 0, "qoh": 1}],

View file

@ -15,6 +15,20 @@ class TestExpressions(unittest.TestCase):
self.assertEqual(parse_one("x(1)").find(exp.Literal).depth, 1)
def test_eq(self):
self.assertEqual(exp.to_identifier("a"), exp.to_identifier("A"))
self.assertEqual(
exp.Column(table=exp.to_identifier("b"), this=exp.to_identifier("b")),
exp.Column(this=exp.to_identifier("b"), table=exp.to_identifier("b")),
)
self.assertEqual(exp.to_identifier("a", quoted=True), exp.to_identifier("A"))
self.assertNotEqual(exp.to_identifier("A", quoted=True), exp.to_identifier("A"))
self.assertNotEqual(
exp.to_identifier("A", quoted=True), exp.to_identifier("a", quoted=True)
)
self.assertNotEqual(parse_one("'x'"), parse_one("'X'"))
self.assertNotEqual(parse_one("'1'"), parse_one("1"))
self.assertEqual(parse_one("`a`", read="hive"), parse_one('"a"'))
self.assertEqual(parse_one("`a`", read="hive"), parse_one('"a" '))
self.assertEqual(parse_one("`a`.b", read="hive"), parse_one('"a"."b"'))
@ -330,6 +344,7 @@ class TestExpressions(unittest.TestCase):
self.assertEqual(parse_one("x + y * 2").sql(), "x + y * 2")
self.assertEqual(parse_one('select "x"').sql(dialect="hive", pretty=True), "SELECT\n `x`")
self.assertEqual(parse_one("X + y").sql(identify=True, normalize=True), '"x" + "y"')
self.assertEqual(parse_one('"X" + Y').sql(identify=True, normalize=True), '"X" + "y"')
self.assertEqual(parse_one("SUM(X)").sql(identify=True, normalize=True), 'SUM("x")')
def test_transform_with_arguments(self):
@ -450,22 +465,28 @@ class TestExpressions(unittest.TestCase):
self.assertIsInstance(parse_one("ARRAY_CONTAINS(a, 'a')"), exp.ArrayContains)
self.assertIsInstance(parse_one("ARRAY_SIZE(a)"), exp.ArraySize)
self.assertIsInstance(parse_one("AVG(a)"), exp.Avg)
self.assertIsInstance(parse_one("BEGIN DEFERRED TRANSACTION"), exp.Transaction)
self.assertIsInstance(parse_one("CEIL(a)"), exp.Ceil)
self.assertIsInstance(parse_one("CEILING(a)"), exp.Ceil)
self.assertIsInstance(parse_one("COALESCE(a, b)"), exp.Coalesce)
self.assertIsInstance(parse_one("COMMIT"), exp.Commit)
self.assertIsInstance(parse_one("COUNT(a)"), exp.Count)
self.assertIsInstance(parse_one("COUNT_IF(a > 0)"), exp.CountIf)
self.assertIsInstance(parse_one("DATE_ADD(a, 1)"), exp.DateAdd)
self.assertIsInstance(parse_one("DATE_DIFF(a, 2)"), exp.DateDiff)
self.assertIsInstance(parse_one("DATE_STR_TO_DATE(a)"), exp.DateStrToDate)
self.assertIsInstance(parse_one("DAY(a)"), exp.Day)
self.assertIsInstance(parse_one("EXP(a)"), exp.Exp)
self.assertIsInstance(parse_one("FLOOR(a)"), exp.Floor)
self.assertIsInstance(parse_one("GENERATE_SERIES(a, b, c)"), exp.GenerateSeries)
self.assertIsInstance(parse_one("GLOB(x, y)"), exp.Glob)
self.assertIsInstance(parse_one("GREATEST(a, b)"), exp.Greatest)
self.assertIsInstance(parse_one("IF(a, b, c)"), exp.If)
self.assertIsInstance(parse_one("INITCAP(a)"), exp.Initcap)
self.assertIsInstance(parse_one("JSON_EXTRACT(a, '$.name')"), exp.JSONExtract)
self.assertIsInstance(parse_one("JSON_EXTRACT_SCALAR(a, '$.name')"), exp.JSONExtractScalar)
self.assertIsInstance(parse_one("LEAST(a, b)"), exp.Least)
self.assertIsInstance(parse_one("LIKE(x, y)"), exp.Like)
self.assertIsInstance(parse_one("LN(a)"), exp.Ln)
self.assertIsInstance(parse_one("LOG10(a)"), exp.Log10)
self.assertIsInstance(parse_one("MAX(a)"), exp.Max)
@ -477,6 +498,7 @@ class TestExpressions(unittest.TestCase):
self.assertIsInstance(parse_one("QUANTILE(a, 0.90)"), exp.Quantile)
self.assertIsInstance(parse_one("REGEXP_LIKE(a, 'test')"), exp.RegexpLike)
self.assertIsInstance(parse_one("REGEXP_SPLIT(a, 'test')"), exp.RegexpSplit)
self.assertIsInstance(parse_one("ROLLBACK"), exp.Rollback)
self.assertIsInstance(parse_one("ROUND(a)"), exp.Round)
self.assertIsInstance(parse_one("ROUND(a, 2)"), exp.Round)
self.assertIsInstance(parse_one("SPLIT(a, 'test')"), exp.Split)
@ -504,11 +526,6 @@ class TestExpressions(unittest.TestCase):
self.assertIsInstance(parse_one("VARIANCE(a)"), exp.Variance)
self.assertIsInstance(parse_one("VARIANCE_POP(a)"), exp.VariancePop)
self.assertIsInstance(parse_one("YEAR(a)"), exp.Year)
self.assertIsInstance(parse_one("BEGIN DEFERRED TRANSACTION"), exp.Transaction)
self.assertIsInstance(parse_one("COMMIT"), exp.Commit)
self.assertIsInstance(parse_one("ROLLBACK"), exp.Rollback)
self.assertIsInstance(parse_one("GENERATE_SERIES(a, b, c)"), exp.GenerateSeries)
self.assertIsInstance(parse_one("COUNT_IF(a > 0)"), exp.CountIf)
def test_column(self):
column = parse_one("a.b.c.d")
@ -542,6 +559,7 @@ class TestExpressions(unittest.TestCase):
self.assertEqual(column.table, "a")
self.assertIsInstance(parse_one("*"), exp.Star)
self.assertEqual(exp.column("a", table="b", db="c", catalog="d"), exp.to_column("d.c.b.a"))
def test_text(self):
column = parse_one("a.b.c.d.e")

View file

@ -1,4 +1,5 @@
import unittest
from concurrent.futures import ProcessPoolExecutor, as_completed
from functools import partial
import duckdb
@ -11,6 +12,7 @@ from sqlglot.optimizer.annotate_types import annotate_types
from sqlglot.optimizer.scope import build_scope, traverse_scope, walk_in_scope
from sqlglot.schema import MappingSchema
from tests.helpers import (
TPCDS_SCHEMA,
TPCH_SCHEMA,
load_sql_fixture_pairs,
load_sql_fixtures,
@ -18,6 +20,28 @@ from tests.helpers import (
)
def parse_and_optimize(func, sql, dialect, **kwargs):
    """Parse *sql* under *dialect* and apply optimizer *func* to the result.

    Module-level (rather than a closure) so it can be pickled and shipped to
    ProcessPoolExecutor workers.
    """
    expression = parse_one(sql, read=dialect)
    return func(expression, **kwargs)
def qualify_columns(expression, **kwargs):
    """Qualify tables first, then columns; *kwargs* go to the column pass."""
    qualified = optimizer.qualify_tables.qualify_tables(expression)
    return optimizer.qualify_columns.qualify_columns(qualified, **kwargs)
def pushdown_projections(expression, **kwargs):
    """Qualify tables and columns, then prune/push down unused projections."""
    qualified = optimizer.qualify_columns.qualify_columns(
        optimizer.qualify_tables.qualify_tables(expression), **kwargs
    )
    return optimizer.pushdown_projections.pushdown_projections(qualified, **kwargs)
def normalize(expression, **kwargs):
    """Normalize to conjunctive normal form (dnf=False) and simplify.

    Extra *kwargs* are accepted for check_file compatibility but unused.
    """
    normalized = optimizer.normalize.normalize(expression, dnf=False)
    return optimizer.simplify.simplify(normalized)
class TestOptimizer(unittest.TestCase):
maxDiff = None
@ -74,29 +98,35 @@ class TestOptimizer(unittest.TestCase):
}
def check_file(self, file, func, pretty=False, execute=False, **kwargs):
for i, (meta, sql, expected) in enumerate(
load_sql_fixture_pairs(f"optimizer/{file}.sql"), start=1
):
title = meta.get("title") or f"{i}, {sql}"
dialect = meta.get("dialect")
leave_tables_isolated = meta.get("leave_tables_isolated")
with ProcessPoolExecutor() as pool:
results = {}
func_kwargs = {**kwargs}
if leave_tables_isolated is not None:
func_kwargs["leave_tables_isolated"] = string_to_bool(leave_tables_isolated)
for i, (meta, sql, expected) in enumerate(
load_sql_fixture_pairs(f"optimizer/{file}.sql"), start=1
):
title = meta.get("title") or f"{i}, {sql}"
dialect = meta.get("dialect")
execute = execute if meta.get("execute") is None else False
leave_tables_isolated = meta.get("leave_tables_isolated")
func_kwargs = {**kwargs}
if leave_tables_isolated is not None:
func_kwargs["leave_tables_isolated"] = string_to_bool(leave_tables_isolated)
future = pool.submit(parse_and_optimize, func, sql, dialect, **func_kwargs)
results[future] = (sql, title, expected, dialect, execute)
for future in as_completed(results):
optimized = future.result()
sql, title, expected, dialect, execute = results[future]
with self.subTest(title):
optimized = func(parse_one(sql, read=dialect), **func_kwargs)
self.assertEqual(
expected,
optimized.sql(pretty=pretty, dialect=dialect),
)
should_execute = meta.get("execute")
if should_execute is None:
should_execute = execute
if string_to_bool(should_execute):
if string_to_bool(execute):
with self.subTest(f"(execute) {title}"):
df1 = self.conn.execute(
sqlglot.transpile(sql, read=dialect, write="duckdb")[0]
@ -137,25 +167,19 @@ class TestOptimizer(unittest.TestCase):
"(x AND y) OR (x AND z)",
)
self.check_file(
"normalize",
optimizer.normalize.normalize,
self.assertEqual(
optimizer.normalize.normalize(
parse_one("x AND (y OR z)"),
).sql(),
"x AND (y OR z)",
)
def test_qualify_columns(self):
def qualify_columns(expression, **kwargs):
expression = optimizer.qualify_tables.qualify_tables(expression)
expression = optimizer.qualify_columns.qualify_columns(expression, **kwargs)
return expression
self.check_file("normalize", normalize)
def test_qualify_columns(self):
self.check_file("qualify_columns", qualify_columns, execute=True, schema=self.schema)
def test_qualify_columns__with_invisible(self):
def qualify_columns(expression, **kwargs):
expression = optimizer.qualify_tables.qualify_tables(expression)
expression = optimizer.qualify_columns.qualify_columns(expression, **kwargs)
return expression
schema = MappingSchema(self.schema, {"x": {"a"}, "y": {"b"}, "z": {"b"}})
self.check_file("qualify_columns__with_invisible", qualify_columns, schema=schema)
@ -172,17 +196,15 @@ class TestOptimizer(unittest.TestCase):
self.check_file("lower_identities", optimizer.lower_identities.lower_identities)
def test_pushdown_projection(self):
def pushdown_projections(expression, **kwargs):
expression = optimizer.qualify_tables.qualify_tables(expression)
expression = optimizer.qualify_columns.qualify_columns(expression, **kwargs)
expression = optimizer.pushdown_projections.pushdown_projections(expression, **kwargs)
return expression
self.check_file("pushdown_projections", pushdown_projections, schema=self.schema)
def test_simplify(self):
self.check_file("simplify", optimizer.simplify.simplify)
expression = parse_one("TRUE AND TRUE AND TRUE")
self.assertEqual(exp.true(), optimizer.simplify.simplify(expression))
self.assertEqual(exp.true(), optimizer.simplify.simplify(expression.this))
def test_unnest_subqueries(self):
self.check_file(
"unnest_subqueries",
@ -257,6 +279,9 @@ class TestOptimizer(unittest.TestCase):
def test_tpch(self):
self.check_file("tpc-h/tpc-h", optimizer.optimize, schema=TPCH_SCHEMA, pretty=True)
def test_tpcds(self):
self.check_file("tpc-ds/tpc-ds", optimizer.optimize, schema=TPCDS_SCHEMA, pretty=True)
def test_file_schema(self):
expression = parse_one(
"""
@ -578,6 +603,10 @@ FROM READ_CSV('tests/fixtures/optimizer/tpc-h/nation.csv.gz', 'delimiter', '|')
)
self.assertEqual(expression.expressions[0].type.this, target_type)
def test_concat_annotation(self):
expression = annotate_types(parse_one("CONCAT('A', 'B')"))
self.assertEqual(expression.type.this, exp.DataType.Type.VARCHAR)
def test_recursive_cte(self):
query = parse_one(
"""

View file

@ -79,6 +79,9 @@ class TestParser(unittest.TestCase):
    def test_union_order(self):
        # A parenthesized derived table before UNION must still yield a Union root.
        self.assertIsInstance(parse_one("SELECT * FROM (SELECT 1) UNION SELECT 2"), exp.Union)
        # A subquery inside HAVING must not absorb the trailing UNION branch.
        self.assertIsInstance(
            parse_one("SELECT x FROM y HAVING x > (SELECT 1) UNION SELECT 2"), exp.Union
        )
def test_select(self):
self.assertIsNotNone(parse_one("select 1 natural"))
@ -357,7 +360,7 @@ class TestParser(unittest.TestCase):
@patch("sqlglot.parser.logger")
def test_create_table_error(self, logger):
parse_one(
"""CREATE TABLE PARTITION""",
"""CREATE TABLE SELECT""",
error_level=ErrorLevel.WARN,
)

View file

@ -507,21 +507,12 @@ FROM bar /* comment 5 */, tbl /* comment 6 */""",
more_than_max_errors = "(((("
expected_messages = (
"Expecting ). Line 1, Col: 4.\n (((\033[4m(\033[0m\n\n"
"Required keyword: 'this' missing for <class 'sqlglot.expressions.Paren'>. Line 1, Col: 4.\n (((\033[4m(\033[0m\n\n"
"Expecting ). Line 1, Col: 4.\n (((\033[4m(\033[0m\n\n"
"Expecting ). Line 1, Col: 4.\n (((\033[4m(\033[0m\n\n"
"... and 2 more"
)
expected_errors = [
{
"description": "Expecting )",
"line": 1,
"col": 4,
"start_context": "(((",
"highlight": "(",
"end_context": "",
"into_expression": None,
},
{
"description": "Required keyword: 'this' missing for <class 'sqlglot.expressions.Paren'>",
"line": 1,
@ -531,9 +522,18 @@ FROM bar /* comment 5 */, tbl /* comment 6 */""",
"end_context": "",
"into_expression": None,
},
{
"description": "Expecting )",
"line": 1,
"col": 4,
"start_context": "(((",
"highlight": "(",
"end_context": "",
"into_expression": None,
},
]
# Also expect three trailing structured errors that match the first
expected_errors += [expected_errors[0]] * 3
expected_errors += [expected_errors[1]] * 3
with self.assertRaises(ParseError) as ctx:
transpile(more_than_max_errors, error_level=ErrorLevel.RAISE)

37
tests/tpch.py Normal file
View file

@ -0,0 +1,37 @@
import time

from sqlglot.optimizer import optimize

# One-off maintenance script: regenerate a TPC fixture file by optimizing each
# benchmark query and writing the original/optimized SQL pair side by side.
# The constants below are intentionally blank placeholders — fill them in
# before running.
INPUT = ""  # path template for the query files, e.g. "queries/query{i}.sql"
OUTPUT = ""  # fixture file to (re)write
NUM = 99  # number of queries to process (TPC-DS defines 99)
SCHEMA = {}  # schema mapping handed to the optimizer
KIND = "DS"  # benchmark label used in the fixture headers, e.g. "DS" or "H"

with open(OUTPUT, "w", encoding="UTF-8") as fixture:
    # Benchmark queries are numbered starting at 1.
    for i in range(1, NUM + 1):
        with open(INPUT.format(i=i), encoding="UTF-8") as file:
            # Keep only the first statement (text before the first ";"),
            # dropping comment lines and trailing whitespace.
            original = "\n".join(
                line.rstrip()
                for line in file.read().split(";")[0].split("\n")
                if not line.startswith("--")
            )
        # Normalize backtick identifier quoting to ANSI double quotes.
        original = original.replace("`", '"')
        now = time.time()
        try:
            optimized = optimize(original, schema=SCHEMA)
        except Exception as e:  # best effort: report the failing query, keep going
            print("****", i, e, "****")
            continue
        fixture.write(
            f"""--------------------------------------
-- TPC-{KIND} {i}
--------------------------------------
{original};
{optimized.sql(pretty=True)};
"""
        )
        # Report per-query optimization time for rough profiling.
        print(i, time.time() - now)