1
0
Fork 0

Adding upstream version 6.2.6.

Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
Daniel Baumann 2025-02-13 14:37:25 +01:00
parent 8425a9678d
commit d62bab68ae
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
49 changed files with 1741 additions and 566 deletions

View file

@ -236,3 +236,24 @@ class TestBigQuery(Validator):
"snowflake": "SELECT a FROM test WHERE a = 1 GROUP BY a HAVING a = 2 QUALIFY z ORDER BY a NULLS FIRST LIMIT 10",
},
)
self.validate_all(
"SELECT cola, colb FROM (VALUES (1, 'test')) AS tab(cola, colb)",
write={
"spark": "SELECT cola, colb FROM (VALUES (1, 'test')) AS tab(cola, colb)",
"bigquery": "SELECT cola, colb FROM UNNEST([STRUCT(1 AS cola, 'test' AS colb)])",
"snowflake": "SELECT cola, colb FROM (VALUES (1, 'test')) AS tab(cola, colb)",
},
)
self.validate_all(
"SELECT * FROM (SELECT a, b, c FROM test) PIVOT(SUM(b) d, COUNT(*) e FOR c IN ('x', 'y'))",
write={
"bigquery": "SELECT * FROM (SELECT a, b, c FROM test) PIVOT(SUM(b) AS d, COUNT(*) AS e FOR c IN ('x', 'y'))",
},
)
def test_user_defined_functions(self):
self.validate_identity(
"CREATE TEMPORARY FUNCTION a(x FLOAT64, y FLOAT64) RETURNS FLOAT64 LANGUAGE js AS 'return x*y;'"
)
self.validate_identity("CREATE TEMPORARY FUNCTION a(x FLOAT64, y FLOAT64) AS ((x + 4) / y)")
self.validate_identity("CREATE TABLE FUNCTION a(x INT64) RETURNS TABLE <q STRING, r INT64> AS SELECT s, t")

View file

@ -13,9 +13,6 @@ from sqlglot import (
class Validator(unittest.TestCase):
dialect = None
def validate(self, sql, target, **kwargs):
self.assertEqual(transpile(sql, **kwargs)[0], target)
def validate_identity(self, sql):
self.assertEqual(transpile(sql, read=self.dialect, write=self.dialect)[0], sql)
@ -258,6 +255,7 @@ class TestDialect(Validator):
"duckdb": "EPOCH(STRPTIME('2020-01-01', '%Y-%M-%d'))",
"hive": "UNIX_TIMESTAMP('2020-01-01', 'yyyy-mm-dd')",
"presto": "TO_UNIXTIME(DATE_PARSE('2020-01-01', '%Y-%i-%d'))",
"starrocks": "UNIX_TIMESTAMP('2020-01-01', '%Y-%i-%d')",
},
)
self.validate_all(
@ -266,6 +264,7 @@ class TestDialect(Validator):
"duckdb": "CAST('2020-01-01' AS DATE)",
"hive": "TO_DATE('2020-01-01')",
"presto": "DATE_PARSE('2020-01-01', '%Y-%m-%d %H:%i:%s')",
"starrocks": "TO_DATE('2020-01-01')",
},
)
self.validate_all(
@ -341,6 +340,7 @@ class TestDialect(Validator):
"duckdb": "STRFTIME(TO_TIMESTAMP(CAST(x AS BIGINT)), y)",
"hive": "FROM_UNIXTIME(x, y)",
"presto": "DATE_FORMAT(FROM_UNIXTIME(x), y)",
"starrocks": "FROM_UNIXTIME(x, y)",
},
)
self.validate_all(
@ -349,6 +349,7 @@ class TestDialect(Validator):
"duckdb": "TO_TIMESTAMP(CAST(x AS BIGINT))",
"hive": "FROM_UNIXTIME(x)",
"presto": "FROM_UNIXTIME(x)",
"starrocks": "FROM_UNIXTIME(x)",
},
)
self.validate_all(
@ -840,10 +841,20 @@ class TestDialect(Validator):
"starrocks": UnsupportedError,
},
)
self.validate_all(
"POSITION(' ' in x)",
write={
"duckdb": "STRPOS(x, ' ')",
"postgres": "STRPOS(x, ' ')",
"presto": "STRPOS(x, ' ')",
"spark": "LOCATE(' ', x)",
},
)
self.validate_all(
"STR_POSITION(x, 'a')",
write={
"duckdb": "STRPOS(x, 'a')",
"postgres": "STRPOS(x, 'a')",
"presto": "STRPOS(x, 'a')",
"spark": "LOCATE('a', x)",
},

View file

@ -1,3 +1,4 @@
from sqlglot import ErrorLevel, UnsupportedError, transpile
from tests.dialects.test_dialect import Validator
@ -250,3 +251,10 @@ class TestDuckDB(Validator):
"spark": "MONTH('2021-03-01')",
},
)
with self.assertRaises(UnsupportedError):
transpile(
"SELECT a FROM b PIVOT(SUM(x) FOR y IN ('z', 'q'))",
read="duckdb",
unsupported_level=ErrorLevel.IMMEDIATE,
)

View file

@ -127,17 +127,17 @@ class TestHive(Validator):
def test_ddl(self):
self.validate_all(
"CREATE TABLE test STORED AS parquet TBLPROPERTIES ('x' = '1', 'Z' = '2') AS SELECT 1",
"CREATE TABLE test STORED AS parquet TBLPROPERTIES ('x'='1', 'Z'='2') AS SELECT 1",
write={
"presto": "CREATE TABLE test WITH (FORMAT = 'parquet', x = '1', Z = '2') AS SELECT 1",
"hive": "CREATE TABLE test STORED AS PARQUET TBLPROPERTIES ('x' = '1', 'Z' = '2') AS SELECT 1",
"spark": "CREATE TABLE test USING PARQUET TBLPROPERTIES ('x' = '1', 'Z' = '2') AS SELECT 1",
"presto": "CREATE TABLE test WITH (FORMAT='parquet', x='1', Z='2') AS SELECT 1",
"hive": "CREATE TABLE test STORED AS PARQUET TBLPROPERTIES ('x'='1', 'Z'='2') AS SELECT 1",
"spark": "CREATE TABLE test USING PARQUET TBLPROPERTIES ('x'='1', 'Z'='2') AS SELECT 1",
},
)
self.validate_all(
"CREATE TABLE x (w STRING) PARTITIONED BY (y INT, z INT)",
write={
"presto": "CREATE TABLE x (w VARCHAR, y INTEGER, z INTEGER) WITH (PARTITIONED_BY = ARRAY['y', 'z'])",
"presto": "CREATE TABLE x (w VARCHAR, y INTEGER, z INTEGER) WITH (PARTITIONED_BY=ARRAY['y', 'z'])",
"hive": "CREATE TABLE x (w STRING) PARTITIONED BY (y INT, z INT)",
"spark": "CREATE TABLE x (w STRING) PARTITIONED BY (y INT, z INT)",
},

View file

@ -119,3 +119,39 @@ class TestMySQL(Validator):
"sqlite": "GROUP_CONCAT(DISTINCT x ORDER BY y DESC, '')",
},
)
self.validate_identity(
"CREATE TABLE z (a INT) ENGINE=InnoDB AUTO_INCREMENT=1 CHARACTER SET=utf8 COLLATE=utf8_bin COMMENT='x'"
)
self.validate_identity(
"CREATE TABLE z (a INT) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARACTER SET=utf8 COLLATE=utf8_bin COMMENT='x'"
)
self.validate_identity(
"CREATE TABLE z (a INT DEFAULT NULL, PRIMARY KEY(a)) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARACTER SET=utf8 COLLATE=utf8_bin COMMENT='x'"
)
self.validate_all(
"""
CREATE TABLE `t_customer_account` (
"id" int(11) NOT NULL AUTO_INCREMENT,
"customer_id" int(11) DEFAULT NULL COMMENT '客户id',
"bank" varchar(100) COLLATE utf8_bin DEFAULT NULL COMMENT '行别',
"account_no" varchar(100) COLLATE utf8_bin DEFAULT NULL COMMENT '账号',
PRIMARY KEY ("id")
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARACTER SET=utf8 COLLATE=utf8_bin COMMENT='客户账户表'
""",
write={
"mysql": """CREATE TABLE `t_customer_account` (
'id' INT(11) NOT NULL AUTO_INCREMENT,
'customer_id' INT(11) DEFAULT NULL COMMENT '客户id',
'bank' VARCHAR(100) COLLATE utf8_bin DEFAULT NULL COMMENT '行别',
'account_no' VARCHAR(100) COLLATE utf8_bin DEFAULT NULL COMMENT '账号',
PRIMARY KEY('id')
)
ENGINE=InnoDB
AUTO_INCREMENT=1
DEFAULT CHARACTER SET=utf8
COLLATE=utf8_bin
COMMENT='客户账户表'"""
},
pretty=True,
)

View file

@ -171,7 +171,7 @@ class TestPresto(Validator):
self.validate_all(
"CREATE TABLE test WITH (FORMAT = 'PARQUET') AS SELECT 1",
write={
"presto": "CREATE TABLE test WITH (FORMAT = 'PARQUET') AS SELECT 1",
"presto": "CREATE TABLE test WITH (FORMAT='PARQUET') AS SELECT 1",
"hive": "CREATE TABLE test STORED AS PARQUET AS SELECT 1",
"spark": "CREATE TABLE test USING PARQUET AS SELECT 1",
},
@ -179,15 +179,15 @@ class TestPresto(Validator):
self.validate_all(
"CREATE TABLE test WITH (FORMAT = 'PARQUET', X = '1', Z = '2') AS SELECT 1",
write={
"presto": "CREATE TABLE test WITH (FORMAT = 'PARQUET', X = '1', Z = '2') AS SELECT 1",
"hive": "CREATE TABLE test STORED AS PARQUET TBLPROPERTIES ('X' = '1', 'Z' = '2') AS SELECT 1",
"spark": "CREATE TABLE test USING PARQUET TBLPROPERTIES ('X' = '1', 'Z' = '2') AS SELECT 1",
"presto": "CREATE TABLE test WITH (FORMAT='PARQUET', X='1', Z='2') AS SELECT 1",
"hive": "CREATE TABLE test STORED AS PARQUET TBLPROPERTIES ('X'='1', 'Z'='2') AS SELECT 1",
"spark": "CREATE TABLE test USING PARQUET TBLPROPERTIES ('X'='1', 'Z'='2') AS SELECT 1",
},
)
self.validate_all(
"CREATE TABLE x (w VARCHAR, y INTEGER, z INTEGER) WITH (PARTITIONED_BY = ARRAY['y', 'z'])",
"CREATE TABLE x (w VARCHAR, y INTEGER, z INTEGER) WITH (PARTITIONED_BY=ARRAY['y', 'z'])",
write={
"presto": "CREATE TABLE x (w VARCHAR, y INTEGER, z INTEGER) WITH (PARTITIONED_BY = ARRAY['y', 'z'])",
"presto": "CREATE TABLE x (w VARCHAR, y INTEGER, z INTEGER) WITH (PARTITIONED_BY=ARRAY['y', 'z'])",
"hive": "CREATE TABLE x (w STRING) PARTITIONED BY (y INT, z INT)",
"spark": "CREATE TABLE x (w STRING) PARTITIONED BY (y INT, z INT)",
},
@ -195,9 +195,9 @@ class TestPresto(Validator):
self.validate_all(
"CREATE TABLE x WITH (bucket_by = ARRAY['y'], bucket_count = 64) AS SELECT 1 AS y",
write={
"presto": "CREATE TABLE x WITH (bucket_by = ARRAY['y'], bucket_count = 64) AS SELECT 1 AS y",
"hive": "CREATE TABLE x TBLPROPERTIES ('bucket_by' = ARRAY('y'), 'bucket_count' = 64) AS SELECT 1 AS y",
"spark": "CREATE TABLE x TBLPROPERTIES ('bucket_by' = ARRAY('y'), 'bucket_count' = 64) AS SELECT 1 AS y",
"presto": "CREATE TABLE x WITH (bucket_by=ARRAY['y'], bucket_count=64) AS SELECT 1 AS y",
"hive": "CREATE TABLE x TBLPROPERTIES ('bucket_by'=ARRAY('y'), 'bucket_count'=64) AS SELECT 1 AS y",
"spark": "CREATE TABLE x TBLPROPERTIES ('bucket_by'=ARRAY('y'), 'bucket_count'=64) AS SELECT 1 AS y",
},
)
self.validate_all(
@ -217,11 +217,12 @@ class TestPresto(Validator):
},
)
self.validate(
self.validate_all(
"SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname ASC NULLS LAST, lname",
"SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname, lname",
read="presto",
write="presto",
write={
"presto": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname, lname",
"spark": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname NULLS LAST, lname NULLS LAST",
},
)
def test_quotes(self):

View file

@ -143,6 +143,31 @@ class TestSnowflake(Validator):
"snowflake": r"SELECT 'a \' \\ \\t \\x21 z $ '",
},
)
self.validate_identity("SELECT REGEXP_LIKE(a, b, c)")
self.validate_all(
"SELECT RLIKE(a, b)",
write={
"snowflake": "SELECT REGEXP_LIKE(a, b)",
},
)
self.validate_all(
"SELECT a FROM test SAMPLE BLOCK (0.5) SEED (42)",
write={
"snowflake": "SELECT a FROM test TABLESAMPLE BLOCK (0.5) SEED (42)",
},
)
self.validate_all(
"SELECT a FROM test pivot",
write={
"snowflake": "SELECT a FROM test AS pivot",
},
)
self.validate_all(
"SELECT a FROM test unpivot",
write={
"snowflake": "SELECT a FROM test AS unpivot",
},
)
def test_null_treatment(self):
self.validate_all(
@ -220,3 +245,51 @@ class TestSnowflake(Validator):
"snowflake": "SELECT EXTRACT(month FROM CAST(a AS DATETIME))",
},
)
def test_semi_structured_types(self):
self.validate_identity("SELECT CAST(a AS VARIANT)")
self.validate_all(
"SELECT a::VARIANT",
write={
"snowflake": "SELECT CAST(a AS VARIANT)",
"tsql": "SELECT CAST(a AS SQL_VARIANT)",
},
)
self.validate_identity("SELECT CAST(a AS ARRAY)")
self.validate_all(
"ARRAY_CONSTRUCT(0, 1, 2)",
write={
"snowflake": "[0, 1, 2]",
"bigquery": "[0, 1, 2]",
"duckdb": "LIST_VALUE(0, 1, 2)",
"presto": "ARRAY[0, 1, 2]",
"spark": "ARRAY(0, 1, 2)",
},
)
self.validate_all(
"SELECT a::OBJECT",
write={
"snowflake": "SELECT CAST(a AS OBJECT)",
},
)
def test_ddl(self):
self.validate_identity(
"CREATE TABLE a (x DATE, y BIGINT) WITH (PARTITION BY (x), integration='q', auto_refresh=TRUE, file_format=(type = parquet))"
)
self.validate_identity("CREATE MATERIALIZED VIEW a COMMENT='...' AS SELECT 1 FROM x")
def test_user_defined_functions(self):
self.validate_all(
"CREATE FUNCTION a(x DATE, y BIGINT) RETURNS ARRAY LANGUAGE JAVASCRIPT AS $$ SELECT 1 $$",
write={
"snowflake": "CREATE FUNCTION a(x DATE, y BIGINT) RETURNS ARRAY LANGUAGE JAVASCRIPT AS ' SELECT 1 '",
},
)
self.validate_all(
"CREATE FUNCTION a() RETURNS TABLE (b INT) AS 'SELECT 1'",
write={
"snowflake": "CREATE FUNCTION a() RETURNS TABLE (b INT) AS 'SELECT 1'",
"bigquery": "CREATE TABLE FUNCTION a() RETURNS TABLE <b INT64> AS SELECT 1",
},
)

View file

@ -34,7 +34,7 @@ class TestSpark(Validator):
self.validate_all(
"CREATE TABLE x USING ICEBERG PARTITIONED BY (MONTHS(y)) LOCATION 's3://z'",
write={
"presto": "CREATE TABLE x WITH (TABLE_FORMAT = 'ICEBERG', PARTITIONED_BY = ARRAY['MONTHS'])",
"presto": "CREATE TABLE x WITH (TABLE_FORMAT = 'ICEBERG', PARTITIONED_BY=ARRAY['MONTHS'])",
"hive": "CREATE TABLE x USING ICEBERG PARTITIONED BY (MONTHS(y)) LOCATION 's3://z'",
"spark": "CREATE TABLE x USING ICEBERG PARTITIONED BY (MONTHS(y)) LOCATION 's3://z'",
},
@ -42,7 +42,7 @@ class TestSpark(Validator):
self.validate_all(
"CREATE TABLE test STORED AS PARQUET AS SELECT 1",
write={
"presto": "CREATE TABLE test WITH (FORMAT = 'PARQUET') AS SELECT 1",
"presto": "CREATE TABLE test WITH (FORMAT='PARQUET') AS SELECT 1",
"hive": "CREATE TABLE test STORED AS PARQUET AS SELECT 1",
"spark": "CREATE TABLE test USING PARQUET AS SELECT 1",
},
@ -56,9 +56,9 @@ class TestSpark(Validator):
)
COMMENT='Test comment: blah'
WITH (
PARTITIONED_BY = ARRAY['date'],
FORMAT = 'ICEBERG',
x = '1'
PARTITIONED_BY=ARRAY['date'],
FORMAT='ICEBERG',
x='1'
)""",
"hive": """CREATE TABLE blah (
col_a INT
@ -69,7 +69,7 @@ PARTITIONED BY (
)
STORED AS ICEBERG
TBLPROPERTIES (
'x' = '1'
'x'='1'
)""",
"spark": """CREATE TABLE blah (
col_a INT
@ -80,7 +80,7 @@ PARTITIONED BY (
)
USING ICEBERG
TBLPROPERTIES (
'x' = '1'
'x'='1'
)""",
},
pretty=True,

View file

@ -15,6 +15,14 @@ class TestTSQL(Validator):
},
)
self.validate_all(
"CONVERT(INT, CONVERT(NUMERIC, '444.75'))",
write={
"mysql": "CAST(CAST('444.75' AS DECIMAL) AS INT)",
"tsql": "CAST(CAST('444.75' AS NUMERIC) AS INTEGER)",
},
)
def test_types(self):
self.validate_identity("CAST(x AS XML)")
self.validate_identity("CAST(x AS UNIQUEIDENTIFIER)")
@ -24,3 +32,13 @@ class TestTSQL(Validator):
self.validate_identity("CAST(x AS IMAGE)")
self.validate_identity("CAST(x AS SQL_VARIANT)")
self.validate_identity("CAST(x AS BIT)")
self.validate_all(
"CAST(x AS DATETIME2)",
read={
"": "CAST(x AS DATETIME)",
},
write={
"mysql": "CAST(x AS DATETIME)",
"tsql": "CAST(x AS DATETIME2)",
},
)

View file

@ -8,6 +8,7 @@ SUM(CASE WHEN x > 1 THEN 1 ELSE 0 END) / y
1.1E10
1.12e-10
-11.023E7 * 3
0.2
(1 * 2) / (3 - 5)
((TRUE))
''
@ -167,7 +168,7 @@ SELECT LEAD(a) OVER (ORDER BY b) AS a
SELECT LEAD(a, 1) OVER (PARTITION BY a ORDER BY a) AS x
SELECT LEAD(a, 1, b) OVER (PARTITION BY a ORDER BY a) AS x
SELECT X((a, b) -> a + b, z -> z) AS x
SELECT X(a -> "a" + ("z" - 1))
SELECT X(a -> a + ("z" - 1))
SELECT EXISTS(ARRAY(2, 3), x -> x % 2 = 0)
SELECT test.* FROM test
SELECT a AS b FROM test
@ -258,15 +259,24 @@ SELECT a FROM test TABLESAMPLE(100)
SELECT a FROM test TABLESAMPLE(100 ROWS)
SELECT a FROM test TABLESAMPLE BERNOULLI (50)
SELECT a FROM test TABLESAMPLE SYSTEM (75)
SELECT a FROM test PIVOT(SUM(x) FOR y IN ('z', 'q'))
SELECT a FROM test PIVOT(SOMEAGG(x, y, z) FOR q IN (1))
SELECT a FROM test PIVOT(SUM(x) FOR y IN ('z', 'q')) PIVOT(MAX(b) FOR c IN ('d'))
SELECT a FROM (SELECT a, b FROM test) PIVOT(SUM(x) FOR y IN ('z', 'q'))
SELECT a FROM test UNPIVOT(x FOR y IN (z, q)) AS x
SELECT a FROM test PIVOT(SUM(x) FOR y IN ('z', 'q')) AS x TABLESAMPLE(0.1)
SELECT a FROM test PIVOT(SUM(x) FOR y IN ('z', 'q')) UNPIVOT(x FOR y IN (z, q)) AS x
SELECT ABS(a) FROM test
SELECT AVG(a) FROM test
SELECT CEIL(a) FROM test
SELECT CEIL(a, b) FROM test
SELECT COUNT(a) FROM test
SELECT COUNT(1) FROM test
SELECT COUNT(*) FROM test
SELECT COUNT(DISTINCT a) FROM test
SELECT EXP(a) FROM test
SELECT FLOOR(a) FROM test
SELECT FLOOR(a, b) FROM test
SELECT FIRST(a) FROM test
SELECT GREATEST(a, b, c) FROM test
SELECT LAST(a) FROM test
@ -299,6 +309,7 @@ SELECT CAST(a AS MAP<INT, INT>) FROM test
SELECT CAST(a AS TIMESTAMP) FROM test
SELECT CAST(a AS DATE) FROM test
SELECT CAST(a AS ARRAY<INT>) FROM test
SELECT CAST(a AS VARIANT) FROM test
SELECT TRY_CAST(a AS INT) FROM test
SELECT COALESCE(a, b, c) FROM test
SELECT IFNULL(a, b) FROM test
@ -442,13 +453,10 @@ CREATE TABLE z (a INT(11) DEFAULT NULL COMMENT '客户id')
CREATE TABLE z (a INT(11) NOT NULL DEFAULT 1)
CREATE TABLE z (a INT(11) NOT NULL COLLATE utf8_bin AUTO_INCREMENT)
CREATE TABLE z (a INT, PRIMARY KEY(a))
CREATE TABLE z (a INT) ENGINE=InnoDB AUTO_INCREMENT=1 CHARACTER SET=utf8 COLLATE=utf8_bin COMMENT='x'
CREATE TABLE z (a INT) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARACTER SET=utf8 COLLATE=utf8_bin COMMENT='x'
CREATE TABLE z (a INT DEFAULT NULL, PRIMARY KEY(a)) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARACTER SET=utf8 COLLATE=utf8_bin COMMENT='x'
CREATE TABLE z WITH (FORMAT='parquet') AS SELECT 1
CREATE TABLE z WITH (FORMAT='ORC', x = '2') AS SELECT 1
CREATE TABLE z WITH (FORMAT='ORC', x='2') AS SELECT 1
CREATE TABLE z WITH (TABLE_FORMAT='iceberg', FORMAT='parquet') AS SELECT 1
CREATE TABLE z WITH (TABLE_FORMAT='iceberg', FORMAT='ORC', x = '2') AS SELECT 1
CREATE TABLE z WITH (TABLE_FORMAT='iceberg', FORMAT='ORC', x='2') AS SELECT 1
CREATE TABLE z (z INT) WITH (PARTITIONED_BY=(x INT, y INT))
CREATE TABLE z (z INT) WITH (PARTITIONED_BY=(x INT)) AS SELECT 1
CREATE TABLE z AS (WITH cte AS (SELECT 1) SELECT * FROM cte)
@ -460,6 +468,9 @@ CREATE TEMPORARY FUNCTION f
CREATE TEMPORARY FUNCTION f AS 'g'
CREATE FUNCTION f
CREATE FUNCTION f AS 'g'
CREATE FUNCTION a(b INT, c VARCHAR) AS 'SELECT 1'
CREATE FUNCTION a() LANGUAGE sql
CREATE FUNCTION a() LANGUAGE sql RETURNS INT
CREATE INDEX abc ON t (a)
CREATE INDEX abc ON t (a, b, b)
CREATE UNIQUE INDEX abc ON t (a, b, b)
@ -519,3 +530,4 @@ WITH a AS ((SELECT b.foo AS foo, b.bar AS bar FROM b) UNION ALL (SELECT c.foo AS
WITH a AS ((SELECT 1 AS b) UNION ALL (SELECT 1 AS b)) SELECT * FROM a
SELECT (WITH x AS (SELECT 1 AS y) SELECT * FROM x) AS z
SELECT ((SELECT 1) + 1)
SELECT * FROM project.dataset.INFORMATION_SCHEMA.TABLES

View file

@ -1,42 +1,79 @@
SELECT 1 AS x, 2 AS y
UNION ALL
SELECT 1 AS x, 2 AS y;
WITH _e_0 AS (
SELECT
1 AS x,
2 AS y
)
SELECT
*
FROM _e_0
UNION ALL
SELECT
*
FROM _e_0;
-- No derived tables
SELECT * FROM x;
SELECT * FROM x;
SELECT x.id
FROM (
SELECT *
FROM x AS x
JOIN y AS y
ON x.id = y.id
) AS x
JOIN (
SELECT *
FROM x AS x
JOIN y AS y
ON x.id = y.id
) AS y
ON x.id = y.id;
WITH _e_0 AS (
SELECT
*
FROM x AS x
JOIN y AS y
ON x.id = y.id
)
SELECT
x.id
FROM "_e_0" AS x
JOIN "_e_0" AS y
ON x.id = y.id;
-- Unaliased derived tables
SELECT a FROM (SELECT b FROM (SELECT c FROM x));
WITH cte AS (SELECT c FROM x), cte_2 AS (SELECT b FROM cte AS cte) SELECT a FROM cte_2 AS cte_2;
-- Joined derived table inside nested derived table
SELECT b FROM (SELECT b FROM (SELECT b FROM x JOIN (SELECT b FROM y) AS y ON x.b = y.b));
WITH y_2 AS (SELECT b FROM y), cte AS (SELECT b FROM x JOIN y_2 AS y ON x.b = y.b), cte_2 AS (SELECT b FROM cte AS cte) SELECT b FROM cte_2 AS cte_2;
-- Aliased derived tables
SELECT a FROM (SELECT b FROM (SELECT c FROM x) AS y) AS z;
WITH y AS (SELECT c FROM x), z AS (SELECT b FROM y AS y) SELECT a FROM z AS z;
-- Existing CTEs
WITH q AS (SELECT c FROM x) SELECT a FROM (SELECT b FROM q AS y) AS z;
WITH q AS (SELECT c FROM x), z AS (SELECT b FROM q AS y) SELECT a FROM z AS z;
-- Derived table inside CTE
WITH x AS (SELECT a FROM (SELECT a FROM x) AS y) SELECT a FROM x;
WITH y AS (SELECT a FROM x), x AS (SELECT a FROM y AS y) SELECT a FROM x;
-- Name conflicts with existing outer derived table
SELECT a FROM (SELECT b FROM (SELECT c FROM x) AS y) AS y;
WITH y AS (SELECT c FROM x), y_2 AS (SELECT b FROM y AS y) SELECT a FROM y_2 AS y;
-- Name conflicts with outer join
SELECT a, b FROM (SELECT c FROM (SELECT d FROM x) AS x) AS y JOIN x ON x.a = y.a;
WITH x_2 AS (SELECT d FROM x), y AS (SELECT c FROM x_2 AS x) SELECT a, b FROM y AS y JOIN x ON x.a = y.a;
-- Name conflicts with table name that is selected in another branch
SELECT * FROM (SELECT * FROM (SELECT a FROM x) AS x) AS y JOIN (SELECT * FROM x) AS z ON x.a = y.a;
WITH x_2 AS (SELECT a FROM x), y AS (SELECT * FROM x_2 AS x), z AS (SELECT * FROM x) SELECT * FROM y AS y JOIN z AS z ON x.a = y.a;
-- Name conflicts with table alias
SELECT a FROM (SELECT a FROM (SELECT a FROM x) AS y) AS z JOIN q AS y;
WITH y AS (SELECT a FROM x), z AS (SELECT a FROM y AS y) SELECT a FROM z AS z JOIN q AS y;
-- Name conflicts with existing CTE
WITH y AS (SELECT a FROM (SELECT a FROM x) AS y) SELECT a FROM y;
WITH y_2 AS (SELECT a FROM x), y AS (SELECT a FROM y_2 AS y) SELECT a FROM y;
-- Union
SELECT 1 AS x, 2 AS y UNION ALL SELECT 1 AS x, 2 AS y;
WITH cte AS (SELECT 1 AS x, 2 AS y) SELECT cte.x AS x, cte.y AS y FROM cte AS cte UNION ALL SELECT cte.x AS x, cte.y AS y FROM cte AS cte;
-- Union of selects with derived tables
(SELECT a FROM (SELECT b FROM x)) UNION (SELECT a FROM (SELECT b FROM y));
WITH cte AS (SELECT b FROM x), cte_2 AS (SELECT a FROM cte AS cte), cte_3 AS (SELECT b FROM y), cte_4 AS (SELECT a FROM cte_3 AS cte_3) (SELECT cte_2.a AS a FROM cte_2 AS cte_2) UNION (SELECT cte_4.a AS a FROM cte_4 AS cte_4);
-- Subquery
SELECT a FROM x WHERE b = (SELECT y.c FROM y);
SELECT a FROM x WHERE b = (SELECT y.c FROM y);
-- Correlated subquery
SELECT a FROM x WHERE b = (SELECT c FROM y WHERE y.a = x.a);
SELECT a FROM x WHERE b = (SELECT c FROM y WHERE y.a = x.a);
-- Duplicate CTE
SELECT a FROM (SELECT b FROM x) AS y JOIN (SELECT b FROM x) AS z;
WITH y AS (SELECT b FROM x) SELECT a FROM y AS y JOIN y AS z;
-- Doubly duplicate CTE
SELECT * FROM (SELECT * FROM x JOIN (SELECT * FROM x) AS y) AS z JOIN (SELECT * FROM x JOIN (SELECT * FROM x) AS y) AS q;
WITH y AS (SELECT * FROM x), z AS (SELECT * FROM x JOIN y AS y) SELECT * FROM z AS z JOIN z AS q;
-- Another duplicate...
SELECT x.id FROM (SELECT * FROM x AS x JOIN y AS y ON x.id = y.id) AS x JOIN (SELECT * FROM x AS x JOIN y AS y ON x.id = y.id) AS y ON x.id = y.id;
WITH x_2 AS (SELECT * FROM x AS x JOIN y AS y ON x.id = y.id) SELECT x.id FROM x_2 AS x JOIN x_2 AS y ON x.id = y.id;
-- Root subquery
(SELECT * FROM (SELECT * FROM x)) LIMIT 1;
(WITH cte AS (SELECT * FROM x) SELECT * FROM cte AS cte) LIMIT 1;
-- Existing duplicate CTE
WITH y AS (SELECT a FROM x) SELECT a FROM (SELECT a FROM x) AS y JOIN y AS z;
WITH y AS (SELECT a FROM x) SELECT a FROM y AS y JOIN y AS z;

View file

@ -18,6 +18,14 @@ SELECT x.a AS a, SUM(x.b) AS "_col_1" FROM x AS x WHERE x.a > 1 GROUP BY x.a;
SELECT a, c FROM (SELECT a, b FROM x WHERE a > 1) AS x JOIN y ON x.b = y.b;
SELECT x.a AS a, y.c AS c FROM x AS x JOIN y AS y ON x.b = y.b WHERE x.a > 1;
-- Outer query has join
SELECT a, c FROM (SELECT a, b FROM x WHERE a > 1) AS x JOIN y ON x.b = y.b;
SELECT x.a AS a, y.c AS c FROM x AS x JOIN y AS y ON x.b = y.b WHERE x.a > 1;
# leave_tables_isolated: true
SELECT a, c FROM (SELECT a, b FROM x WHERE a > 1) AS x JOIN y ON x.b = y.b;
SELECT x.a AS a, y.c AS c FROM (SELECT x.a AS a, x.b AS b FROM x AS x WHERE x.a > 1) AS x JOIN y AS y ON x.b = y.b;
-- Join on derived table
SELECT a, c FROM x JOIN (SELECT b, c FROM y) AS y ON x.b = y.b;
SELECT x.a AS a, y.c AS c FROM x AS x JOIN y AS y ON x.b = y.b;
@ -42,13 +50,9 @@ SELECT q_2.a AS a, q.c AS c, r.c AS c FROM x AS q_2 JOIN y AS r_2 ON q_2.b = r_2
SELECT r.b FROM (SELECT b FROM x AS x) AS q JOIN (SELECT b FROM x) AS r ON q.b = r.b;
SELECT x_2.b AS b FROM x AS x JOIN x AS x_2 ON x.b = x_2.b;
-- WHERE clause in joined derived table is merged
-- WHERE clause in joined derived table is merged to ON clause
SELECT x.a, y.c FROM x JOIN (SELECT b, c FROM y WHERE c > 1) AS y;
SELECT x.a AS a, y.c AS c FROM x AS x JOIN y AS y WHERE y.c > 1;
-- WHERE clause in outer joined derived table is merged to ON clause
SELECT x.a, y.c FROM x LEFT JOIN (SELECT b, c FROM y WHERE c > 1) AS y;
SELECT x.a AS a, y.c AS c FROM x AS x LEFT JOIN y AS y ON y.c > 1;
SELECT x.a AS a, y.c AS c FROM x AS x JOIN y AS y ON y.c > 1;
-- Comma JOIN in outer query
SELECT x.a, y.c FROM (SELECT a FROM x) AS x, (SELECT c FROM y) AS y;
@ -61,3 +65,35 @@ SELECT x.a AS a, z.c AS c FROM x AS x CROSS JOIN y AS z;
-- (Regression) Column in ORDER BY
SELECT * FROM (SELECT * FROM (SELECT * FROM x)) ORDER BY a LIMIT 1;
SELECT x.a AS a, x.b AS b FROM x AS x ORDER BY x.a LIMIT 1;
-- CTE
WITH x AS (SELECT a, b FROM x) SELECT a, b FROM x;
SELECT x.a AS a, x.b AS b FROM x AS x;
-- CTE with outer table alias
WITH y AS (SELECT a, b FROM x) SELECT a, b FROM y AS z;
SELECT x.a AS a, x.b AS b FROM x AS x;
-- Nested CTE
WITH x AS (SELECT a FROM x), x2 AS (SELECT a FROM x) SELECT a FROM x2;
SELECT x.a AS a FROM x AS x;
-- CTE WHERE clause is merged
WITH x AS (SELECT a, b FROM x WHERE a > 1) SELECT a, SUM(b) FROM x GROUP BY a;
SELECT x.a AS a, SUM(x.b) AS "_col_1" FROM x AS x WHERE x.a > 1 GROUP BY x.a;
-- CTE Outer query has join
WITH x AS (SELECT a, b FROM x WHERE a > 1) SELECT a, c FROM x AS x JOIN y ON x.b = y.b;
SELECT x.a AS a, y.c AS c FROM x AS x JOIN y AS y ON x.b = y.b WHERE x.a > 1;
-- CTE with inner table alias
WITH y AS (SELECT a, b FROM x AS q) SELECT a, b FROM y AS z;
SELECT q.a AS a, q.b AS b FROM x AS q;
-- Duplicate queries to CTE
WITH x AS (SELECT a, b FROM x) SELECT x.a, y.b FROM x JOIN x AS y;
WITH x AS (SELECT x.a AS a, x.b AS b FROM x AS x) SELECT x.a AS a, y.b AS b FROM x JOIN x AS y;
-- Nested CTE
SELECT * FROM (WITH x AS (SELECT a, b FROM x) SELECT a, b FROM x);
SELECT x.a AS a, x.b AS b FROM x AS x;

View file

@ -65,18 +65,14 @@ WITH "cte1" AS (
SELECT
"x"."a" AS "a"
FROM "x" AS "x"
), "cte2" AS (
SELECT
"cte1"."a" + 1 AS "a"
FROM "cte1"
)
SELECT
"cte1"."a" AS "a"
FROM "cte1"
UNION ALL
SELECT
"cte2"."a" AS "a"
FROM "cte2";
"cte1"."a" + 1 AS "a"
FROM "cte1";
SELECT a, SUM(b)
FROM (
@ -86,18 +82,19 @@ FROM (
) d
WHERE (TRUE AND TRUE OR 'a' = 'b') AND a > 1
GROUP BY a;
SELECT
"x"."a" AS "a",
SUM("y"."b") AS "_col_1"
FROM "x" AS "x"
LEFT JOIN (
WITH "_u_0" AS (
SELECT
MAX("y"."b") AS "_col_0",
"y"."a" AS "_u_1"
FROM "y" AS "y"
GROUP BY
"y"."a"
) AS "_u_0"
)
SELECT
"x"."a" AS "a",
SUM("y"."b") AS "_col_1"
FROM "x" AS "x"
LEFT JOIN "_u_0" AS "_u_0"
ON "x"."a" = "_u_0"."_u_1"
JOIN "y" AS "y"
ON "x"."a" = "y"."a"
@ -127,3 +124,16 @@ LIMIT 1;
FROM "y" AS "y"
)
LIMIT 1;
# dialect: spark
SELECT /*+ BROADCAST(y) */ x.b FROM x JOIN y ON x.b = y.b;
SELECT /*+ BROADCAST(`y`) */
`x`.`b` AS `b`
FROM `x` AS `x`
JOIN `y` AS `y`
ON `x`.`b` = `y`.`b`;
SELECT AGGREGATE(ARRAY(x.a, x.b), 0, (x, acc) -> x + acc + a) AS sum_agg FROM x;
SELECT
AGGREGATE(ARRAY("x"."a", "x"."b"), 0, ("x", "acc") -> "x" + "acc" + "x"."a") AS "sum_agg"
FROM "x" AS "x";

View file

@ -69,6 +69,9 @@ SELECT ROW_NUMBER() OVER (PARTITION BY x.a ORDER BY x.b) AS row_num FROM x AS x
SELECT x.b, x.a FROM x LEFT JOIN y ON x.b = y.b QUALIFY ROW_NUMBER() OVER(PARTITION BY x.b ORDER BY x.a DESC) = 1;
SELECT x.b AS b, x.a AS a FROM x AS x LEFT JOIN y AS y ON x.b = y.b QUALIFY ROW_NUMBER() OVER (PARTITION BY x.b ORDER BY x.a DESC) = 1;
SELECT AGGREGATE(ARRAY(a, x.b), 0, (x, acc) -> x + acc + a) AS sum_agg FROM x;
SELECT AGGREGATE(ARRAY(x.a, x.b), 0, (x, acc) -> x + acc + x.a) AS sum_agg FROM x AS x;
--------------------------------------
-- Derived tables
--------------------------------------
@ -231,3 +234,10 @@ SELECT COALESCE(x.b, y.b) AS b FROM x AS x JOIN y AS y ON x.b = y.b WHERE COALES
SELECT b FROM x JOIN y USING (b) JOIN z USING (b);
SELECT COALESCE(x.b, y.b, z.b) AS b FROM x AS x JOIN y AS y ON x.b = y.b JOIN z AS z ON x.b = z.b;
--------------------------------------
-- Hint with table reference
--------------------------------------
# dialect: spark
SELECT /*+ BROADCAST(y) */ x.b FROM x JOIN y ON x.b = y.b;
SELECT /*+ BROADCAST(y) */ x.b AS b FROM x AS x JOIN y AS y ON x.b = y.b;

View file

@ -5,7 +5,6 @@ SELECT z.* FROM x;
SELECT x FROM x;
INSERT INTO x VALUES (1, 2);
SELECT a FROM x AS z JOIN y AS z;
WITH z AS (SELECT * FROM x) SELECT * FROM x AS z;
SELECT a FROM x JOIN (SELECT b FROM y WHERE y.b = x.c);
SELECT a FROM x AS y JOIN (SELECT a FROM y) AS q ON y.a = q.a;
SELECT q.a FROM (SELECT x.b FROM x) AS z JOIN (SELECT a FROM z) AS q ON z.b = q.a;

View file

@ -97,19 +97,32 @@ order by
p_partkey
limit
100;
WITH "_e_0" AS (
WITH "partsupp_2" AS (
SELECT
"partsupp"."ps_partkey" AS "ps_partkey",
"partsupp"."ps_suppkey" AS "ps_suppkey",
"partsupp"."ps_supplycost" AS "ps_supplycost"
FROM "partsupp" AS "partsupp"
), "_e_1" AS (
), "region_2" AS (
SELECT
"region"."r_regionkey" AS "r_regionkey",
"region"."r_name" AS "r_name"
FROM "region" AS "region"
WHERE
"region"."r_name" = 'EUROPE'
), "_u_0" AS (
SELECT
MIN("partsupp"."ps_supplycost") AS "_col_0",
"partsupp"."ps_partkey" AS "_u_1"
FROM "partsupp_2" AS "partsupp"
CROSS JOIN "region_2" AS "region"
JOIN "nation" AS "nation"
ON "nation"."n_regionkey" = "region"."r_regionkey"
JOIN "supplier" AS "supplier"
ON "supplier"."s_nationkey" = "nation"."n_nationkey"
AND "supplier"."s_suppkey" = "partsupp"."ps_suppkey"
GROUP BY
"partsupp"."ps_partkey"
)
SELECT
"supplier"."s_acctbal" AS "s_acctbal",
@ -121,25 +134,12 @@ SELECT
"supplier"."s_phone" AS "s_phone",
"supplier"."s_comment" AS "s_comment"
FROM "part" AS "part"
LEFT JOIN (
SELECT
MIN("partsupp"."ps_supplycost") AS "_col_0",
"partsupp"."ps_partkey" AS "_u_1"
FROM "_e_0" AS "partsupp"
CROSS JOIN "_e_1" AS "region"
JOIN "nation" AS "nation"
ON "nation"."n_regionkey" = "region"."r_regionkey"
JOIN "supplier" AS "supplier"
ON "supplier"."s_nationkey" = "nation"."n_nationkey"
AND "supplier"."s_suppkey" = "partsupp"."ps_suppkey"
GROUP BY
"partsupp"."ps_partkey"
) AS "_u_0"
LEFT JOIN "_u_0" AS "_u_0"
ON "part"."p_partkey" = "_u_0"."_u_1"
CROSS JOIN "_e_1" AS "region"
CROSS JOIN "region_2" AS "region"
JOIN "nation" AS "nation"
ON "nation"."n_regionkey" = "region"."r_regionkey"
JOIN "_e_0" AS "partsupp"
JOIN "partsupp_2" AS "partsupp"
ON "part"."p_partkey" = "partsupp"."ps_partkey"
JOIN "supplier" AS "supplier"
ON "supplier"."s_nationkey" = "nation"."n_nationkey"
@ -193,12 +193,12 @@ SELECT
FROM "customer" AS "customer"
JOIN "orders" AS "orders"
ON "customer"."c_custkey" = "orders"."o_custkey"
AND "orders"."o_orderdate" < '1995-03-15'
JOIN "lineitem" AS "lineitem"
ON "lineitem"."l_orderkey" = "orders"."o_orderkey"
AND "lineitem"."l_shipdate" > '1995-03-15'
WHERE
"customer"."c_mktsegment" = 'BUILDING'
AND "lineitem"."l_shipdate" > '1995-03-15'
AND "orders"."o_orderdate" < '1995-03-15'
GROUP BY
"lineitem"."l_orderkey",
"orders"."o_orderdate",
@ -232,11 +232,7 @@ group by
o_orderpriority
order by
o_orderpriority;
SELECT
"orders"."o_orderpriority" AS "o_orderpriority",
COUNT(*) AS "order_count"
FROM "orders" AS "orders"
LEFT JOIN (
WITH "_u_0" AS (
SELECT
"lineitem"."l_orderkey" AS "l_orderkey"
FROM "lineitem" AS "lineitem"
@ -244,7 +240,12 @@ LEFT JOIN (
"lineitem"."l_commitdate" < "lineitem"."l_receiptdate"
GROUP BY
"lineitem"."l_orderkey"
) AS "_u_0"
)
SELECT
"orders"."o_orderpriority" AS "o_orderpriority",
COUNT(*) AS "order_count"
FROM "orders" AS "orders"
LEFT JOIN "_u_0" AS "_u_0"
ON "_u_0"."l_orderkey" = "orders"."o_orderkey"
WHERE
"orders"."o_orderdate" < CAST('1993-10-01' AS DATE)
@ -290,7 +291,10 @@ SELECT
FROM "customer" AS "customer"
JOIN "orders" AS "orders"
ON "customer"."c_custkey" = "orders"."o_custkey"
CROSS JOIN "region" AS "region"
AND "orders"."o_orderdate" < CAST('1995-01-01' AS DATE)
AND "orders"."o_orderdate" >= CAST('1994-01-01' AS DATE)
JOIN "region" AS "region"
ON "region"."r_name" = 'ASIA'
JOIN "nation" AS "nation"
ON "nation"."n_regionkey" = "region"."r_regionkey"
JOIN "supplier" AS "supplier"
@ -299,10 +303,6 @@ JOIN "supplier" AS "supplier"
JOIN "lineitem" AS "lineitem"
ON "lineitem"."l_orderkey" = "orders"."o_orderkey"
AND "lineitem"."l_suppkey" = "supplier"."s_suppkey"
WHERE
"orders"."o_orderdate" < CAST('1995-01-01' AS DATE)
AND "orders"."o_orderdate" >= CAST('1994-01-01' AS DATE)
AND "region"."r_name" = 'ASIA'
GROUP BY
"nation"."n_name"
ORDER BY
@ -371,7 +371,7 @@ order by
supp_nation,
cust_nation,
l_year;
WITH "_e_0" AS (
WITH "n1" AS (
SELECT
"nation"."n_nationkey" AS "n_nationkey",
"nation"."n_name" AS "n_name"
@ -389,14 +389,15 @@ SELECT
)) AS "revenue"
FROM "supplier" AS "supplier"
JOIN "lineitem" AS "lineitem"
ON "supplier"."s_suppkey" = "lineitem"."l_suppkey"
ON "lineitem"."l_shipdate" BETWEEN CAST('1995-01-01' AS DATE) AND CAST('1996-12-31' AS DATE)
AND "supplier"."s_suppkey" = "lineitem"."l_suppkey"
JOIN "orders" AS "orders"
ON "orders"."o_orderkey" = "lineitem"."l_orderkey"
JOIN "customer" AS "customer"
ON "customer"."c_custkey" = "orders"."o_custkey"
JOIN "_e_0" AS "n1"
JOIN "n1" AS "n1"
ON "supplier"."s_nationkey" = "n1"."n_nationkey"
JOIN "_e_0" AS "n2"
JOIN "n1" AS "n2"
ON "customer"."c_nationkey" = "n2"."n_nationkey"
AND (
"n1"."n_name" = 'FRANCE'
@ -406,8 +407,6 @@ JOIN "_e_0" AS "n2"
"n1"."n_name" = 'GERMANY'
OR "n2"."n_name" = 'GERMANY'
)
WHERE
"lineitem"."l_shipdate" BETWEEN CAST('1995-01-01' AS DATE) AND CAST('1996-12-31' AS DATE)
GROUP BY
"n1"."n_name",
"n2"."n_name",
@ -469,13 +468,15 @@ SELECT
1 - "lineitem"."l_discount"
)) AS "mkt_share"
FROM "part" AS "part"
CROSS JOIN "region" AS "region"
JOIN "region" AS "region"
ON "region"."r_name" = 'AMERICA'
JOIN "nation" AS "nation"
ON "nation"."n_regionkey" = "region"."r_regionkey"
JOIN "customer" AS "customer"
ON "customer"."c_nationkey" = "nation"."n_nationkey"
JOIN "orders" AS "orders"
ON "orders"."o_custkey" = "customer"."c_custkey"
AND "orders"."o_orderdate" BETWEEN CAST('1995-01-01' AS DATE) AND CAST('1996-12-31' AS DATE)
JOIN "lineitem" AS "lineitem"
ON "lineitem"."l_orderkey" = "orders"."o_orderkey"
AND "part"."p_partkey" = "lineitem"."l_partkey"
@ -484,9 +485,7 @@ JOIN "supplier" AS "supplier"
JOIN "nation" AS "nation_2"
ON "supplier"."s_nationkey" = "nation_2"."n_nationkey"
WHERE
"orders"."o_orderdate" BETWEEN CAST('1995-01-01' AS DATE) AND CAST('1996-12-31' AS DATE)
AND "part"."p_type" = 'ECONOMY ANODIZED STEEL'
AND "region"."r_name" = 'AMERICA'
"part"."p_type" = 'ECONOMY ANODIZED STEEL'
GROUP BY
EXTRACT(year FROM "orders"."o_orderdate")
ORDER BY
@ -604,14 +603,13 @@ SELECT
FROM "customer" AS "customer"
JOIN "orders" AS "orders"
ON "customer"."c_custkey" = "orders"."o_custkey"
JOIN "lineitem" AS "lineitem"
ON "lineitem"."l_orderkey" = "orders"."o_orderkey"
JOIN "nation" AS "nation"
ON "customer"."c_nationkey" = "nation"."n_nationkey"
WHERE
"lineitem"."l_returnflag" = 'R'
AND "orders"."o_orderdate" < CAST('1994-01-01' AS DATE)
AND "orders"."o_orderdate" >= CAST('1993-10-01' AS DATE)
JOIN "lineitem" AS "lineitem"
ON "lineitem"."l_orderkey" = "orders"."o_orderkey"
AND "lineitem"."l_returnflag" = 'R'
JOIN "nation" AS "nation"
ON "customer"."c_nationkey" = "nation"."n_nationkey"
GROUP BY
"customer"."c_custkey",
"customer"."c_name",
@ -654,12 +652,12 @@ group by
)
order by
value desc;
WITH "_e_0" AS (
WITH "supplier_2" AS (
SELECT
"supplier"."s_suppkey" AS "s_suppkey",
"supplier"."s_nationkey" AS "s_nationkey"
FROM "supplier" AS "supplier"
), "_e_1" AS (
), "nation_2" AS (
SELECT
"nation"."n_nationkey" AS "n_nationkey",
"nation"."n_name" AS "n_name"
@ -671,9 +669,9 @@ SELECT
"partsupp"."ps_partkey" AS "ps_partkey",
SUM("partsupp"."ps_supplycost" * "partsupp"."ps_availqty") AS "value"
FROM "partsupp" AS "partsupp"
JOIN "_e_0" AS "supplier"
JOIN "supplier_2" AS "supplier"
ON "partsupp"."ps_suppkey" = "supplier"."s_suppkey"
JOIN "_e_1" AS "nation"
JOIN "nation_2" AS "nation"
ON "supplier"."s_nationkey" = "nation"."n_nationkey"
GROUP BY
"partsupp"."ps_partkey"
@ -682,9 +680,9 @@ HAVING
SELECT
SUM("partsupp"."ps_supplycost" * "partsupp"."ps_availqty") * 0.0001 AS "_col_0"
FROM "partsupp" AS "partsupp"
JOIN "_e_0" AS "supplier"
JOIN "supplier_2" AS "supplier"
ON "partsupp"."ps_suppkey" = "supplier"."s_suppkey"
JOIN "_e_1" AS "nation"
JOIN "nation_2" AS "nation"
ON "supplier"."s_nationkey" = "nation"."n_nationkey"
)
ORDER BY
@ -737,13 +735,12 @@ SELECT
END) AS "low_line_count"
FROM "orders" AS "orders"
JOIN "lineitem" AS "lineitem"
ON "orders"."o_orderkey" = "lineitem"."l_orderkey"
WHERE
"lineitem"."l_commitdate" < "lineitem"."l_receiptdate"
ON "lineitem"."l_commitdate" < "lineitem"."l_receiptdate"
AND "lineitem"."l_receiptdate" < CAST('1995-01-01' AS DATE)
AND "lineitem"."l_receiptdate" >= CAST('1994-01-01' AS DATE)
AND "lineitem"."l_shipdate" < "lineitem"."l_commitdate"
AND "lineitem"."l_shipmode" IN ('MAIL', 'SHIP')
AND "orders"."o_orderkey" = "lineitem"."l_orderkey"
GROUP BY
"lineitem"."l_shipmode"
ORDER BY
@ -772,10 +769,7 @@ group by
order by
custdist desc,
c_count desc;
SELECT
"c_orders"."c_count" AS "c_count",
COUNT(*) AS "custdist"
FROM (
WITH "c_orders" AS (
SELECT
COUNT("orders"."o_orderkey") AS "c_count"
FROM "customer" AS "customer"
@ -784,7 +778,11 @@ FROM (
AND NOT "orders"."o_comment" LIKE '%special%requests%'
GROUP BY
"customer"."c_custkey"
) AS "c_orders"
)
SELECT
"c_orders"."c_count" AS "c_count",
COUNT(*) AS "custdist"
FROM "c_orders" AS "c_orders"
GROUP BY
"c_orders"."c_count"
ORDER BY
@ -920,13 +918,7 @@ order by
p_brand,
p_type,
p_size;
SELECT
"part"."p_brand" AS "p_brand",
"part"."p_type" AS "p_type",
"part"."p_size" AS "p_size",
COUNT(DISTINCT "partsupp"."ps_suppkey") AS "supplier_cnt"
FROM "partsupp" AS "partsupp"
LEFT JOIN (
WITH "_u_0" AS (
SELECT
"supplier"."s_suppkey" AS "s_suppkey"
FROM "supplier" AS "supplier"
@ -934,15 +926,22 @@ LEFT JOIN (
"supplier"."s_comment" LIKE '%Customer%Complaints%'
GROUP BY
"supplier"."s_suppkey"
) AS "_u_0"
)
SELECT
"part"."p_brand" AS "p_brand",
"part"."p_type" AS "p_type",
"part"."p_size" AS "p_size",
COUNT(DISTINCT "partsupp"."ps_suppkey") AS "supplier_cnt"
FROM "partsupp" AS "partsupp"
LEFT JOIN "_u_0" AS "_u_0"
ON "partsupp"."ps_suppkey" = "_u_0"."s_suppkey"
JOIN "part" AS "part"
ON "part"."p_partkey" = "partsupp"."ps_partkey"
WHERE
"_u_0"."s_suppkey" IS NULL
AND "part"."p_brand" <> 'Brand#45'
ON "part"."p_brand" <> 'Brand#45'
AND "part"."p_partkey" = "partsupp"."ps_partkey"
AND "part"."p_size" IN (49, 14, 23, 45, 19, 3, 36, 9)
AND NOT "part"."p_type" LIKE 'MEDIUM POLISHED%'
WHERE
"_u_0"."s_suppkey" IS NULL
GROUP BY
"part"."p_brand",
"part"."p_type",
@ -973,24 +972,25 @@ where
where
l_partkey = p_partkey
);
SELECT
SUM("lineitem"."l_extendedprice") / 7.0 AS "avg_yearly"
FROM "lineitem" AS "lineitem"
JOIN "part" AS "part"
ON "part"."p_partkey" = "lineitem"."l_partkey"
LEFT JOIN (
WITH "_u_0" AS (
SELECT
0.2 * AVG("lineitem"."l_quantity") AS "_col_0",
"lineitem"."l_partkey" AS "_u_1"
FROM "lineitem" AS "lineitem"
GROUP BY
"lineitem"."l_partkey"
) AS "_u_0"
)
SELECT
SUM("lineitem"."l_extendedprice") / 7.0 AS "avg_yearly"
FROM "lineitem" AS "lineitem"
JOIN "part" AS "part"
ON "part"."p_brand" = 'Brand#23'
AND "part"."p_container" = 'MED BOX'
AND "part"."p_partkey" = "lineitem"."l_partkey"
LEFT JOIN "_u_0" AS "_u_0"
ON "_u_0"."_u_1" = "part"."p_partkey"
WHERE
"lineitem"."l_quantity" < "_u_0"."_col_0"
AND "part"."p_brand" = 'Brand#23'
AND "part"."p_container" = 'MED BOX'
AND NOT "_u_0"."_u_1" IS NULL;
--------------------------------------
@ -1030,6 +1030,16 @@ order by
o_orderdate
limit
100;
WITH "_u_0" AS (
SELECT
"lineitem"."l_orderkey" AS "l_orderkey"
FROM "lineitem" AS "lineitem"
GROUP BY
"lineitem"."l_orderkey",
"lineitem"."l_orderkey"
HAVING
SUM("lineitem"."l_quantity") > 300
)
SELECT
"customer"."c_name" AS "c_name",
"customer"."c_custkey" AS "c_custkey",
@ -1040,16 +1050,7 @@ SELECT
FROM "customer" AS "customer"
JOIN "orders" AS "orders"
ON "customer"."c_custkey" = "orders"."o_custkey"
LEFT JOIN (
SELECT
"lineitem"."l_orderkey" AS "l_orderkey"
FROM "lineitem" AS "lineitem"
GROUP BY
"lineitem"."l_orderkey",
"lineitem"."l_orderkey"
HAVING
SUM("lineitem"."l_quantity") > 300
) AS "_u_0"
LEFT JOIN "_u_0" AS "_u_0"
ON "orders"."o_orderkey" = "_u_0"."l_orderkey"
JOIN "lineitem" AS "lineitem"
ON "orders"."o_orderkey" = "lineitem"."l_orderkey"
@ -1200,38 +1201,34 @@ where
and n_name = 'CANADA'
order by
s_name;
SELECT
"supplier"."s_name" AS "s_name",
"supplier"."s_address" AS "s_address"
FROM "supplier" AS "supplier"
LEFT JOIN (
WITH "_u_0" AS (
SELECT
0.5 * SUM("lineitem"."l_quantity") AS "_col_0",
"lineitem"."l_partkey" AS "_u_1",
"lineitem"."l_suppkey" AS "_u_2"
FROM "lineitem" AS "lineitem"
WHERE
"lineitem"."l_shipdate" < CAST('1995-01-01' AS DATE)
AND "lineitem"."l_shipdate" >= CAST('1994-01-01' AS DATE)
GROUP BY
"lineitem"."l_partkey",
"lineitem"."l_suppkey"
), "_u_3" AS (
SELECT
"part"."p_partkey" AS "p_partkey"
FROM "part" AS "part"
WHERE
"part"."p_name" LIKE 'forest%'
GROUP BY
"part"."p_partkey"
), "_u_4" AS (
SELECT
"partsupp"."ps_suppkey" AS "ps_suppkey"
FROM "partsupp" AS "partsupp"
LEFT JOIN (
SELECT
0.5 * SUM("lineitem"."l_quantity") AS "_col_0",
"lineitem"."l_partkey" AS "_u_1",
"lineitem"."l_suppkey" AS "_u_2"
FROM "lineitem" AS "lineitem"
WHERE
"lineitem"."l_shipdate" < CAST('1995-01-01' AS DATE)
AND "lineitem"."l_shipdate" >= CAST('1994-01-01' AS DATE)
GROUP BY
"lineitem"."l_partkey",
"lineitem"."l_suppkey"
) AS "_u_0"
LEFT JOIN "_u_0" AS "_u_0"
ON "_u_0"."_u_1" = "partsupp"."ps_partkey"
AND "_u_0"."_u_2" = "partsupp"."ps_suppkey"
LEFT JOIN (
SELECT
"part"."p_partkey" AS "p_partkey"
FROM "part" AS "part"
WHERE
"part"."p_name" LIKE 'forest%'
GROUP BY
"part"."p_partkey"
) AS "_u_3"
LEFT JOIN "_u_3" AS "_u_3"
ON "partsupp"."ps_partkey" = "_u_3"."p_partkey"
WHERE
"partsupp"."ps_availqty" > "_u_0"."_col_0"
@ -1240,13 +1237,18 @@ LEFT JOIN (
AND NOT "_u_3"."p_partkey" IS NULL
GROUP BY
"partsupp"."ps_suppkey"
) AS "_u_4"
)
SELECT
"supplier"."s_name" AS "s_name",
"supplier"."s_address" AS "s_address"
FROM "supplier" AS "supplier"
LEFT JOIN "_u_4" AS "_u_4"
ON "supplier"."s_suppkey" = "_u_4"."ps_suppkey"
JOIN "nation" AS "nation"
ON "supplier"."s_nationkey" = "nation"."n_nationkey"
ON "nation"."n_name" = 'CANADA'
AND "supplier"."s_nationkey" = "nation"."n_nationkey"
WHERE
"nation"."n_name" = 'CANADA'
AND NOT "_u_4"."ps_suppkey" IS NULL
NOT "_u_4"."ps_suppkey" IS NULL
ORDER BY
"s_name";
@ -1294,22 +1296,14 @@ order by
s_name
limit
100;
SELECT
"supplier"."s_name" AS "s_name",
COUNT(*) AS "numwait"
FROM "supplier" AS "supplier"
JOIN "lineitem" AS "lineitem"
ON "supplier"."s_suppkey" = "lineitem"."l_suppkey"
LEFT JOIN (
WITH "_u_0" AS (
SELECT
"l2"."l_orderkey" AS "l_orderkey",
ARRAY_AGG("l2"."l_suppkey") AS "_u_1"
FROM "lineitem" AS "l2"
GROUP BY
"l2"."l_orderkey"
) AS "_u_0"
ON "_u_0"."l_orderkey" = "lineitem"."l_orderkey"
LEFT JOIN (
), "_u_2" AS (
SELECT
"l3"."l_orderkey" AS "l_orderkey",
ARRAY_AGG("l3"."l_suppkey") AS "_u_3"
@ -1318,20 +1312,29 @@ LEFT JOIN (
"l3"."l_receiptdate" > "l3"."l_commitdate"
GROUP BY
"l3"."l_orderkey"
) AS "_u_2"
)
SELECT
"supplier"."s_name" AS "s_name",
COUNT(*) AS "numwait"
FROM "supplier" AS "supplier"
JOIN "lineitem" AS "lineitem"
ON "lineitem"."l_receiptdate" > "lineitem"."l_commitdate"
AND "supplier"."s_suppkey" = "lineitem"."l_suppkey"
LEFT JOIN "_u_0" AS "_u_0"
ON "_u_0"."l_orderkey" = "lineitem"."l_orderkey"
LEFT JOIN "_u_2" AS "_u_2"
ON "_u_2"."l_orderkey" = "lineitem"."l_orderkey"
JOIN "orders" AS "orders"
ON "orders"."o_orderkey" = "lineitem"."l_orderkey"
AND "orders"."o_orderstatus" = 'F'
JOIN "nation" AS "nation"
ON "supplier"."s_nationkey" = "nation"."n_nationkey"
ON "nation"."n_name" = 'SAUDI ARABIA'
AND "supplier"."s_nationkey" = "nation"."n_nationkey"
WHERE
(
"_u_2"."l_orderkey" IS NULL
OR NOT ARRAY_ANY("_u_2"."_u_3", "_x" -> "_x" <> "lineitem"."l_suppkey")
)
AND "lineitem"."l_receiptdate" > "lineitem"."l_commitdate"
AND "nation"."n_name" = 'SAUDI ARABIA'
AND "orders"."o_orderstatus" = 'F'
AND ARRAY_ANY("_u_0"."_u_1", "_x" -> "_x" <> "lineitem"."l_suppkey")
AND NOT "_u_0"."l_orderkey" IS NULL
GROUP BY
@ -1381,18 +1384,19 @@ group by
cntrycode
order by
cntrycode;
SELECT
SUBSTRING("customer"."c_phone", 1, 2) AS "cntrycode",
COUNT(*) AS "numcust",
SUM("customer"."c_acctbal") AS "totacctbal"
FROM "customer" AS "customer"
LEFT JOIN (
WITH "_u_0" AS (
SELECT
"orders"."o_custkey" AS "_u_1"
FROM "orders" AS "orders"
GROUP BY
"orders"."o_custkey"
) AS "_u_0"
)
SELECT
SUBSTRING("customer"."c_phone", 1, 2) AS "cntrycode",
COUNT(*) AS "numcust",
SUM("customer"."c_acctbal") AS "totacctbal"
FROM "customer" AS "customer"
LEFT JOIN "_u_0" AS "_u_0"
ON "_u_0"."_u_1" = "customer"."c_custkey"
WHERE
"_u_0"."_u_1" IS NULL

View file

@ -264,22 +264,3 @@ CREATE TABLE "t_customer_account" (
"account_no" VARCHAR(100)
);
CREATE TABLE "t_customer_account" (
"id" int(11) NOT NULL AUTO_INCREMENT,
"customer_id" int(11) DEFAULT NULL COMMENT '客户id',
"bank" varchar(100) COLLATE utf8_bin DEFAULT NULL COMMENT '行别',
"account_no" varchar(100) COLLATE utf8_bin DEFAULT NULL COMMENT '账号',
PRIMARY KEY ("id")
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARACTER SET=utf8 COLLATE=utf8_bin COMMENT='客户账户表';
CREATE TABLE "t_customer_account" (
"id" INT(11) NOT NULL AUTO_INCREMENT,
"customer_id" INT(11) DEFAULT NULL COMMENT '客户id',
"bank" VARCHAR(100) COLLATE utf8_bin DEFAULT NULL COMMENT '行别',
"account_no" VARCHAR(100) COLLATE utf8_bin DEFAULT NULL COMMENT '账号',
PRIMARY KEY("id")
)
ENGINE=InnoDB
AUTO_INCREMENT=1
DEFAULT CHARACTER SET=utf8
COLLATE=utf8_bin
COMMENT='客户账户表';

View file

@ -270,7 +270,7 @@ class TestBuild(unittest.TestCase):
lambda: parse_one("SELECT * FROM y")
.assert_is(exp.Select)
.ctas("foo.x", properties={"format": "parquet", "y": "2"}),
"CREATE TABLE foo.x STORED AS PARQUET TBLPROPERTIES ('y' = '2') AS SELECT * FROM y",
"CREATE TABLE foo.x STORED AS PARQUET TBLPROPERTIES ('y'='2') AS SELECT * FROM y",
"hive",
),
(lambda: and_("x=1", "y=1"), "x = 1 AND y = 1"),
@ -308,6 +308,18 @@ class TestBuild(unittest.TestCase):
lambda: exp.subquery("select x from tbl UNION select x from bar", "unioned").select("x"),
"SELECT x FROM (SELECT x FROM tbl UNION SELECT x FROM bar) AS unioned",
),
(
lambda: exp.update("tbl", {"x": None, "y": {"x": 1}}),
"UPDATE tbl SET x = NULL, y = MAP('x', 1)",
),
(
lambda: exp.update("tbl", {"x": 1}, where="y > 0"),
"UPDATE tbl SET x = 1 WHERE y > 0",
),
(
lambda: exp.update("tbl", {"x": 1}, from_="tbl2"),
"UPDATE tbl SET x = 1 FROM tbl2",
),
]:
with self.subTest(sql):
self.assertEqual(expression().sql(dialect[0] if dialect else None), sql)

View file

@ -27,6 +27,8 @@ class TestExpressions(unittest.TestCase):
parse_one("ROW() OVER (partition BY y)"),
)
self.assertEqual(parse_one("TO_DATE(x)", read="hive"), parse_one("ts_or_ds_to_date(x)"))
self.assertEqual(exp.Table(pivots=[]), exp.Table())
self.assertNotEqual(exp.Table(pivots=[None]), exp.Table())
def test_find(self):
expression = parse_one("CREATE TABLE x STORED AS PARQUET AS SELECT * FROM y")
@ -280,6 +282,19 @@ class TestExpressions(unittest.TestCase):
expression.find(exp.Table).replace(parse_one("y"))
self.assertEqual(expression.sql(), "SELECT c, b FROM y")
def test_pop(self):
expression = parse_one("SELECT a, b FROM x")
expression.find(exp.Column).pop()
self.assertEqual(expression.sql(), "SELECT b FROM x")
expression.find(exp.Column).pop()
self.assertEqual(expression.sql(), "SELECT FROM x")
expression.pop()
self.assertEqual(expression.sql(), "SELECT FROM x")
expression = parse_one("WITH x AS (SELECT a FROM x) SELECT * FROM x")
expression.find(exp.With).pop()
self.assertEqual(expression.sql(), "SELECT * FROM x")
def test_walk(self):
expression = parse_one("SELECT * FROM (SELECT * FROM x)")
self.assertEqual(len(list(expression.walk())), 9)
@ -316,6 +331,7 @@ class TestExpressions(unittest.TestCase):
self.assertIsInstance(parse_one("MAX(a)"), exp.Max)
self.assertIsInstance(parse_one("MIN(a)"), exp.Min)
self.assertIsInstance(parse_one("MONTH(a)"), exp.Month)
self.assertIsInstance(parse_one("POSITION(' ' IN a)"), exp.StrPosition)
self.assertIsInstance(parse_one("POW(a, 2)"), exp.Pow)
self.assertIsInstance(parse_one("POWER(a, 2)"), exp.Pow)
self.assertIsInstance(parse_one("QUANTILE(a, 0.90)"), exp.Quantile)
@ -420,7 +436,7 @@ class TestExpressions(unittest.TestCase):
exp.Properties.from_dict(
{
"FORMAT": "parquet",
"PARTITIONED_BY": [exp.to_identifier("a"), exp.to_identifier("b")],
"PARTITIONED_BY": (exp.to_identifier("a"), exp.to_identifier("b")),
"custom": 1,
"TABLE_FORMAT": exp.to_identifier("test_format"),
"ENGINE": None,
@ -444,4 +460,17 @@ class TestExpressions(unittest.TestCase):
),
)
self.assertRaises(ValueError, exp.Properties.from_dict, {"FORMAT": {"key": "value"}})
self.assertRaises(ValueError, exp.Properties.from_dict, {"FORMAT": object})
def test_convert(self):
for value, expected in [
(1, "1"),
("1", "'1'"),
(None, "NULL"),
(True, "TRUE"),
((1, "2", None), "(1, '2', NULL)"),
([1, "2", None], "ARRAY(1, '2', NULL)"),
({"x": None}, "MAP('x', NULL)"),
]:
with self.subTest(value):
self.assertEqual(exp.convert(value).sql(), expected)

View file

@ -1,9 +1,11 @@
import unittest
from functools import partial
from sqlglot import optimizer, parse_one, table
from sqlglot import exp, optimizer, parse_one, table
from sqlglot.errors import OptimizeError
from sqlglot.optimizer.annotate_types import annotate_types
from sqlglot.optimizer.schema import MappingSchema, ensure_schema
from sqlglot.optimizer.scope import traverse_scope
from sqlglot.optimizer.scope import build_scope, traverse_scope
from tests.helpers import TPCH_SCHEMA, load_sql_fixture_pairs, load_sql_fixtures
@ -27,11 +29,17 @@ class TestOptimizer(unittest.TestCase):
}
def check_file(self, file, func, pretty=False, **kwargs):
for meta, sql, expected in load_sql_fixture_pairs(f"optimizer/{file}.sql"):
for i, (meta, sql, expected) in enumerate(load_sql_fixture_pairs(f"optimizer/{file}.sql"), start=1):
dialect = meta.get("dialect")
with self.subTest(sql):
leave_tables_isolated = meta.get("leave_tables_isolated")
func_kwargs = {**kwargs}
if leave_tables_isolated is not None:
func_kwargs["leave_tables_isolated"] = leave_tables_isolated.lower() in ("true", "1")
with self.subTest(f"{i}, {sql}"):
self.assertEqual(
func(parse_one(sql, read=dialect), **kwargs).sql(pretty=pretty, dialect=dialect),
func(parse_one(sql, read=dialect), **func_kwargs).sql(pretty=pretty, dialect=dialect),
expected,
)
@ -123,21 +131,20 @@ class TestOptimizer(unittest.TestCase):
optimizer.optimize_joins.optimize_joins,
)
def test_eliminate_subqueries(self):
self.check_file(
"eliminate_subqueries",
optimizer.eliminate_subqueries.eliminate_subqueries,
pretty=True,
def test_merge_subqueries(self):
optimize = partial(
optimizer.optimize,
rules=[
optimizer.qualify_tables.qualify_tables,
optimizer.qualify_columns.qualify_columns,
optimizer.merge_subqueries.merge_subqueries,
],
)
def test_merge_derived_tables(self):
def optimize(expression, **kwargs):
expression = optimizer.qualify_tables.qualify_tables(expression)
expression = optimizer.qualify_columns.qualify_columns(expression, **kwargs)
expression = optimizer.merge_derived_tables.merge_derived_tables(expression)
return expression
self.check_file("merge_subqueries", optimize, schema=self.schema)
self.check_file("merge_derived_tables", optimize, schema=self.schema)
def test_eliminate_subqueries(self):
self.check_file("eliminate_subqueries", optimizer.eliminate_subqueries.eliminate_subqueries)
def test_tpch(self):
self.check_file("tpc-h/tpc-h", optimizer.optimize, schema=TPCH_SCHEMA, pretty=True)
@ -257,17 +264,73 @@ FROM READ_CSV('tests/fixtures/optimizer/tpc-h/nation.csv.gz', 'delimiter', '|')
ON s.b = r.b
WHERE s.b > (SELECT MAX(x.a) FROM x WHERE x.b = s.b)
"""
scopes = traverse_scope(parse_one(sql))
self.assertEqual(len(scopes), 5)
self.assertEqual(scopes[0].expression.sql(), "SELECT x.b FROM x")
self.assertEqual(scopes[1].expression.sql(), "SELECT y.b FROM y")
self.assertEqual(scopes[2].expression.sql(), "SELECT MAX(x.a) FROM x WHERE x.b = s.b")
self.assertEqual(scopes[3].expression.sql(), "SELECT y.c AS b FROM y")
self.assertEqual(scopes[4].expression.sql(), parse_one(sql).sql())
for scopes in traverse_scope(parse_one(sql)), list(build_scope(parse_one(sql)).traverse()):
self.assertEqual(len(scopes), 5)
self.assertEqual(scopes[0].expression.sql(), "SELECT x.b FROM x")
self.assertEqual(scopes[1].expression.sql(), "SELECT y.b FROM y")
self.assertEqual(scopes[2].expression.sql(), "SELECT MAX(x.a) FROM x WHERE x.b = s.b")
self.assertEqual(scopes[3].expression.sql(), "SELECT y.c AS b FROM y")
self.assertEqual(scopes[4].expression.sql(), parse_one(sql).sql())
self.assertEqual(set(scopes[4].sources), {"q", "r", "s"})
self.assertEqual(len(scopes[4].columns), 6)
self.assertEqual(set(c.table for c in scopes[4].columns), {"r", "s"})
self.assertEqual(scopes[4].source_columns("q"), [])
self.assertEqual(len(scopes[4].source_columns("r")), 2)
self.assertEqual(set(c.table for c in scopes[4].source_columns("r")), {"r"})
self.assertEqual(set(scopes[4].sources), {"q", "r", "s"})
self.assertEqual(len(scopes[4].columns), 6)
self.assertEqual(set(c.table for c in scopes[4].columns), {"r", "s"})
self.assertEqual(scopes[4].source_columns("q"), [])
self.assertEqual(len(scopes[4].source_columns("r")), 2)
self.assertEqual(set(c.table for c in scopes[4].source_columns("r")), {"r"})
def test_literal_type_annotation(self):
tests = {
"SELECT 5": exp.DataType.Type.INT,
"SELECT 5.3": exp.DataType.Type.DOUBLE,
"SELECT 'bla'": exp.DataType.Type.VARCHAR,
"5": exp.DataType.Type.INT,
"5.3": exp.DataType.Type.DOUBLE,
"'bla'": exp.DataType.Type.VARCHAR,
}
for sql, target_type in tests.items():
expression = parse_one(sql)
annotated_expression = annotate_types(expression)
self.assertEqual(annotated_expression.find(exp.Literal).type, target_type)
def test_boolean_type_annotation(self):
tests = {
"SELECT TRUE": exp.DataType.Type.BOOLEAN,
"FALSE": exp.DataType.Type.BOOLEAN,
}
for sql, target_type in tests.items():
expression = parse_one(sql)
annotated_expression = annotate_types(expression)
self.assertEqual(annotated_expression.find(exp.Boolean).type, target_type)
def test_cast_type_annotation(self):
expression = parse_one("CAST('2020-01-01' AS TIMESTAMPTZ(9))")
annotate_types(expression)
self.assertEqual(expression.type, exp.DataType.Type.TIMESTAMPTZ)
self.assertEqual(expression.this.type, exp.DataType.Type.VARCHAR)
self.assertEqual(expression.args["to"].type, exp.DataType.Type.TIMESTAMPTZ)
self.assertEqual(expression.args["to"].expressions[0].type, exp.DataType.Type.INT)
def test_cache_annotation(self):
expression = parse_one("CACHE LAZY TABLE x OPTIONS('storageLevel' = 'value') AS SELECT 1")
annotated_expression = annotate_types(expression)
self.assertEqual(annotated_expression.expression.expressions[0].type, exp.DataType.Type.INT)
def test_binary_annotation(self):
expression = parse_one("SELECT 0.0 + (2 + 3)")
annotate_types(expression)
expression = expression.expressions[0]
self.assertEqual(expression.type, exp.DataType.Type.DOUBLE)
self.assertEqual(expression.left.type, exp.DataType.Type.DOUBLE)
self.assertEqual(expression.right.type, exp.DataType.Type.INT)
self.assertEqual(expression.right.this.type, exp.DataType.Type.INT)
self.assertEqual(expression.right.this.left.type, exp.DataType.Type.INT)
self.assertEqual(expression.right.this.right.type, exp.DataType.Type.INT)

View file

@ -21,6 +21,11 @@ class TestParser(unittest.TestCase):
self.assertIsNotNone(parse_one("date").find(exp.Column))
def test_float(self):
self.assertEqual(parse_one(".2"), parse_one("0.2"))
self.assertEqual(parse_one("int 1"), parse_one("CAST(1 AS INT)"))
self.assertEqual(parse_one("int.5"), parse_one("CAST(0.5 AS INT)"))
def test_table(self):
tables = [t.sql() for t in parse_one("select * from a, b.c, .d").find_all(exp.Table)]
self.assertEqual(tables, ["a", "b.c", "d"])

View file

@ -6,11 +6,32 @@ from sqlglot.transforms import unalias_group
class TestTime(unittest.TestCase):
def validate(self, transform, sql, target):
self.assertEqual(parse_one(sql).transform(transform).sql(), target)
with self.subTest(sql):
self.assertEqual(parse_one(sql).transform(transform).sql(), target)
def test_unalias_group(self):
self.validate(
unalias_group,
"SELECT a, b AS b, c AS c, 4 FROM x GROUP BY a, b, x.c, 4",
"SELECT a, b AS b, c AS c, 4 FROM x GROUP BY a, 2, x.c, 4",
"SELECT a, b AS b, c AS c, 4 FROM x GROUP BY a, b, x.c, 4",
)
self.validate(
unalias_group,
"SELECT TO_DATE(the_date) AS the_date, CUSTOM_UDF(other_col) AS other_col, last_col AS aliased_last, COUNT(*) AS the_count FROM x GROUP BY TO_DATE(the_date), CUSTOM_UDF(other_col), aliased_last",
"SELECT TO_DATE(the_date) AS the_date, CUSTOM_UDF(other_col) AS other_col, last_col AS aliased_last, COUNT(*) AS the_count FROM x GROUP BY TO_DATE(the_date), CUSTOM_UDF(other_col), 3",
)
self.validate(
unalias_group,
"SELECT SOME_UDF(TO_DATE(the_date)) AS the_date, COUNT(*) AS the_count FROM x GROUP BY SOME_UDF(TO_DATE(the_date))",
"SELECT SOME_UDF(TO_DATE(the_date)) AS the_date, COUNT(*) AS the_count FROM x GROUP BY SOME_UDF(TO_DATE(the_date))",
)
self.validate(
unalias_group,
"SELECT SOME_UDF(TO_DATE(the_date)) AS new_date, COUNT(*) AS the_count FROM x GROUP BY new_date",
"SELECT SOME_UDF(TO_DATE(the_date)) AS new_date, COUNT(*) AS the_count FROM x GROUP BY 1",
)
self.validate(
unalias_group,
"SELECT the_date AS the_date, COUNT(*) AS the_count FROM x GROUP BY the_date",
"SELECT the_date AS the_date, COUNT(*) AS the_count FROM x GROUP BY the_date",
)