
Merging upstream version 26.16.2.

Signed-off-by: Daniel Baumann <daniel@debian.org>
Daniel Baumann 2025-04-25 07:27:01 +02:00
parent f03ef3fd88
commit 1e2a8571aa
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
110 changed files with 62370 additions and 61414 deletions

View file

@ -276,15 +276,17 @@ class TestAthena(Validator):
exp.FileFormatProperty(this=exp.Literal.string("parquet")),
exp.LocationProperty(this=exp.Literal.string("s3://foo")),
exp.PartitionedByProperty(
this=exp.Schema(expressions=[exp.to_column("partition_col")])
this=exp.Schema(expressions=[exp.to_column("partition_col", quoted=True)])
),
]
),
expression=exp.select("1"),
)
# Even if identify=True, the column names should not be quoted within the string literals in the partitioned_by ARRAY[]
self.assertEqual(
ctas_hive.sql(dialect=self.dialect, identify=True),
"CREATE TABLE \"foo\".\"bar\" WITH (format='parquet', external_location='s3://foo', partitioned_by=ARRAY['\"partition_col\"']) AS SELECT 1",
"CREATE TABLE \"foo\".\"bar\" WITH (format='parquet', external_location='s3://foo', partitioned_by=ARRAY['partition_col']) AS SELECT 1",
)
self.assertEqual(
ctas_hive.sql(dialect=self.dialect, identify=False),
@ -303,7 +305,8 @@ class TestAthena(Validator):
expressions=[
exp.to_column("partition_col"),
exp.PartitionedByBucket(
this=exp.to_column("a"), expression=exp.Literal.number(4)
this=exp.to_column("a", quoted=True),
expression=exp.Literal.number(4),
),
]
)
@ -312,11 +315,25 @@ class TestAthena(Validator):
),
expression=exp.select("1"),
)
# Even if identify=True, the column names should not be quoted within the string literals in the partitioning ARRAY[]
# Technically Trino's Iceberg connector does support quoted column names in the string literals, but it's undocumented
# so we don't do it, to keep consistency with the Hive connector
self.assertEqual(
ctas_iceberg.sql(dialect=self.dialect, identify=True),
"CREATE TABLE \"foo\".\"bar\" WITH (table_type='iceberg', location='s3://foo', partitioning=ARRAY['\"partition_col\"', 'BUCKET(\"a\", 4)']) AS SELECT 1",
"CREATE TABLE \"foo\".\"bar\" WITH (table_type='iceberg', location='s3://foo', partitioning=ARRAY['partition_col', 'BUCKET(a, 4)']) AS SELECT 1",
)
self.assertEqual(
ctas_iceberg.sql(dialect=self.dialect, identify=False),
"CREATE TABLE foo.bar WITH (table_type='iceberg', location='s3://foo', partitioning=ARRAY['partition_col', 'BUCKET(a, 4)']) AS SELECT 1",
)
def test_parse_partitioned_by_returns_iceberg_transforms(self):
# check that parse_into works for PartitionedByProperty and also that correct AST nodes are emitted for Iceberg transforms
parsed = self.parse_one(
"(a, bucket(4, b), truncate(3, c), month(d))", into=exp.PartitionedByProperty
)
assert isinstance(parsed, exp.PartitionedByProperty)
assert isinstance(parsed.this, exp.Schema)
assert next(n for n in parsed.this.expressions if isinstance(n, exp.PartitionedByBucket))
assert next(n for n in parsed.this.expressions if isinstance(n, exp.PartitionByTruncate))
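For reference, a minimal sketch of what the quoted column builder used above produces on its own (illustrative only, not part of this diff; it relies on sqlglot's public exp.to_column API):

from sqlglot import exp

# quoted=True wraps the name in a quoted Identifier, so the default dialect renders double quotes
print(exp.to_column("partition_col", quoted=True).sql())  # "partition_col"
print(exp.to_column("partition_col").sql())               # partition_col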

View file

@ -448,14 +448,13 @@ LANGUAGE js AS
"SELECT SUM(x RESPECT NULLS) AS x",
read={
"bigquery": "SELECT SUM(x RESPECT NULLS) AS x",
"duckdb": "SELECT SUM(x RESPECT NULLS) AS x",
"postgres": "SELECT SUM(x) RESPECT NULLS AS x",
"spark": "SELECT SUM(x) RESPECT NULLS AS x",
"snowflake": "SELECT SUM(x) RESPECT NULLS AS x",
},
write={
"bigquery": "SELECT SUM(x RESPECT NULLS) AS x",
"duckdb": "SELECT SUM(x RESPECT NULLS) AS x",
"duckdb": "SELECT SUM(x) AS x",
"postgres": "SELECT SUM(x) RESPECT NULLS AS x",
"spark": "SELECT SUM(x) RESPECT NULLS AS x",
"snowflake": "SELECT SUM(x) RESPECT NULLS AS x",
@ -465,7 +464,7 @@ LANGUAGE js AS
"SELECT PERCENTILE_CONT(x, 0.5 RESPECT NULLS) OVER ()",
write={
"bigquery": "SELECT PERCENTILE_CONT(x, 0.5 RESPECT NULLS) OVER ()",
"duckdb": "SELECT QUANTILE_CONT(x, 0.5 RESPECT NULLS) OVER ()",
"duckdb": "SELECT QUANTILE_CONT(x, 0.5) OVER ()",
"spark": "SELECT PERCENTILE_CONT(x, 0.5) RESPECT NULLS OVER ()",
},
)
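A minimal sketch of the DuckDB behavior asserted above (illustrative; the expected output is taken from the duckdb write entry in the test):

import sqlglot

# DuckDB aggregates do not accept RESPECT NULLS, so the clause is dropped on generation
print(sqlglot.transpile("SELECT SUM(x RESPECT NULLS) AS x", read="bigquery", write="duckdb")[0])
# SELECT SUM(x) AS x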

View file

@ -739,6 +739,12 @@ class TestClickhouse(Validator):
with self.subTest(f"Casting to ClickHouse {data_type}"):
self.validate_identity(f"SELECT CAST(val AS {data_type})")
def test_nothing_type(self):
data_types = ["Nothing", "Nullable(Nothing)"]
for data_type in data_types:
with self.subTest(f"Casting to ClickHouse {data_type}"):
self.validate_identity(f"SELECT CAST(val AS {data_type})")
def test_aggregate_function_column_with_any_keyword(self):
# Regression test for https://github.com/tobymao/sqlglot/issues/4723
self.validate_all(
@ -766,6 +772,17 @@ ORDER BY (
pretty=True,
)
def test_create_table_as_alias(self):
ctas_alias = "CREATE TABLE my_db.my_table AS another_db.another_table"
expected = exp.Create(
this=exp.to_table("my_db.my_table"),
kind="TABLE",
expression=exp.to_table("another_db.another_table"),
)
self.assertEqual(self.parse_one(ctas_alias), expected)
self.validate_identity(ctas_alias)
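A minimal usage sketch of the CREATE TABLE ... AS <table> alias form validated above (illustrative; per validate_identity it round-trips unchanged):

import sqlglot

print(sqlglot.transpile("CREATE TABLE my_db.my_table AS another_db.another_table", read="clickhouse", write="clickhouse")[0])
# CREATE TABLE my_db.my_table AS another_db.another_table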
def test_ddl(self):
db_table_expr = exp.Table(this=None, db=exp.to_identifier("foo"), catalog=None)
create_with_cluster = exp.Create(
@ -1220,6 +1237,15 @@ LIFETIME(MIN 0 MAX 0)""",
f"SELECT {func_alias}(SECOND, 1, bar)",
f"SELECT {func_name}(SECOND, 1, bar)",
)
# 4-arg functions of type <func>(unit, value, date, timezone)
for func in (("DATE_DIFF", "DATEDIFF"),):
func_name = func[0]
for func_alias in func:
with self.subTest(f"Test 4-arg date-time function {func_alias}"):
self.validate_identity(
f"SELECT {func_alias}(SECOND, 1, bar, 'UTC')",
f"SELECT {func_name}(SECOND, 1, bar, 'UTC')",
)
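A minimal sketch of the alias normalization the loop above asserts (illustrative; the expected output comes from the second argument to validate_identity):

import sqlglot

# the DATEDIFF alias is normalized to the canonical DATE_DIFF name, with the timezone argument preserved
print(sqlglot.transpile("SELECT DATEDIFF(SECOND, 1, bar, 'UTC')", read="clickhouse", write="clickhouse")[0])
# SELECT DATE_DIFF(SECOND, 1, bar, 'UTC')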
def test_convert(self):
self.assertEqual(

View file

@ -7,6 +7,12 @@ class TestDatabricks(Validator):
dialect = "databricks"
def test_databricks(self):
null_type = exp.DataType.build("VOID", dialect="databricks")
self.assertEqual(null_type.sql(), "NULL")
self.assertEqual(null_type.sql("databricks"), "VOID")
self.validate_identity("SELECT CAST(NULL AS VOID)")
self.validate_identity("SELECT void FROM t")
self.validate_identity("SELECT * FROM stream")
self.validate_identity("SELECT t.current_time FROM t")
self.validate_identity("ALTER TABLE labels ADD COLUMN label_score FLOAT")
@ -89,7 +95,7 @@ class TestDatabricks(Validator):
self.validate_all(
"CREATE TABLE foo (x INT GENERATED ALWAYS AS (YEAR(y)))",
write={
"databricks": "CREATE TABLE foo (x INT GENERATED ALWAYS AS (YEAR(TO_DATE(y))))",
"databricks": "CREATE TABLE foo (x INT GENERATED ALWAYS AS (YEAR(y)))",
"tsql": "CREATE TABLE foo (x AS YEAR(CAST(y AS DATE)))",
},
)

View file

@ -1,4 +1,5 @@
from sqlglot import ErrorLevel, ParseError, UnsupportedError, exp, parse_one, transpile
from sqlglot.generator import logger as generator_logger
from sqlglot.helper import logger as helper_logger
from sqlglot.optimizer.annotate_types import annotate_types
from tests.dialects.test_dialect import Validator
@ -1416,20 +1417,26 @@ class TestDuckDB(Validator):
def test_ignore_nulls(self):
# Note that DuckDB differentiates window functions (e.g. LEAD, LAG) from aggregate functions (e.g. SUM)
from sqlglot.dialects.duckdb import WINDOW_FUNCS_WITH_IGNORE_NULLS
from sqlglot.dialects.duckdb import DuckDB
agg_funcs = (exp.Sum, exp.Max, exp.Min)
for func_type in WINDOW_FUNCS_WITH_IGNORE_NULLS + agg_funcs:
for func_type in DuckDB.Generator.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS + agg_funcs:
func = func_type(this=exp.to_identifier("col"))
ignore_null = exp.IgnoreNulls(this=func)
windowed_ignore_null = exp.Window(this=ignore_null)
if func_type in WINDOW_FUNCS_WITH_IGNORE_NULLS:
if func_type in DuckDB.Generator.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS:
self.assertIn("IGNORE NULLS", windowed_ignore_null.sql("duckdb"))
else:
self.assertEqual(ignore_null.sql("duckdb"), func.sql("duckdb"))
self.assertNotIn("IGNORE NULLS", windowed_ignore_null.sql("duckdb"))
with self.assertLogs(generator_logger) as cm:
self.assertEqual(ignore_null.sql("duckdb"), func.sql("duckdb"))
self.assertNotIn("IGNORE NULLS", windowed_ignore_null.sql("duckdb"))
self.assertEqual(
str(cm.output[0]),
"WARNING:sqlglot:IGNORE NULLS is not supported for non-window functions.",
)
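A minimal sketch of the non-window case covered by the assertLogs block above (illustrative; exp.Sum and exp.IgnoreNulls are the same builders the test uses):

from sqlglot import exp

func = exp.Sum(this=exp.to_identifier("col"))
# for a non-window aggregate, DuckDB generation drops the clause and logs the warning checked above
print(exp.IgnoreNulls(this=func).sql("duckdb"))  # SUM(col)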
def test_attach_detach(self):
# ATTACH

View file

@ -317,8 +317,8 @@ class TestHive(Validator):
write={
"duckdb": "DATE_DIFF('DAY', CAST(b AS DATE), CAST(a AS DATE))",
"presto": "DATE_DIFF('DAY', CAST(CAST(b AS TIMESTAMP) AS DATE), CAST(CAST(a AS TIMESTAMP) AS DATE))",
"hive": "DATEDIFF(TO_DATE(a), TO_DATE(b))",
"spark": "DATEDIFF(TO_DATE(a), TO_DATE(b))",
"hive": "DATEDIFF(a, b)",
"spark": "DATEDIFF(a, b)",
"": "DATEDIFF(CAST(a AS DATE), CAST(b AS DATE))",
},
)
@ -379,8 +379,8 @@ class TestHive(Validator):
write={
"duckdb": "DATE_DIFF('DAY', CAST(x AS DATE), CAST(y AS DATE))",
"presto": "DATE_DIFF('DAY', CAST(CAST(x AS TIMESTAMP) AS DATE), CAST(CAST(CAST(CAST(y AS TIMESTAMP) AS DATE) AS TIMESTAMP) AS DATE))",
"hive": "DATEDIFF(TO_DATE(y), TO_DATE(x))",
"spark": "DATEDIFF(TO_DATE(y), TO_DATE(x))",
"hive": "DATEDIFF(TO_DATE(y), x)",
"spark": "DATEDIFF(TO_DATE(y), x)",
"": "DATEDIFF(CAST(y AS DATE), CAST(x AS DATE))",
},
)
@ -401,8 +401,8 @@ class TestHive(Validator):
write={
"duckdb": f"{unit}(CAST(x AS DATE))",
"presto": f"{unit}(CAST(CAST(x AS TIMESTAMP) AS DATE))",
"hive": f"{unit}(TO_DATE(x))",
"spark": f"{unit}(TO_DATE(x))",
"hive": f"{unit}(x)",
"spark": f"{unit}(x)",
},
)
@ -418,6 +418,8 @@ class TestHive(Validator):
)
def test_hive(self):
self.validate_identity("TO_DATE(TO_DATE(x))")
self.validate_identity("DAY(TO_DATE(x))")
self.validate_identity("SELECT * FROM t WHERE col IN ('stream')")
self.validate_identity("SET hiveconf:some_var = 5", check_command_warning=True)
self.validate_identity("(VALUES (1 AS a, 2 AS b, 3))")

View file

@ -76,6 +76,7 @@ class TestPostgres(Validator):
self.validate_identity("SELECT CURRENT_SCHEMA")
self.validate_identity("SELECT CURRENT_USER")
self.validate_identity("SELECT * FROM ONLY t1")
self.validate_identity("SELECT INTERVAL '-1 MONTH'")
self.validate_identity(
"SELECT * FROM test_data, LATERAL JSONB_ARRAY_ELEMENTS(data) WITH ORDINALITY AS elem(value, ordinality)"
)

View file

@ -2572,3 +2572,9 @@ SINGLE = TRUE""",
self.validate_identity(
f"CREATE TABLE t (col1 INT, col2 INT, col3 INT, PRIMARY KEY (col1) {option}, UNIQUE (col1, col2) {option}, FOREIGN KEY (col3) REFERENCES other_t (id) {option})"
)
def test_parameter(self):
expr = self.validate_identity("SELECT :1")
self.assertEqual(expr.find(exp.Placeholder), exp.Placeholder(this="1"))
self.validate_identity("SELECT :1, :2")
self.validate_identity("SELECT :1 + :2")

View file

@ -465,8 +465,8 @@ TBLPROPERTIES (
"duckdb": "SELECT DATEDIFF('month', CAST('1996-10-30' AS TIMESTAMPTZ), CAST('1997-02-28 10:30:00' AS TIMESTAMPTZ))",
},
write={
"spark": "SELECT DATEDIFF(MONTH, TO_DATE(CAST('1996-10-30' AS TIMESTAMP)), TO_DATE(CAST('1997-02-28 10:30:00' AS TIMESTAMP)))",
"spark2": "SELECT CAST(MONTHS_BETWEEN(TO_DATE(CAST('1997-02-28 10:30:00' AS TIMESTAMP)), TO_DATE(CAST('1996-10-30' AS TIMESTAMP))) AS INT)",
"spark": "SELECT DATEDIFF(MONTH, CAST('1996-10-30' AS TIMESTAMP), CAST('1997-02-28 10:30:00' AS TIMESTAMP))",
"spark2": "SELECT CAST(MONTHS_BETWEEN(CAST('1997-02-28 10:30:00' AS TIMESTAMP), CAST('1996-10-30' AS TIMESTAMP)) AS INT)",
},
)
self.validate_all(
@ -474,11 +474,11 @@ TBLPROPERTIES (
write={
"bigquery": "SELECT DATE_DIFF(CAST('2020-12-31' AS DATE), CAST('2020-01-01' AS DATE), WEEK)",
"duckdb": "SELECT DATE_DIFF('WEEK', CAST('2020-01-01' AS DATE), CAST('2020-12-31' AS DATE))",
"hive": "SELECT CAST(DATEDIFF(TO_DATE('2020-12-31'), TO_DATE('2020-01-01')) / 7 AS INT)",
"hive": "SELECT CAST(DATEDIFF('2020-12-31', '2020-01-01') / 7 AS INT)",
"postgres": "SELECT CAST(EXTRACT(days FROM (CAST(CAST('2020-12-31' AS DATE) AS TIMESTAMP) - CAST(CAST('2020-01-01' AS DATE) AS TIMESTAMP))) / 7 AS BIGINT)",
"redshift": "SELECT DATEDIFF(WEEK, CAST('2020-01-01' AS DATE), CAST('2020-12-31' AS DATE))",
"snowflake": "SELECT DATEDIFF(WEEK, TO_DATE('2020-01-01'), TO_DATE('2020-12-31'))",
"spark": "SELECT DATEDIFF(WEEK, TO_DATE('2020-01-01'), TO_DATE('2020-12-31'))",
"spark": "SELECT DATEDIFF(WEEK, '2020-01-01', '2020-12-31')",
},
)
self.validate_all(
@ -592,11 +592,11 @@ TBLPROPERTIES (
self.validate_all(
"SELECT DATEDIFF(MONTH, '2020-01-01', '2020-03-05')",
write={
"databricks": "SELECT DATEDIFF(MONTH, TO_DATE('2020-01-01'), TO_DATE('2020-03-05'))",
"hive": "SELECT CAST(MONTHS_BETWEEN(TO_DATE('2020-03-05'), TO_DATE('2020-01-01')) AS INT)",
"databricks": "SELECT DATEDIFF(MONTH, '2020-01-01', '2020-03-05')",
"hive": "SELECT CAST(MONTHS_BETWEEN('2020-03-05', '2020-01-01') AS INT)",
"presto": "SELECT DATE_DIFF('MONTH', CAST(CAST('2020-01-01' AS TIMESTAMP) AS DATE), CAST(CAST('2020-03-05' AS TIMESTAMP) AS DATE))",
"spark": "SELECT DATEDIFF(MONTH, TO_DATE('2020-01-01'), TO_DATE('2020-03-05'))",
"spark2": "SELECT CAST(MONTHS_BETWEEN(TO_DATE('2020-03-05'), TO_DATE('2020-01-01')) AS INT)",
"spark": "SELECT DATEDIFF(MONTH, '2020-01-01', '2020-03-05')",
"spark2": "SELECT CAST(MONTHS_BETWEEN('2020-03-05', '2020-01-01') AS INT)",
"trino": "SELECT DATE_DIFF('MONTH', CAST(CAST('2020-01-01' AS TIMESTAMP) AS DATE), CAST(CAST('2020-03-05' AS TIMESTAMP) AS DATE))",
},
)
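A minimal sketch of the Hive output asserted above (illustrative; it assumes the surrounding test class parses with the spark dialect, and the expected string is the hive write entry from the test):

import sqlglot

print(sqlglot.transpile("SELECT DATEDIFF(MONTH, '2020-01-01', '2020-03-05')", read="spark", write="hive")[0])
# SELECT CAST(MONTHS_BETWEEN('2020-03-05', '2020-01-01') AS INT)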
@ -707,8 +707,8 @@ TBLPROPERTIES (
write={
"duckdb": "MONTH(CAST('2021-03-01' AS DATE))",
"presto": "MONTH(CAST(CAST('2021-03-01' AS TIMESTAMP) AS DATE))",
"hive": "MONTH(TO_DATE('2021-03-01'))",
"spark": "MONTH(TO_DATE('2021-03-01'))",
"hive": "MONTH('2021-03-01')",
"spark": "MONTH('2021-03-01')",
},
)
self.validate_all(
@ -716,8 +716,8 @@ TBLPROPERTIES (
write={
"duckdb": "YEAR(CAST('2021-03-01' AS DATE))",
"presto": "YEAR(CAST(CAST('2021-03-01' AS TIMESTAMP) AS DATE))",
"hive": "YEAR(TO_DATE('2021-03-01'))",
"spark": "YEAR(TO_DATE('2021-03-01'))",
"hive": "YEAR('2021-03-01')",
"spark": "YEAR('2021-03-01')",
},
)
self.validate_all(

View file

@ -97,11 +97,19 @@ class TestTrino(Validator):
self.validate_identity(
"CREATE TABLE foo (a VARCHAR, b INTEGER, c DATE) WITH (PARTITIONED_BY=ARRAY['a', 'b'])"
)
self.validate_identity(
'CREATE TABLE "foo" ("a" VARCHAR, "b" INTEGER, "c" DATE) WITH (PARTITIONED_BY=ARRAY[\'a\', \'b\'])',
identify=True,
)
# Iceberg connector syntax (partitioning, can contain Iceberg transform expressions)
self.validate_identity(
"CREATE TABLE foo (a VARCHAR, b INTEGER, c DATE) WITH (PARTITIONING=ARRAY['a', 'bucket(4, b)', 'month(c)'])",
)
self.validate_identity(
'CREATE TABLE "foo" ("a" VARCHAR, "b" INTEGER, "c" DATE) WITH (PARTITIONING=ARRAY[\'a\', \'bucket(4, b)\', \'month(c)\'])',
identify=True,
)
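A minimal sketch of the quoted identity above (illustrative; per validate_identity with identify=True the statement round-trips unchanged, with the 'a', 'b' string literals inside the ARRAY left unquoted):

import sqlglot

sql = "CREATE TABLE \"foo\" (\"a\" VARCHAR, \"b\" INTEGER, \"c\" DATE) WITH (PARTITIONED_BY=ARRAY['a', 'b'])"
print(sqlglot.transpile(sql, read="trino", write="trino", identify=True)[0] == sql)  # True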
def test_analyze(self):
self.validate_identity("ANALYZE tbl")

View file

@ -531,7 +531,10 @@ FROM "unioned" AS "unioned"
WHERE
"unioned"."source_system" = 'bamboohr' OR "unioned"."source_system" = 'workday'
QUALIFY
ROW_NUMBER() OVER (PARTITION BY "unioned"."unique_filter_key" ORDER BY "unioned"."sort_order" DESC, 1) = 1;
ROW_NUMBER() OVER (
PARTITION BY "unioned"."unique_filter_key"
ORDER BY "unioned"."sort_order" DESC, 1
) = 1;
# title: pivoted source with explicit selections
# execute: false
@ -1455,3 +1458,38 @@ LEFT JOIN "_u_1" AS "_u_1"
ON "_u_1"."tagname" = "event"."tagname"
WHERE
"event"."priority" = 'High' AND NOT "_u_1"."tagname" IS NULL;
# title: SELECT TRANSFORM ... Spark clause when schema is provided
# execute: false
# dialect: spark
WITH a AS (SELECT 'v' AS x) SELECT * FROM (SELECT TRANSFORM(x) USING 'cat' AS (y STRING) FROM a);
WITH `a` AS (
SELECT
'v' AS `x`
), `_q_0` AS (
SELECT
TRANSFORM(`a`.`x`) USING 'cat' AS (
`y` STRING
)
FROM `a` AS `a`
)
SELECT
`_q_0`.`y` AS `y`
FROM `_q_0` AS `_q_0`;
# title: SELECT TRANSFORM ... Spark clause when schema is not provided
# execute: false
# dialect: spark
WITH a AS (SELECT 'v' AS x) SELECT * FROM (SELECT TRANSFORM(x) USING 'cat' FROM a);
WITH `a` AS (
SELECT
'v' AS `x`
), `_q_0` AS (
SELECT
TRANSFORM(`a`.`x`) USING 'cat'
FROM `a` AS `a`
)
SELECT
`_q_0`.`key` AS `key`,
`_q_0`.`value` AS `value`
FROM `_q_0` AS `_q_0`;

View file

@ -4875,7 +4875,10 @@ SELECT
"item"."i_category" AS "i_category",
"item"."i_class" AS "i_class",
GROUPING("item"."i_category") + GROUPING("item"."i_class") AS "lochierarchy",
RANK() OVER (PARTITION BY GROUPING("item"."i_category") + GROUPING("item"."i_class"), CASE WHEN GROUPING("item"."i_class") = 0 THEN "item"."i_category" END ORDER BY SUM("store_sales"."ss_net_profit") / SUM("store_sales"."ss_ext_sales_price")) AS "rank_within_parent"
RANK() OVER (
PARTITION BY GROUPING("item"."i_category") + GROUPING("item"."i_class"), CASE WHEN GROUPING("item"."i_class") = 0 THEN "item"."i_category" END
ORDER BY SUM("store_sales"."ss_net_profit") / SUM("store_sales"."ss_ext_sales_price")
) AS "rank_within_parent"
FROM "store_sales" AS "store_sales"
JOIN "date_dim" AS "d1"
ON "d1"."d_date_sk" = "store_sales"."ss_sold_date_sk" AND "d1"."d_year" = 2000
@ -5952,8 +5955,13 @@ WITH "v1" AS (
"date_dim"."d_year" AS "d_year",
"date_dim"."d_moy" AS "d_moy",
SUM("store_sales"."ss_sales_price") AS "sum_sales",
AVG(SUM("store_sales"."ss_sales_price")) OVER (PARTITION BY "item"."i_category", "item"."i_brand", "store"."s_store_name", "store"."s_company_name", "date_dim"."d_year") AS "avg_monthly_sales",
RANK() OVER (PARTITION BY "item"."i_category", "item"."i_brand", "store"."s_store_name", "store"."s_company_name" ORDER BY "date_dim"."d_year", "date_dim"."d_moy") AS "rn"
AVG(SUM("store_sales"."ss_sales_price")) OVER (
PARTITION BY "item"."i_category", "item"."i_brand", "store"."s_store_name", "store"."s_company_name", "date_dim"."d_year"
) AS "avg_monthly_sales",
RANK() OVER (
PARTITION BY "item"."i_category", "item"."i_brand", "store"."s_store_name", "store"."s_company_name"
ORDER BY "date_dim"."d_year", "date_dim"."d_moy"
) AS "rn"
FROM "item" AS "item"
JOIN "store_sales" AS "store_sales"
ON "item"."i_item_sk" = "store_sales"."ss_item_sk"
@ -6591,7 +6599,11 @@ WITH "date_dim_2" AS (
SELECT
"web_sales"."ws_item_sk" AS "item_sk",
"date_dim"."d_date" AS "d_date",
SUM(SUM("web_sales"."ws_sales_price")) OVER (PARTITION BY "web_sales"."ws_item_sk" ORDER BY "date_dim"."d_date" rows BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS "cume_sales"
SUM(SUM("web_sales"."ws_sales_price")) OVER (
PARTITION BY "web_sales"."ws_item_sk"
ORDER BY "date_dim"."d_date"
rows BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
) AS "cume_sales"
FROM "web_sales" AS "web_sales"
JOIN "date_dim_2" AS "date_dim"
ON "date_dim"."d_date_sk" = "web_sales"."ws_sold_date_sk"
@ -6604,7 +6616,11 @@ WITH "date_dim_2" AS (
SELECT
"store_sales"."ss_item_sk" AS "item_sk",
"date_dim"."d_date" AS "d_date",
SUM(SUM("store_sales"."ss_sales_price")) OVER (PARTITION BY "store_sales"."ss_item_sk" ORDER BY "date_dim"."d_date" rows BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS "cume_sales"
SUM(SUM("store_sales"."ss_sales_price")) OVER (
PARTITION BY "store_sales"."ss_item_sk"
ORDER BY "date_dim"."d_date"
rows BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
) AS "cume_sales"
FROM "store_sales" AS "store_sales"
JOIN "date_dim_2" AS "date_dim"
ON "date_dim"."d_date_sk" = "store_sales"."ss_sold_date_sk"
@ -6623,16 +6639,24 @@ WITH "date_dim_2" AS (
CASE WHEN NOT "web"."d_date" IS NULL THEN "web"."d_date" ELSE "store"."d_date" END AS "d_date",
"web"."cume_sales" AS "web_sales",
"store"."cume_sales" AS "store_sales",
MAX("web"."cume_sales") OVER (PARTITION BY CASE
WHEN NOT "web"."item_sk" IS NULL
THEN "web"."item_sk"
ELSE "store"."item_sk"
END ORDER BY CASE WHEN NOT "web"."d_date" IS NULL THEN "web"."d_date" ELSE "store"."d_date" END rows BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS "web_cumulative",
MAX("store"."cume_sales") OVER (PARTITION BY CASE
WHEN NOT "web"."item_sk" IS NULL
THEN "web"."item_sk"
ELSE "store"."item_sk"
END ORDER BY CASE WHEN NOT "web"."d_date" IS NULL THEN "web"."d_date" ELSE "store"."d_date" END rows BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS "store_cumulative"
MAX("web"."cume_sales") OVER (
PARTITION BY CASE
WHEN NOT "web"."item_sk" IS NULL
THEN "web"."item_sk"
ELSE "store"."item_sk"
END
ORDER BY CASE WHEN NOT "web"."d_date" IS NULL THEN "web"."d_date" ELSE "store"."d_date" END
rows BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
) AS "web_cumulative",
MAX("store"."cume_sales") OVER (
PARTITION BY CASE
WHEN NOT "web"."item_sk" IS NULL
THEN "web"."item_sk"
ELSE "store"."item_sk"
END
ORDER BY CASE WHEN NOT "web"."d_date" IS NULL THEN "web"."d_date" ELSE "store"."d_date" END
rows BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
) AS "store_cumulative"
FROM "web_v1" AS "web"
FULL JOIN "store_v1" AS "store"
ON "store"."d_date" = "web"."d_date" AND "store"."item_sk" = "web"."item_sk"
@ -7258,8 +7282,13 @@ WITH "v1" AS (
"call_center"."cc_name" AS "cc_name",
"date_dim"."d_year" AS "d_year",
SUM("catalog_sales"."cs_sales_price") AS "sum_sales",
AVG(SUM("catalog_sales"."cs_sales_price")) OVER (PARTITION BY "item"."i_category", "item"."i_brand", "call_center"."cc_name", "date_dim"."d_year") AS "avg_monthly_sales",
RANK() OVER (PARTITION BY "item"."i_category", "item"."i_brand", "call_center"."cc_name" ORDER BY "date_dim"."d_year", "date_dim"."d_moy") AS "rn"
AVG(SUM("catalog_sales"."cs_sales_price")) OVER (
PARTITION BY "item"."i_category", "item"."i_brand", "call_center"."cc_name", "date_dim"."d_year"
) AS "avg_monthly_sales",
RANK() OVER (
PARTITION BY "item"."i_category", "item"."i_brand", "call_center"."cc_name"
ORDER BY "date_dim"."d_year", "date_dim"."d_moy"
) AS "rn"
FROM "item" AS "item"
JOIN "catalog_sales" AS "catalog_sales"
ON "catalog_sales"."cs_item_sk" = "item"."i_item_sk"
@ -9835,7 +9864,10 @@ SELECT
"store"."s_state" AS "s_state",
"store"."s_county" AS "s_county",
GROUPING("store"."s_state") + GROUPING("store"."s_county") AS "lochierarchy",
RANK() OVER (PARTITION BY GROUPING("store"."s_state") + GROUPING("store"."s_county"), CASE WHEN GROUPING("store"."s_county") = 0 THEN "store"."s_state" END ORDER BY SUM("store_sales"."ss_net_profit") DESC) AS "rank_within_parent"
RANK() OVER (
PARTITION BY GROUPING("store"."s_state") + GROUPING("store"."s_county"), CASE WHEN GROUPING("store"."s_county") = 0 THEN "store"."s_state" END
ORDER BY SUM("store_sales"."ss_net_profit") DESC
) AS "rank_within_parent"
FROM "store_sales_2" AS "store_sales"
JOIN "date_dim" AS "d1"
ON "d1"."d_date_sk" = "store_sales"."ss_sold_date_sk"
@ -11987,7 +12019,10 @@ SELECT
"item"."i_category" AS "i_category",
"item"."i_class" AS "i_class",
GROUPING("item"."i_category") + GROUPING("item"."i_class") AS "lochierarchy",
RANK() OVER (PARTITION BY GROUPING("item"."i_category") + GROUPING("item"."i_class"), CASE WHEN GROUPING("item"."i_class") = 0 THEN "item"."i_category" END ORDER BY SUM("web_sales"."ws_net_paid") DESC) AS "rank_within_parent"
RANK() OVER (
PARTITION BY GROUPING("item"."i_category") + GROUPING("item"."i_class"), CASE WHEN GROUPING("item"."i_class") = 0 THEN "item"."i_category" END
ORDER BY SUM("web_sales"."ws_net_paid") DESC
) AS "rank_within_parent"
FROM "web_sales" AS "web_sales"
JOIN "date_dim" AS "d1"
ON "d1"."d_date_sk" = "web_sales"."ws_sold_date_sk"
@ -12379,7 +12414,9 @@ WITH "tmp1" AS (
"store"."s_company_name" AS "s_company_name",
"date_dim"."d_moy" AS "d_moy",
SUM("store_sales"."ss_sales_price") AS "sum_sales",
AVG(SUM("store_sales"."ss_sales_price")) OVER (PARTITION BY "item"."i_category", "item"."i_brand", "store"."s_store_name", "store"."s_company_name") AS "avg_monthly_sales"
AVG(SUM("store_sales"."ss_sales_price")) OVER (
PARTITION BY "item"."i_category", "item"."i_brand", "store"."s_store_name", "store"."s_company_name"
) AS "avg_monthly_sales"
FROM "item" AS "item"
JOIN "store_sales" AS "store_sales"
ON "item"."i_item_sk" = "store_sales"."ss_item_sk"

View file

@ -728,6 +728,12 @@ class TestExecutor(unittest.TestCase):
result = execute(f"SELECT {sql}")
self.assertEqual(result.rows, [(expected,)])
result = execute(
"WITH t AS (SELECT 'a' AS c1, 'b' AS c2) SELECT NVL(c1, c2) FROM t",
dialect="oracle",
)
self.assertEqual(result.rows, [("a",)])
def test_case_sensitivity(self):
result = execute("SELECT A AS A FROM X", tables={"x": [{"a": 1}]})
self.assertEqual(result.columns, ("a",))

View file

@ -176,6 +176,11 @@ class TestOptimizer(unittest.TestCase):
expected,
actual,
)
for expression in optimized.walk():
for arg_key, arg in expression.args.items():
if isinstance(arg, exp.Expression):
self.assertEqual(arg_key, arg.arg_key)
self.assertIs(arg.parent, expression)
if string_to_bool(execute):
with self.subTest(f"(execute) {title}"):
@ -533,6 +538,11 @@ class TestOptimizer(unittest.TestCase):
def test_simplify(self):
self.check_file("simplify", simplify)
# Stress test with huge union query
union_sql = "SELECT 1 UNION ALL " * 1000 + "SELECT 1"
expression = parse_one(union_sql)
self.assertEqual(simplify(expression).sql(), union_sql)
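A minimal sketch of the simplify round-trip the stress test scales up (illustrative; simplify is the same function this test module imports):

from sqlglot import parse_one
from sqlglot.optimizer.simplify import simplify

sql = "SELECT 1 UNION ALL SELECT 1"
# simplify leaves the UNION ALL untouched; the new test repeats this pattern 1000 times
print(simplify(parse_one(sql)).sql() == sql)  # True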
# Ensure simplify mutates the AST properly
expression = parse_one("SELECT 1 + 2")
simplify(expression.selects[0])