
Adding upstream version 26.16.2.

Signed-off-by: Daniel Baumann <daniel@debian.org>
Daniel Baumann 2025-04-25 07:26:56 +02:00
parent 4bfa0e7e53
commit 6e767a6f98
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
110 changed files with 62370 additions and 61414 deletions


@@ -276,15 +276,17 @@ class TestAthena(Validator):
exp.FileFormatProperty(this=exp.Literal.string("parquet")),
exp.LocationProperty(this=exp.Literal.string("s3://foo")),
exp.PartitionedByProperty(
this=exp.Schema(expressions=[exp.to_column("partition_col")])
this=exp.Schema(expressions=[exp.to_column("partition_col", quoted=True)])
),
]
),
expression=exp.select("1"),
)
# Even if identify=True, the column names should not be quoted within the string literals in the partitioned_by ARRAY[]
self.assertEqual(
ctas_hive.sql(dialect=self.dialect, identify=True),
"CREATE TABLE \"foo\".\"bar\" WITH (format='parquet', external_location='s3://foo', partitioned_by=ARRAY['\"partition_col\"']) AS SELECT 1",
"CREATE TABLE \"foo\".\"bar\" WITH (format='parquet', external_location='s3://foo', partitioned_by=ARRAY['partition_col']) AS SELECT 1",
)
self.assertEqual(
ctas_hive.sql(dialect=self.dialect, identify=False),
@@ -303,7 +305,8 @@ class TestAthena(Validator):
expressions=[
exp.to_column("partition_col"),
exp.PartitionedByBucket(
this=exp.to_column("a"), expression=exp.Literal.number(4)
this=exp.to_column("a", quoted=True),
expression=exp.Literal.number(4),
),
]
)
@@ -312,11 +315,25 @@ class TestAthena(Validator):
),
expression=exp.select("1"),
)
# Even if identify=True, the column names should not be quoted within the string literals in the partitioning ARRAY[]
# Technically, Trino's Iceberg connector does support quoted column names in the string literals, but it's undocumented,
# so we don't do it, for consistency with the Hive connector
self.assertEqual(
ctas_iceberg.sql(dialect=self.dialect, identify=True),
"CREATE TABLE \"foo\".\"bar\" WITH (table_type='iceberg', location='s3://foo', partitioning=ARRAY['\"partition_col\"', 'BUCKET(\"a\", 4)']) AS SELECT 1",
"CREATE TABLE \"foo\".\"bar\" WITH (table_type='iceberg', location='s3://foo', partitioning=ARRAY['partition_col', 'BUCKET(a, 4)']) AS SELECT 1",
)
self.assertEqual(
ctas_iceberg.sql(dialect=self.dialect, identify=False),
"CREATE TABLE foo.bar WITH (table_type='iceberg', location='s3://foo', partitioning=ARRAY['partition_col', 'BUCKET(a, 4)']) AS SELECT 1",
)
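
A standalone sketch of the Hive-connector behaviour asserted above. The exp.Create scaffolding around the properties list is not shown in this hunk, so the exp.to_table("foo.bar") call is an assumption based on the expected output string:

from sqlglot import exp

# Hypothetical reconstruction of the ctas_hive expression used in the test above;
# only the properties are visible in the hunk, the table reference is assumed.
ctas_hive = exp.Create(
    this=exp.to_table("foo.bar"),
    kind="TABLE",
    properties=exp.Properties(
        expressions=[
            exp.FileFormatProperty(this=exp.Literal.string("parquet")),
            exp.LocationProperty(this=exp.Literal.string("s3://foo")),
            exp.PartitionedByProperty(
                this=exp.Schema(expressions=[exp.to_column("partition_col", quoted=True)])
            ),
        ]
    ),
    expression=exp.select("1"),
)

# Per the assertions above, identify=True quotes the table identifiers but leaves the
# column name inside the partitioned_by ARRAY[] string literal unquoted.
print(ctas_hive.sql(dialect="athena", identify=True))
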
def test_parse_partitioned_by_returns_iceberg_transforms(self):
# check that parse_into works for PartitionedByProperty and also that correct AST nodes are emitted for Iceberg transforms
parsed = self.parse_one(
"(a, bucket(4, b), truncate(3, c), month(d))", into=exp.PartitionedByProperty
)
assert isinstance(parsed, exp.PartitionedByProperty)
assert isinstance(parsed.this, exp.Schema)
assert next(n for n in parsed.this.expressions if isinstance(n, exp.PartitionedByBucket))
assert next(n for n in parsed.this.expressions if isinstance(n, exp.PartitionByTruncate))
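
For reference, a standalone version of the parse_into call above (assuming the Validator helper simply forwards to sqlglot.parse_one with the Athena dialect):

import sqlglot
from sqlglot import exp

# Parse a bare partition spec directly into a PartitionedByProperty.
prop = sqlglot.parse_one(
    "(a, bucket(4, b), truncate(3, c), month(d))",
    read="athena",
    into=exp.PartitionedByProperty,
)

# The Iceberg transforms become dedicated AST nodes inside the Schema.
assert any(isinstance(n, exp.PartitionedByBucket) for n in prop.this.expressions)
assert any(isinstance(n, exp.PartitionByTruncate) for n in prop.this.expressions)
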


@@ -448,14 +448,13 @@ LANGUAGE js AS
"SELECT SUM(x RESPECT NULLS) AS x",
read={
"bigquery": "SELECT SUM(x RESPECT NULLS) AS x",
"duckdb": "SELECT SUM(x RESPECT NULLS) AS x",
"postgres": "SELECT SUM(x) RESPECT NULLS AS x",
"spark": "SELECT SUM(x) RESPECT NULLS AS x",
"snowflake": "SELECT SUM(x) RESPECT NULLS AS x",
},
write={
"bigquery": "SELECT SUM(x RESPECT NULLS) AS x",
"duckdb": "SELECT SUM(x RESPECT NULLS) AS x",
"duckdb": "SELECT SUM(x) AS x",
"postgres": "SELECT SUM(x) RESPECT NULLS AS x",
"spark": "SELECT SUM(x) RESPECT NULLS AS x",
"snowflake": "SELECT SUM(x) RESPECT NULLS AS x",
@@ -465,7 +464,7 @@ LANGUAGE js AS
"SELECT PERCENTILE_CONT(x, 0.5 RESPECT NULLS) OVER ()",
write={
"bigquery": "SELECT PERCENTILE_CONT(x, 0.5 RESPECT NULLS) OVER ()",
"duckdb": "SELECT QUANTILE_CONT(x, 0.5 RESPECT NULLS) OVER ()",
"duckdb": "SELECT QUANTILE_CONT(x, 0.5) OVER ()",
"spark": "SELECT PERCENTILE_CONT(x, 0.5) RESPECT NULLS OVER ()",
},
)
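
The updated DuckDB write expectations above can be reproduced with sqlglot.transpile; a minimal sketch:

import sqlglot

# Per the tests above, generating DuckDB now drops the RESPECT NULLS modifier on
# aggregate functions instead of emitting it verbatim.
print(sqlglot.transpile("SELECT SUM(x RESPECT NULLS) AS x", read="bigquery", write="duckdb")[0])
# SELECT SUM(x) AS x

print(
    sqlglot.transpile(
        "SELECT PERCENTILE_CONT(x, 0.5 RESPECT NULLS) OVER ()", read="bigquery", write="duckdb"
    )[0]
)
# SELECT QUANTILE_CONT(x, 0.5) OVER ()
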


@@ -739,6 +739,12 @@ class TestClickhouse(Validator):
with self.subTest(f"Casting to ClickHouse {data_type}"):
self.validate_identity(f"SELECT CAST(val AS {data_type})")
def test_nothing_type(self):
data_types = ["Nothing", "Nullable(Nothing)"]
for data_type in data_types:
with self.subTest(f"Casting to ClickHouse {data_type}"):
self.validate_identity(f"SELECT CAST(val AS {data_type})")
def test_aggregate_function_column_with_any_keyword(self):
# Regression test for https://github.com/tobymao/sqlglot/issues/4723
self.validate_all(
@@ -766,6 +772,17 @@ ORDER BY (
pretty=True,
)
def test_create_table_as_alias(self):
ctas_alias = "CREATE TABLE my_db.my_table AS another_db.another_table"
expected = exp.Create(
this=exp.to_table("my_db.my_table"),
kind="TABLE",
expression=exp.to_table("another_db.another_table"),
)
self.assertEqual(self.parse_one(ctas_alias), expected)
self.validate_identity(ctas_alias)
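
A standalone sketch of the new ClickHouse CREATE TABLE ... AS <table> parse, outside the test harness:

import sqlglot
from sqlglot import exp

# CREATE TABLE ... AS <other_table> now parses into a Create whose expression is the source table.
node = sqlglot.parse_one("CREATE TABLE my_db.my_table AS another_db.another_table", read="clickhouse")
assert isinstance(node, exp.Create)
assert isinstance(node.expression, exp.Table)
print(node.sql("clickhouse"))  # round-trips unchanged, per validate_identity above
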
def test_ddl(self):
db_table_expr = exp.Table(this=None, db=exp.to_identifier("foo"), catalog=None)
create_with_cluster = exp.Create(
@@ -1220,6 +1237,15 @@ LIFETIME(MIN 0 MAX 0)""",
f"SELECT {func_alias}(SECOND, 1, bar)",
f"SELECT {func_name}(SECOND, 1, bar)",
)
# 4-arg functions of type <func>(unit, value, date, timezone)
for func in (("DATE_DIFF", "DATEDIFF"),):
func_name = func[0]
for func_alias in func:
with self.subTest(f"Test 4-arg date-time function {func_alias}"):
self.validate_identity(
f"SELECT {func_alias}(SECOND, 1, bar, 'UTC')",
f"SELECT {func_name}(SECOND, 1, bar, 'UTC')",
)
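
A standalone example of the 4-argument form being normalized (a sketch; the expected output follows the test's func_name convention):

import sqlglot

# The DATEDIFF alias with a trailing timezone argument is rendered back as DATE_DIFF.
print(
    sqlglot.transpile(
        "SELECT DATEDIFF(SECOND, 1, bar, 'UTC')", read="clickhouse", write="clickhouse"
    )[0]
)
# SELECT DATE_DIFF(SECOND, 1, bar, 'UTC')
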
def test_convert(self):
self.assertEqual(


@@ -7,6 +7,12 @@ class TestDatabricks(Validator):
dialect = "databricks"
def test_databricks(self):
null_type = exp.DataType.build("VOID", dialect="databricks")
self.assertEqual(null_type.sql(), "NULL")
self.assertEqual(null_type.sql("databricks"), "VOID")
self.validate_identity("SELECT CAST(NULL AS VOID)")
self.validate_identity("SELECT void FROM t")
self.validate_identity("SELECT * FROM stream")
self.validate_identity("SELECT t.current_time FROM t")
self.validate_identity("ALTER TABLE labels ADD COLUMN label_score FLOAT")
@@ -89,7 +95,7 @@ class TestDatabricks(Validator):
self.validate_all(
"CREATE TABLE foo (x INT GENERATED ALWAYS AS (YEAR(y)))",
write={
"databricks": "CREATE TABLE foo (x INT GENERATED ALWAYS AS (YEAR(TO_DATE(y))))",
"databricks": "CREATE TABLE foo (x INT GENERATED ALWAYS AS (YEAR(y)))",
"tsql": "CREATE TABLE foo (x AS YEAR(CAST(y AS DATE)))",
},
)
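
The VOID mapping added at the top of test_databricks can be exercised on its own; a minimal sketch:

from sqlglot import exp

# Databricks' VOID builds to the generic NULL data type, but renders back as VOID
# when generating Databricks SQL (per the assertions above).
null_type = exp.DataType.build("VOID", dialect="databricks")
print(null_type.sql())              # NULL
print(null_type.sql("databricks"))  # VOID
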

View file

@@ -1,4 +1,5 @@
from sqlglot import ErrorLevel, ParseError, UnsupportedError, exp, parse_one, transpile
from sqlglot.generator import logger as generator_logger
from sqlglot.helper import logger as helper_logger
from sqlglot.optimizer.annotate_types import annotate_types
from tests.dialects.test_dialect import Validator
@@ -1416,20 +1417,26 @@ class TestDuckDB(Validator):
def test_ignore_nulls(self):
# Note that DuckDB differentiates window functions (e.g. LEAD, LAG) from aggregate functions (e.g. SUM)
from sqlglot.dialects.duckdb import WINDOW_FUNCS_WITH_IGNORE_NULLS
from sqlglot.dialects.duckdb import DuckDB
agg_funcs = (exp.Sum, exp.Max, exp.Min)
for func_type in WINDOW_FUNCS_WITH_IGNORE_NULLS + agg_funcs:
for func_type in DuckDB.Generator.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS + agg_funcs:
func = func_type(this=exp.to_identifier("col"))
ignore_null = exp.IgnoreNulls(this=func)
windowed_ignore_null = exp.Window(this=ignore_null)
if func_type in WINDOW_FUNCS_WITH_IGNORE_NULLS:
if func_type in DuckDB.Generator.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS:
self.assertIn("IGNORE NULLS", windowed_ignore_null.sql("duckdb"))
else:
self.assertEqual(ignore_null.sql("duckdb"), func.sql("duckdb"))
self.assertNotIn("IGNORE NULLS", windowed_ignore_null.sql("duckdb"))
with self.assertLogs(generator_logger) as cm:
self.assertEqual(ignore_null.sql("duckdb"), func.sql("duckdb"))
self.assertNotIn("IGNORE NULLS", windowed_ignore_null.sql("duckdb"))
self.assertEqual(
str(cm.output[0]),
"WARNING:sqlglot:IGNORE NULLS is not supported for non-window functions.",
)
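
A minimal sketch of the non-window case above: for aggregate functions, DuckDB generation drops IGNORE NULLS and logs the warning instead of emitting unsupported syntax.

from sqlglot import exp

# IGNORE NULLS wrapped around an aggregate (non-window) function is dropped on output;
# the generator logs "IGNORE NULLS is not supported for non-window functions."
agg = exp.Sum(this=exp.to_identifier("col"))
print(exp.IgnoreNulls(this=agg).sql("duckdb"))  # SUM(col)
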
def test_attach_detach(self):
# ATTACH


@@ -317,8 +317,8 @@ class TestHive(Validator):
write={
"duckdb": "DATE_DIFF('DAY', CAST(b AS DATE), CAST(a AS DATE))",
"presto": "DATE_DIFF('DAY', CAST(CAST(b AS TIMESTAMP) AS DATE), CAST(CAST(a AS TIMESTAMP) AS DATE))",
"hive": "DATEDIFF(TO_DATE(a), TO_DATE(b))",
"spark": "DATEDIFF(TO_DATE(a), TO_DATE(b))",
"hive": "DATEDIFF(a, b)",
"spark": "DATEDIFF(a, b)",
"": "DATEDIFF(CAST(a AS DATE), CAST(b AS DATE))",
},
)
@@ -379,8 +379,8 @@ class TestHive(Validator):
write={
"duckdb": "DATE_DIFF('DAY', CAST(x AS DATE), CAST(y AS DATE))",
"presto": "DATE_DIFF('DAY', CAST(CAST(x AS TIMESTAMP) AS DATE), CAST(CAST(CAST(CAST(y AS TIMESTAMP) AS DATE) AS TIMESTAMP) AS DATE))",
"hive": "DATEDIFF(TO_DATE(y), TO_DATE(x))",
"spark": "DATEDIFF(TO_DATE(y), TO_DATE(x))",
"hive": "DATEDIFF(TO_DATE(y), x)",
"spark": "DATEDIFF(TO_DATE(y), x)",
"": "DATEDIFF(CAST(y AS DATE), CAST(x AS DATE))",
},
)
@@ -401,8 +401,8 @@ class TestHive(Validator):
write={
"duckdb": f"{unit}(CAST(x AS DATE))",
"presto": f"{unit}(CAST(CAST(x AS TIMESTAMP) AS DATE))",
"hive": f"{unit}(TO_DATE(x))",
"spark": f"{unit}(TO_DATE(x))",
"hive": f"{unit}(x)",
"spark": f"{unit}(x)",
},
)
@@ -418,6 +418,8 @@ class TestHive(Validator):
)
def test_hive(self):
self.validate_identity("TO_DATE(TO_DATE(x))")
self.validate_identity("DAY(TO_DATE(x))")
self.validate_identity("SELECT * FROM t WHERE col IN ('stream')")
self.validate_identity("SET hiveconf:some_var = 5", check_command_warning=True)
self.validate_identity("(VALUES (1 AS a, 2 AS b, 3))")


@@ -76,6 +76,7 @@ class TestPostgres(Validator):
self.validate_identity("SELECT CURRENT_SCHEMA")
self.validate_identity("SELECT CURRENT_USER")
self.validate_identity("SELECT * FROM ONLY t1")
self.validate_identity("SELECT INTERVAL '-1 MONTH'")
self.validate_identity(
"SELECT * FROM test_data, LATERAL JSONB_ARRAY_ELEMENTS(data) WITH ORDINALITY AS elem(value, ordinality)"
)


@@ -2572,3 +2572,9 @@ SINGLE = TRUE""",
self.validate_identity(
f"CREATE TABLE t (col1 INT, col2 INT, col3 INT, PRIMARY KEY (col1) {option}, UNIQUE (col1, col2) {option}, FOREIGN KEY (col3) REFERENCES other_t (id) {option})"
)
def test_parameter(self):
expr = self.validate_identity("SELECT :1")
self.assertEqual(expr.find(exp.Placeholder), exp.Placeholder(this="1"))
self.validate_identity("SELECT :1, :2")
self.validate_identity("SELECT :1 + :2")


@@ -465,8 +465,8 @@ TBLPROPERTIES (
"duckdb": "SELECT DATEDIFF('month', CAST('1996-10-30' AS TIMESTAMPTZ), CAST('1997-02-28 10:30:00' AS TIMESTAMPTZ))",
},
write={
"spark": "SELECT DATEDIFF(MONTH, TO_DATE(CAST('1996-10-30' AS TIMESTAMP)), TO_DATE(CAST('1997-02-28 10:30:00' AS TIMESTAMP)))",
"spark2": "SELECT CAST(MONTHS_BETWEEN(TO_DATE(CAST('1997-02-28 10:30:00' AS TIMESTAMP)), TO_DATE(CAST('1996-10-30' AS TIMESTAMP))) AS INT)",
"spark": "SELECT DATEDIFF(MONTH, CAST('1996-10-30' AS TIMESTAMP), CAST('1997-02-28 10:30:00' AS TIMESTAMP))",
"spark2": "SELECT CAST(MONTHS_BETWEEN(CAST('1997-02-28 10:30:00' AS TIMESTAMP), CAST('1996-10-30' AS TIMESTAMP)) AS INT)",
},
)
self.validate_all(
@@ -474,11 +474,11 @@ TBLPROPERTIES (
write={
"bigquery": "SELECT DATE_DIFF(CAST('2020-12-31' AS DATE), CAST('2020-01-01' AS DATE), WEEK)",
"duckdb": "SELECT DATE_DIFF('WEEK', CAST('2020-01-01' AS DATE), CAST('2020-12-31' AS DATE))",
"hive": "SELECT CAST(DATEDIFF(TO_DATE('2020-12-31'), TO_DATE('2020-01-01')) / 7 AS INT)",
"hive": "SELECT CAST(DATEDIFF('2020-12-31', '2020-01-01') / 7 AS INT)",
"postgres": "SELECT CAST(EXTRACT(days FROM (CAST(CAST('2020-12-31' AS DATE) AS TIMESTAMP) - CAST(CAST('2020-01-01' AS DATE) AS TIMESTAMP))) / 7 AS BIGINT)",
"redshift": "SELECT DATEDIFF(WEEK, CAST('2020-01-01' AS DATE), CAST('2020-12-31' AS DATE))",
"snowflake": "SELECT DATEDIFF(WEEK, TO_DATE('2020-01-01'), TO_DATE('2020-12-31'))",
"spark": "SELECT DATEDIFF(WEEK, TO_DATE('2020-01-01'), TO_DATE('2020-12-31'))",
"spark": "SELECT DATEDIFF(WEEK, '2020-01-01', '2020-12-31')",
},
)
self.validate_all(
@@ -592,11 +592,11 @@ TBLPROPERTIES (
self.validate_all(
"SELECT DATEDIFF(MONTH, '2020-01-01', '2020-03-05')",
write={
"databricks": "SELECT DATEDIFF(MONTH, TO_DATE('2020-01-01'), TO_DATE('2020-03-05'))",
"hive": "SELECT CAST(MONTHS_BETWEEN(TO_DATE('2020-03-05'), TO_DATE('2020-01-01')) AS INT)",
"databricks": "SELECT DATEDIFF(MONTH, '2020-01-01', '2020-03-05')",
"hive": "SELECT CAST(MONTHS_BETWEEN('2020-03-05', '2020-01-01') AS INT)",
"presto": "SELECT DATE_DIFF('MONTH', CAST(CAST('2020-01-01' AS TIMESTAMP) AS DATE), CAST(CAST('2020-03-05' AS TIMESTAMP) AS DATE))",
"spark": "SELECT DATEDIFF(MONTH, TO_DATE('2020-01-01'), TO_DATE('2020-03-05'))",
"spark2": "SELECT CAST(MONTHS_BETWEEN(TO_DATE('2020-03-05'), TO_DATE('2020-01-01')) AS INT)",
"spark": "SELECT DATEDIFF(MONTH, '2020-01-01', '2020-03-05')",
"spark2": "SELECT CAST(MONTHS_BETWEEN('2020-03-05', '2020-01-01') AS INT)",
"trino": "SELECT DATE_DIFF('MONTH', CAST(CAST('2020-01-01' AS TIMESTAMP) AS DATE), CAST(CAST('2020-03-05' AS TIMESTAMP) AS DATE))",
},
)
@@ -707,8 +707,8 @@ TBLPROPERTIES (
write={
"duckdb": "MONTH(CAST('2021-03-01' AS DATE))",
"presto": "MONTH(CAST(CAST('2021-03-01' AS TIMESTAMP) AS DATE))",
"hive": "MONTH(TO_DATE('2021-03-01'))",
"spark": "MONTH(TO_DATE('2021-03-01'))",
"hive": "MONTH('2021-03-01')",
"spark": "MONTH('2021-03-01')",
},
)
self.validate_all(
@@ -716,8 +716,8 @@ TBLPROPERTIES (
write={
"duckdb": "YEAR(CAST('2021-03-01' AS DATE))",
"presto": "YEAR(CAST(CAST('2021-03-01' AS TIMESTAMP) AS DATE))",
"hive": "YEAR(TO_DATE('2021-03-01'))",
"spark": "YEAR(TO_DATE('2021-03-01'))",
"hive": "YEAR('2021-03-01')",
"spark": "YEAR('2021-03-01')",
},
)
self.validate_all(


@@ -97,11 +97,19 @@ class TestTrino(Validator):
self.validate_identity(
"CREATE TABLE foo (a VARCHAR, b INTEGER, c DATE) WITH (PARTITIONED_BY=ARRAY['a', 'b'])"
)
self.validate_identity(
'CREATE TABLE "foo" ("a" VARCHAR, "b" INTEGER, "c" DATE) WITH (PARTITIONED_BY=ARRAY[\'a\', \'b\'])',
identify=True,
)
# Iceberg connector syntax (partitioning, can contain Iceberg transform expressions)
self.validate_identity(
"CREATE TABLE foo (a VARCHAR, b INTEGER, c DATE) WITH (PARTITIONING=ARRAY['a', 'bucket(4, b)', 'month(c)'])",
)
self.validate_identity(
'CREATE TABLE "foo" ("a" VARCHAR, "b" INTEGER, "c" DATE) WITH (PARTITIONING=ARRAY[\'a\', \'bucket(4, b)\', \'month(c)\'])',
identify=True,
)
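
A sketch of the quoting behaviour the two identify=True identities above pin down: column identifiers get quoted, but the Iceberg transform strings inside PARTITIONING=ARRAY[...] are left as-is.

import sqlglot

sql = (
    "CREATE TABLE foo (a VARCHAR, b INTEGER, c DATE) "
    "WITH (PARTITIONING=ARRAY['a', 'bucket(4, b)', 'month(c)'])"
)
print(sqlglot.transpile(sql, read="trino", write="trino", identify=True)[0])
# Expected (per the identities above):
# CREATE TABLE "foo" ("a" VARCHAR, "b" INTEGER, "c" DATE)
#   WITH (PARTITIONING=ARRAY['a', 'bucket(4, b)', 'month(c)'])
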
def test_analyze(self):
self.validate_identity("ANALYZE tbl")