
Merging upstream version 26.15.0.

Signed-off-by: Daniel Baumann <daniel@debian.org>
Daniel Baumann 2025-04-21 09:50:04 +02:00
parent 2a79d9df75
commit de6539b796
58 changed files with 4878 additions and 4677 deletions


@@ -202,6 +202,67 @@ class TestAthena(Validator):
identify=True,
)
def test_create_table(self):
# There are two CREATE TABLE syntaxes.
# Both are handled by Athena's Hive engine, but creating an Iceberg table differs from creating a normal Hive table.
table_schema = exp.Schema(
this=exp.to_table("foo.bar"),
expressions=[
exp.ColumnDef(this=exp.to_identifier("a"), kind=exp.DataType.build("int")),
exp.ColumnDef(this=exp.to_identifier("b"), kind=exp.DataType.build("varchar")),
],
)
# Hive tables - CREATE EXTERNAL TABLE
ct_hive = exp.Create(
this=table_schema,
kind="TABLE",
properties=exp.Properties(
expressions=[
exp.ExternalProperty(),
exp.FileFormatProperty(this=exp.Literal.string("parquet")),
exp.LocationProperty(this=exp.Literal.string("s3://foo")),
exp.PartitionedByProperty(
this=exp.Schema(expressions=[exp.to_column("partition_col")])
),
]
),
)
self.assertEqual(
ct_hive.sql(dialect=self.dialect, identify=True),
"CREATE EXTERNAL TABLE `foo`.`bar` (`a` INT, `b` STRING) STORED AS PARQUET LOCATION 's3://foo' PARTITIONED BY (`partition_col`)",
)
# Iceberg tables - CREATE TABLE... TBLPROPERTIES ('table_type'='iceberg')
# no EXTERNAL keyword, and the 'table_type'='iceberg' property must be set
# ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning
ct_iceberg = exp.Create(
this=table_schema,
kind="TABLE",
properties=exp.Properties(
expressions=[
exp.FileFormatProperty(this=exp.Literal.string("parquet")),
exp.LocationProperty(this=exp.Literal.string("s3://foo")),
exp.PartitionedByProperty(
this=exp.Schema(
expressions=[
exp.to_column("partition_col"),
exp.PartitionedByBucket(
this=exp.to_column("a"), expression=exp.Literal.number(4)
),
]
)
),
exp.Property(this=exp.var("table_type"), value=exp.Literal.string("iceberg")),
]
),
)
self.assertEqual(
ct_iceberg.sql(dialect=self.dialect, identify=True),
"CREATE TABLE `foo`.`bar` (`a` INT, `b` STRING) STORED AS PARQUET LOCATION 's3://foo' PARTITIONED BY (`partition_col`, BUCKET(4, `a`)) TBLPROPERTIES ('table_type'='iceberg')",
)
def test_ctas(self):
# Hive tables use 'external_location' to specify the table location; Iceberg tables use 'location'
# In addition, Hive tables use 'partitioned_by' to specify the partition fields; Iceberg tables use 'partitioning'
@@ -223,7 +284,11 @@ class TestAthena(Validator):
)
self.assertEqual(
ctas_hive.sql(dialect=self.dialect, identify=True),
"CREATE TABLE \"foo\".\"bar\" WITH (format='parquet', external_location='s3://foo', partitioned_by=ARRAY['partition_col']) AS SELECT 1",
"CREATE TABLE \"foo\".\"bar\" WITH (format='parquet', external_location='s3://foo', partitioned_by=ARRAY['\"partition_col\"']) AS SELECT 1",
)
self.assertEqual(
ctas_hive.sql(dialect=self.dialect, identify=False),
"CREATE TABLE foo.bar WITH (format='parquet', external_location='s3://foo', partitioned_by=ARRAY['partition_col']) AS SELECT 1",
)
ctas_iceberg = exp.Create(
@@ -234,7 +299,14 @@ class TestAthena(Validator):
exp.Property(this=exp.var("table_type"), value=exp.Literal.string("iceberg")),
exp.LocationProperty(this=exp.Literal.string("s3://foo")),
exp.PartitionedByProperty(
- this=exp.Schema(expressions=[exp.to_column("partition_col")])
+ this=exp.Schema(
+     expressions=[
+         exp.to_column("partition_col"),
+         exp.PartitionedByBucket(
+             this=exp.to_column("a"), expression=exp.Literal.number(4)
+         ),
+     ]
+ )
),
]
),
@@ -242,5 +314,9 @@ class TestAthena(Validator):
)
self.assertEqual(
ctas_iceberg.sql(dialect=self.dialect, identify=True),
"CREATE TABLE \"foo\".\"bar\" WITH (table_type='iceberg', location='s3://foo', partitioning=ARRAY['partition_col']) AS SELECT 1",
"CREATE TABLE \"foo\".\"bar\" WITH (table_type='iceberg', location='s3://foo', partitioning=ARRAY['\"partition_col\"', 'BUCKET(\"a\", 4)']) AS SELECT 1",
)
self.assertEqual(
ctas_iceberg.sql(dialect=self.dialect, identify=False),
"CREATE TABLE foo.bar WITH (table_type='iceberg', location='s3://foo', partitioning=ARRAY['partition_col', 'BUCKET(a, 4)']) AS SELECT 1",
)


@@ -308,10 +308,6 @@ LANGUAGE js AS
"""SELECT JSON '"foo"' AS json_data""",
"""SELECT PARSE_JSON('"foo"') AS json_data""",
)
- self.validate_identity(
-     "SELECT * FROM UNNEST(x) WITH OFFSET EXCEPT DISTINCT SELECT * FROM UNNEST(y) WITH OFFSET",
-     "SELECT * FROM UNNEST(x) WITH OFFSET AS offset EXCEPT DISTINCT SELECT * FROM UNNEST(y) WITH OFFSET AS offset",
- )
self.validate_identity(
"SELECT * FROM (SELECT a, b, c FROM test) PIVOT(SUM(b) d, COUNT(*) e FOR c IN ('x', 'y'))",
"SELECT * FROM (SELECT a, b, c FROM test) PIVOT(SUM(b) AS d, COUNT(*) AS e FOR c IN ('x', 'y'))",
@@ -1519,8 +1515,8 @@ WHERE
self.validate_all(
"SELECT GENERATE_DATE_ARRAY('2016-10-05', '2016-10-08')",
write={
"duckdb": "SELECT CAST(GENERATE_SERIES(CAST('2016-10-05' AS DATE), CAST('2016-10-08' AS DATE), INTERVAL 1 DAY) AS DATE[])",
"bigquery": "SELECT GENERATE_DATE_ARRAY('2016-10-05', '2016-10-08', INTERVAL 1 DAY)",
"duckdb": "SELECT CAST(GENERATE_SERIES(CAST('2016-10-05' AS DATE), CAST('2016-10-08' AS DATE), INTERVAL '1' DAY) AS DATE[])",
"bigquery": "SELECT GENERATE_DATE_ARRAY('2016-10-05', '2016-10-08', INTERVAL '1' DAY)",
},
)
self.validate_all(
@@ -2424,3 +2420,16 @@ OPTIONS (
"SELECT 1 AS x UNION ALL STRICT CORRESPONDING BY (foo, bar) SELECT 2 AS x",
"SELECT 1 AS x UNION ALL BY NAME ON (foo, bar) SELECT 2 AS x",
)
def test_with_offset(self):
self.validate_identity(
"SELECT * FROM UNNEST(x) WITH OFFSET EXCEPT DISTINCT SELECT * FROM UNNEST(y) WITH OFFSET",
"SELECT * FROM UNNEST(x) WITH OFFSET AS offset EXCEPT DISTINCT SELECT * FROM UNNEST(y) WITH OFFSET AS offset",
)
for join_ops in ("LEFT", "RIGHT", "FULL", "NATURAL", "SEMI", "ANTI"):
with self.subTest(f"Testing {join_ops} in test_with_offset"):
self.validate_identity(
f"SELECT * FROM t1, UNNEST([1, 2]) AS hit WITH OFFSET {join_ops} JOIN foo",
f"SELECT * FROM t1, UNNEST([1, 2]) AS hit WITH OFFSET AS offset {join_ops} JOIN foo",
)
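
The same aliasing behavior, sketched as a plain transpile call:

    import sqlglot

    # BigQuery's implicit WITH OFFSET column gains an explicit alias on output
    sql = "SELECT * FROM t1, UNNEST([1, 2]) AS hit WITH OFFSET LEFT JOIN foo"
    print(sqlglot.transpile(sql, read="bigquery", write="bigquery")[0])
    # per the test above: SELECT * FROM t1, UNNEST([1, 2]) AS hit WITH OFFSET AS offset LEFT JOIN foo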


@@ -1034,7 +1034,7 @@ class TestDuckDB(Validator):
"clickhouse": "fromUnixTimestamp64Milli(CAST(x AS Nullable(Int64)))",
"duckdb": "EPOCH_MS(x)",
"mysql": "FROM_UNIXTIME(x / POWER(10, 3))",
"postgres": "TO_TIMESTAMP(CAST(x AS DOUBLE PRECISION) / 10 ^ 3)",
"postgres": "TO_TIMESTAMP(CAST(x AS DOUBLE PRECISION) / POWER(10, 3))",
"presto": "FROM_UNIXTIME(CAST(x AS DOUBLE) / POW(10, 3))",
"spark": "TIMESTAMP_MILLIS(x)",
},
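
A sketch of the same conversion outside the test harness (the postgres string is the one this diff updates):

    import sqlglot

    # DuckDB's EPOCH_MS(x) rendered for a few other dialects
    for dialect in ("postgres", "presto", "spark"):
        print(sqlglot.transpile("EPOCH_MS(x)", read="duckdb", write=dialect)[0])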


@@ -568,7 +568,7 @@ FROM json_data, field_ids""",
"x ^ y",
write={
"": "POWER(x, y)",
"postgres": "x ^ y",
"postgres": "POWER(x, y)",
},
)
self.validate_all(
@@ -765,7 +765,7 @@ FROM json_data, field_ids""",
"x / y ^ z",
write={
"": "x / POWER(y, z)",
"postgres": "x / y ^ z",
"postgres": "x / POWER(y, z)",
},
)
self.validate_all(
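
Sketched directly; the input is read with the postgres dialect so that '^' parses as exponentiation:

    import sqlglot

    # '^' (PostgreSQL exponentiation) is now rendered as POWER(...)
    print(sqlglot.transpile("x / y ^ z", read="postgres", write="postgres")[0])
    # per the test above: x / POWER(y, z)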


@@ -609,7 +609,7 @@ class TestSnowflake(Validator):
"hive": "POWER(x, 2)",
"mysql": "POWER(x, 2)",
"oracle": "POWER(x, 2)",
"postgres": "x ^ 2",
"postgres": "POWER(x, 2)",
"presto": "POWER(x, 2)",
"redshift": "POWER(x, 2)",
"snowflake": "POWER(x, 2)",
@@ -2563,3 +2563,12 @@ SINGLE = TRUE""",
"duckdb": f"SELECT LISTAGG({distinct}col, '|SEPARATOR|' ORDER BY col2) FROM t",
},
)
def test_rely_options(self):
for option in ("NORELY", "RELY"):
self.validate_identity(
f"CREATE TABLE t (col1 INT PRIMARY KEY {option}, col2 INT UNIQUE {option}, col3 INT NOT NULL FOREIGN KEY REFERENCES other_t (id) {option})"
)
self.validate_identity(
f"CREATE TABLE t (col1 INT, col2 INT, col3 INT, PRIMARY KEY (col1) {option}, UNIQUE (col1, col2) {option}, FOREIGN KEY (col3) REFERENCES other_t (id) {option})"
)
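
A sketch of the RELY/NORELY round trip these identities pin down:

    import sqlglot

    # Constraint-level RELY/NORELY should survive a snowflake round trip
    sql = "CREATE TABLE t (col1 INT PRIMARY KEY RELY, col2 INT UNIQUE NORELY)"
    print(sqlglot.transpile(sql, read="snowflake", write="snowflake")[0])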


@@ -56,7 +56,7 @@ class TestSpark(Validator):
"CREATE TABLE x USING ICEBERG PARTITIONED BY (MONTHS(y)) LOCATION 's3://z'",
write={
"duckdb": "CREATE TABLE x",
"presto": "CREATE TABLE x WITH (FORMAT='ICEBERG', PARTITIONED_BY=ARRAY['MONTHS'])",
"presto": "CREATE TABLE x WITH (FORMAT='ICEBERG', PARTITIONED_BY=ARRAY['MONTHS(y)'])",
"hive": "CREATE TABLE x STORED AS ICEBERG PARTITIONED BY (MONTHS(y)) LOCATION 's3://z'",
"spark": "CREATE TABLE x USING ICEBERG PARTITIONED BY (MONTHS(y)) LOCATION 's3://z'",
},
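
The fix above keeps the transform's argument when targeting Presto; a sketch:

    import sqlglot

    sql = "CREATE TABLE x USING ICEBERG PARTITIONED BY (MONTHS(y)) LOCATION 's3://z'"
    print(sqlglot.transpile(sql, read="spark", write="presto")[0])
    # per the updated test: CREATE TABLE x WITH (FORMAT='ICEBERG', PARTITIONED_BY=ARRAY['MONTHS(y)'])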


@@ -93,6 +93,16 @@ class TestTrino(Validator):
"CREATE TABLE foo.bar WITH (LOCATION='s3://bucket/foo/bar') AS SELECT 1"
)
# Hive connector syntax (partitioned_by)
self.validate_identity(
"CREATE TABLE foo (a VARCHAR, b INTEGER, c DATE) WITH (PARTITIONED_BY=ARRAY['a', 'b'])"
)
# Iceberg connector syntax (partitioning, can contain Iceberg transform expressions)
self.validate_identity(
"CREATE TABLE foo (a VARCHAR, b INTEGER, c DATE) WITH (PARTITIONING=ARRAY['a', 'bucket(4, b)', 'month(c)'])",
)
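
Both connector spellings, sketched as round trips through the trino dialect:

    import sqlglot

    for sql in (
        # Hive connector: plain column names under 'partitioned_by'
        "CREATE TABLE foo (a VARCHAR, b INTEGER, c DATE) WITH (PARTITIONED_BY=ARRAY['a', 'b'])",
        # Iceberg connector: 'partitioning' may carry transform expressions
        "CREATE TABLE foo (a VARCHAR, b INTEGER, c DATE) WITH (PARTITIONING=ARRAY['a', 'bucket(4, b)', 'month(c)'])",
    ):
        print(sqlglot.transpile(sql, read="trino", write="trino")[0])
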
def test_analyze(self):
self.validate_identity("ANALYZE tbl")
self.validate_identity("ANALYZE tbl WITH (prop1=val1, prop2=val2)")


@@ -124,6 +124,12 @@ SELECT CAST(CAST(`t`.`some_col` AS DATE) AS DATETIME) < CAST(CAST(`t`.`other_col
--------------------------------------
-- Remove redundant casts
--------------------------------------
CAST(CAST("foo" AS DECIMAL(4, 2)) AS DECIMAL(8, 4)) AS "x";
CAST(CAST("foo" AS DECIMAL(4, 2)) AS DECIMAL(8, 4)) AS "x";
CAST(CAST("foo" AS DECIMAL(4, 2)) AS DECIMAL(4, 2)) AS "x";
CAST("foo" AS DECIMAL(4, 2)) AS "x";
CAST(CAST('2023-01-01' AS DATE) AS DATE);
CAST('2023-01-01' AS DATE);
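
A sketch of the rule through the full optimizer (exact aliasing and quoting of the output may differ; the point is that only value-preserving outer casts are dropped):

    import sqlglot
    from sqlglot.optimizer import optimize

    # DATE -> DATE is redundant and removed; DECIMAL(4, 2) -> DECIMAL(8, 4) is not,
    # because the outer type differs from the inner cast's type
    print(optimize(sqlglot.parse_one("SELECT CAST(CAST('2023-01-01' AS DATE) AS DATE) AS x")).sql())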


@@ -67,3 +67,14 @@ SELECT x.a > _u_0.b FROM x CROSS JOIN (SELECT SUM(y.a) AS b FROM y) AS _u_0;
SELECT (SELECT MAX(t2.c1) AS c1 FROM t2 WHERE t2.c2 = t1.c2 AND t2.c3 <= TRUNC(t1.c3)) AS c FROM t1;
SELECT _u_0.c1 AS c FROM t1 LEFT JOIN (SELECT MAX(t2.c1) AS c1, t2.c2 AS _u_1, MAX(t2.c3) AS _u_2 FROM t2 WHERE TRUE AND TRUE GROUP BY t2.c2) AS _u_0 ON _u_0._u_1 = t1.c2 WHERE _u_0._u_2 <= TRUNC(t1.c3);
SELECT s.t AS t FROM s WHERE 1 IN (SELECT t.a AS a FROM t WHERE t.b > 1);
SELECT s.t AS t FROM s LEFT JOIN (SELECT t.a AS a FROM t WHERE t.b > 1 GROUP BY t.a) AS _u_0 ON 1 = _u_0.a WHERE NOT _u_0.a IS NULL;
# title: can't create GROUP BY clause with an aggregate
SELECT s.t FROM s WHERE 1 IN (SELECT MAX(t.a) AS t1 FROM t);
SELECT s.t FROM s LEFT JOIN (SELECT MAX(t.a) AS t1 FROM t) AS _u_0 ON 1 = _u_0.t1 WHERE NOT _u_0.t1 IS NULL;
# title: can't create GROUP BY clause with an aggregate (nested)
SELECT s.t FROM s WHERE 1 IN (SELECT MAX(t.a) + 1 AS t1 FROM t);
SELECT s.t FROM s LEFT JOIN (SELECT MAX(t.a) + 1 AS t1 FROM t) AS _u_0 ON 1 = _u_0.t1 WHERE NOT _u_0.t1 IS NULL
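
Sketched against the pass these fixtures exercise:

    import sqlglot
    from sqlglot.optimizer.unnest_subqueries import unnest_subqueries

    # An aggregate projection cannot be repeated in a GROUP BY, so the rewrite
    # joins on the aggregate's alias directly without adding a GROUP BY clause
    expr = sqlglot.parse_one("SELECT s.t FROM s WHERE 1 IN (SELECT MAX(t.a) AS t1 FROM t)")
    print(unnest_subqueries(expr).sql())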


@@ -533,6 +533,11 @@ class TestOptimizer(unittest.TestCase):
def test_simplify(self):
self.check_file("simplify", simplify)
# Ensure simplify mutates the AST properly
expression = parse_one("SELECT 1 + 2")
simplify(expression.selects[0])
self.assertEqual(expression.sql(), "SELECT 3")
expression = parse_one("SELECT a, c, b FROM table1 WHERE 1 = 1")
self.assertEqual(simplify(simplify(expression.find(exp.Where))).sql(), "WHERE TRUE")