Merging upstream version 26.15.0.
Signed-off-by: Daniel Baumann <daniel@debian.org>
parent 2a79d9df75
commit de6539b796
58 changed files with 4878 additions and 4677 deletions
@@ -202,6 +202,67 @@ class TestAthena(Validator):
            identify=True,
        )

    def test_create_table(self):
        # There are two CREATE TABLE syntaxes
        # Both hit Athena's Hive engine but creating an Iceberg table is different from creating a normal Hive table

        table_schema = exp.Schema(
            this=exp.to_table("foo.bar"),
            expressions=[
                exp.ColumnDef(this=exp.to_identifier("a"), kind=exp.DataType.build("int")),
                exp.ColumnDef(this=exp.to_identifier("b"), kind=exp.DataType.build("varchar")),
            ],
        )

        # Hive tables - CREATE EXTERNAL TABLE
        ct_hive = exp.Create(
            this=table_schema,
            kind="TABLE",
            properties=exp.Properties(
                expressions=[
                    exp.ExternalProperty(),
                    exp.FileFormatProperty(this=exp.Literal.string("parquet")),
                    exp.LocationProperty(this=exp.Literal.string("s3://foo")),
                    exp.PartitionedByProperty(
                        this=exp.Schema(expressions=[exp.to_column("partition_col")])
                    ),
                ]
            ),
        )
        self.assertEqual(
            ct_hive.sql(dialect=self.dialect, identify=True),
            "CREATE EXTERNAL TABLE `foo`.`bar` (`a` INT, `b` STRING) STORED AS PARQUET LOCATION 's3://foo' PARTITIONED BY (`partition_col`)",
        )

        # Iceberg tables - CREATE TABLE... TBLPROPERTIES ('table_type'='iceberg')
        # no EXTERNAL keyword and the 'table_type=iceberg' property must be set
        # ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning
        ct_iceberg = exp.Create(
            this=table_schema,
            kind="TABLE",
            properties=exp.Properties(
                expressions=[
                    exp.FileFormatProperty(this=exp.Literal.string("parquet")),
                    exp.LocationProperty(this=exp.Literal.string("s3://foo")),
                    exp.PartitionedByProperty(
                        this=exp.Schema(
                            expressions=[
                                exp.to_column("partition_col"),
                                exp.PartitionedByBucket(
                                    this=exp.to_column("a"), expression=exp.Literal.number(4)
                                ),
                            ]
                        )
                    ),
                    exp.Property(this=exp.var("table_type"), value=exp.Literal.string("iceberg")),
                ]
            ),
        )
        self.assertEqual(
            ct_iceberg.sql(dialect=self.dialect, identify=True),
            "CREATE TABLE `foo`.`bar` (`a` INT, `b` STRING) STORED AS PARQUET LOCATION 's3://foo' PARTITIONED BY (`partition_col`, BUCKET(4, `a`)) TBLPROPERTIES ('table_type'='iceberg')",
        )

    def test_ctas(self):
        # Hive tables use 'external_location' to specify the table location, Iceberg tables use 'location' to specify the table location
        # In addition, Hive tables use 'partitioned_by' to specify the partition fields and Iceberg tables use 'partitioning' to specify the partition fields
@@ -223,7 +284,11 @@ class TestAthena(Validator):
        )
        self.assertEqual(
            ctas_hive.sql(dialect=self.dialect, identify=True),
            "CREATE TABLE \"foo\".\"bar\" WITH (format='parquet', external_location='s3://foo', partitioned_by=ARRAY['partition_col']) AS SELECT 1",
            "CREATE TABLE \"foo\".\"bar\" WITH (format='parquet', external_location='s3://foo', partitioned_by=ARRAY['\"partition_col\"']) AS SELECT 1",
        )
        self.assertEqual(
            ctas_hive.sql(dialect=self.dialect, identify=False),
            "CREATE TABLE foo.bar WITH (format='parquet', external_location='s3://foo', partitioned_by=ARRAY['partition_col']) AS SELECT 1",
        )

        ctas_iceberg = exp.Create(
@@ -234,7 +299,14 @@ class TestAthena(Validator):
                    exp.Property(this=exp.var("table_type"), value=exp.Literal.string("iceberg")),
                    exp.LocationProperty(this=exp.Literal.string("s3://foo")),
                    exp.PartitionedByProperty(
                        this=exp.Schema(expressions=[exp.to_column("partition_col")])
                        this=exp.Schema(
                            expressions=[
                                exp.to_column("partition_col"),
                                exp.PartitionedByBucket(
                                    this=exp.to_column("a"), expression=exp.Literal.number(4)
                                ),
                            ]
                        )
                    ),
                ]
            ),
@@ -242,5 +314,9 @@ class TestAthena(Validator):
        )
        self.assertEqual(
            ctas_iceberg.sql(dialect=self.dialect, identify=True),
            "CREATE TABLE \"foo\".\"bar\" WITH (table_type='iceberg', location='s3://foo', partitioning=ARRAY['partition_col']) AS SELECT 1",
            "CREATE TABLE \"foo\".\"bar\" WITH (table_type='iceberg', location='s3://foo', partitioning=ARRAY['\"partition_col\"', 'BUCKET(\"a\", 4)']) AS SELECT 1",
        )
        self.assertEqual(
            ctas_iceberg.sql(dialect=self.dialect, identify=False),
            "CREATE TABLE foo.bar WITH (table_type='iceberg', location='s3://foo', partitioning=ARRAY['partition_col', 'BUCKET(a, 4)']) AS SELECT 1",
        )
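
A minimal standalone sketch of the property difference the assertions above pin down (not part of the upstream diff; it uses only the sqlglot expression API already exercised in these tests, and the exact SQL printed may vary slightly between sqlglot versions):

    from sqlglot import exp

    # Shared column schema for both flavors
    schema = exp.Schema(
        this=exp.to_table("foo.bar"),
        expressions=[exp.ColumnDef(this=exp.to_identifier("a"), kind=exp.DataType.build("int"))],
    )

    # Hive flavor: an ExternalProperty marks the table, yielding CREATE EXTERNAL TABLE
    hive = exp.Create(
        this=schema.copy(),
        kind="TABLE",
        properties=exp.Properties(expressions=[exp.ExternalProperty()]),
    )

    # Iceberg flavor: no EXTERNAL keyword; table_type=iceberg is emitted via TBLPROPERTIES
    iceberg = exp.Create(
        this=schema.copy(),
        kind="TABLE",
        properties=exp.Properties(
            expressions=[
                exp.Property(this=exp.var("table_type"), value=exp.Literal.string("iceberg"))
            ]
        ),
    )

    print(hive.sql(dialect="athena"))     # a CREATE EXTERNAL TABLE statement
    print(iceberg.sql(dialect="athena"))  # a CREATE TABLE ... TBLPROPERTIES ('table_type'='iceberg') statement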
@@ -308,10 +308,6 @@ LANGUAGE js AS
            """SELECT JSON '"foo"' AS json_data""",
            """SELECT PARSE_JSON('"foo"') AS json_data""",
        )
        self.validate_identity(
            "SELECT * FROM UNNEST(x) WITH OFFSET EXCEPT DISTINCT SELECT * FROM UNNEST(y) WITH OFFSET",
            "SELECT * FROM UNNEST(x) WITH OFFSET AS offset EXCEPT DISTINCT SELECT * FROM UNNEST(y) WITH OFFSET AS offset",
        )
        self.validate_identity(
            "SELECT * FROM (SELECT a, b, c FROM test) PIVOT(SUM(b) d, COUNT(*) e FOR c IN ('x', 'y'))",
            "SELECT * FROM (SELECT a, b, c FROM test) PIVOT(SUM(b) AS d, COUNT(*) AS e FOR c IN ('x', 'y'))",
@@ -1519,8 +1515,8 @@ WHERE
        self.validate_all(
            "SELECT GENERATE_DATE_ARRAY('2016-10-05', '2016-10-08')",
            write={
                "duckdb": "SELECT CAST(GENERATE_SERIES(CAST('2016-10-05' AS DATE), CAST('2016-10-08' AS DATE), INTERVAL 1 DAY) AS DATE[])",
                "bigquery": "SELECT GENERATE_DATE_ARRAY('2016-10-05', '2016-10-08', INTERVAL 1 DAY)",
                "duckdb": "SELECT CAST(GENERATE_SERIES(CAST('2016-10-05' AS DATE), CAST('2016-10-08' AS DATE), INTERVAL '1' DAY) AS DATE[])",
                "bigquery": "SELECT GENERATE_DATE_ARRAY('2016-10-05', '2016-10-08', INTERVAL '1' DAY)",
            },
        )
        self.validate_all(
@@ -2424,3 +2420,16 @@ OPTIONS (
            "SELECT 1 AS x UNION ALL STRICT CORRESPONDING BY (foo, bar) SELECT 2 AS x",
            "SELECT 1 AS x UNION ALL BY NAME ON (foo, bar) SELECT 2 AS x",
        )

    def test_with_offset(self):
        self.validate_identity(
            "SELECT * FROM UNNEST(x) WITH OFFSET EXCEPT DISTINCT SELECT * FROM UNNEST(y) WITH OFFSET",
            "SELECT * FROM UNNEST(x) WITH OFFSET AS offset EXCEPT DISTINCT SELECT * FROM UNNEST(y) WITH OFFSET AS offset",
        )

        for join_ops in ("LEFT", "RIGHT", "FULL", "NATURAL", "SEMI", "ANTI"):
            with self.subTest(f"Testing {join_ops} in test_with_offset"):
                self.validate_identity(
                    f"SELECT * FROM t1, UNNEST([1, 2]) AS hit WITH OFFSET {join_ops} JOIN foo",
                    f"SELECT * FROM t1, UNNEST([1, 2]) AS hit WITH OFFSET AS offset {join_ops} JOIN foo",
                )
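
A short sketch of the behavior the new test_with_offset cases assert (not part of the upstream diff; it assumes only sqlglot.transpile and reuses the strings validated above):

    import sqlglot

    sql = "SELECT * FROM t1, UNNEST([1, 2]) AS hit WITH OFFSET LEFT JOIN foo"
    # Per the test above, the BigQuery round trip adds an explicit alias to WITH OFFSET:
    # SELECT * FROM t1, UNNEST([1, 2]) AS hit WITH OFFSET AS offset LEFT JOIN foo
    print(sqlglot.transpile(sql, read="bigquery", write="bigquery")[0])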
@@ -1034,7 +1034,7 @@ class TestDuckDB(Validator):
                "clickhouse": "fromUnixTimestamp64Milli(CAST(x AS Nullable(Int64)))",
                "duckdb": "EPOCH_MS(x)",
                "mysql": "FROM_UNIXTIME(x / POWER(10, 3))",
                "postgres": "TO_TIMESTAMP(CAST(x AS DOUBLE PRECISION) / 10 ^ 3)",
                "postgres": "TO_TIMESTAMP(CAST(x AS DOUBLE PRECISION) / POWER(10, 3))",
                "presto": "FROM_UNIXTIME(CAST(x AS DOUBLE) / POW(10, 3))",
                "spark": "TIMESTAMP_MILLIS(x)",
            },
@@ -568,7 +568,7 @@ FROM json_data, field_ids""",
            "x ^ y",
            write={
                "": "POWER(x, y)",
                "postgres": "x ^ y",
                "postgres": "POWER(x, y)",
            },
        )
        self.validate_all(
@@ -765,7 +765,7 @@ FROM json_data, field_ids""",
            "x / y ^ z",
            write={
                "": "x / POWER(y, z)",
                "postgres": "x / y ^ z",
                "postgres": "x / POWER(y, z)",
            },
        )
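
A one-line sketch of the change captured by the updated Postgres expectations above (not part of the upstream diff; it assumes only sqlglot.transpile):

    import sqlglot

    # The ^ operator is now written back as POWER(...), per the expectations above
    print(sqlglot.transpile("x ^ y", read="postgres", write="postgres")[0])      # POWER(x, y)
    print(sqlglot.transpile("x / y ^ z", read="postgres", write="postgres")[0])  # x / POWER(y, z)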
        self.validate_all(
@@ -609,7 +609,7 @@ class TestSnowflake(Validator):
                "hive": "POWER(x, 2)",
                "mysql": "POWER(x, 2)",
                "oracle": "POWER(x, 2)",
                "postgres": "x ^ 2",
                "postgres": "POWER(x, 2)",
                "presto": "POWER(x, 2)",
                "redshift": "POWER(x, 2)",
                "snowflake": "POWER(x, 2)",
@@ -2563,3 +2563,12 @@ SINGLE = TRUE""",
                "duckdb": f"SELECT LISTAGG({distinct}col, '|SEPARATOR|' ORDER BY col2) FROM t",
            },
        )

    def test_rely_options(self):
        for option in ("NORELY", "RELY"):
            self.validate_identity(
                f"CREATE TABLE t (col1 INT PRIMARY KEY {option}, col2 INT UNIQUE {option}, col3 INT NOT NULL FOREIGN KEY REFERENCES other_t (id) {option})"
            )
            self.validate_identity(
                f"CREATE TABLE t (col1 INT, col2 INT, col3 INT, PRIMARY KEY (col1) {option}, UNIQUE (col1, col2) {option}, FOREIGN KEY (col3) REFERENCES other_t (id) {option})"
            )
@@ -56,7 +56,7 @@ class TestSpark(Validator):
            "CREATE TABLE x USING ICEBERG PARTITIONED BY (MONTHS(y)) LOCATION 's3://z'",
            write={
                "duckdb": "CREATE TABLE x",
                "presto": "CREATE TABLE x WITH (FORMAT='ICEBERG', PARTITIONED_BY=ARRAY['MONTHS'])",
                "presto": "CREATE TABLE x WITH (FORMAT='ICEBERG', PARTITIONED_BY=ARRAY['MONTHS(y)'])",
                "hive": "CREATE TABLE x STORED AS ICEBERG PARTITIONED BY (MONTHS(y)) LOCATION 's3://z'",
                "spark": "CREATE TABLE x USING ICEBERG PARTITIONED BY (MONTHS(y)) LOCATION 's3://z'",
            },
@@ -93,6 +93,16 @@ class TestTrino(Validator):
            "CREATE TABLE foo.bar WITH (LOCATION='s3://bucket/foo/bar') AS SELECT 1"
        )

        # Hive connector syntax (partitioned_by)
        self.validate_identity(
            "CREATE TABLE foo (a VARCHAR, b INTEGER, c DATE) WITH (PARTITIONED_BY=ARRAY['a', 'b'])"
        )

        # Iceberg connector syntax (partitioning, can contain Iceberg transform expressions)
        self.validate_identity(
            "CREATE TABLE foo (a VARCHAR, b INTEGER, c DATE) WITH (PARTITIONING=ARRAY['a', 'bucket(4, b)', 'month(c)'])",
        )

    def test_analyze(self):
        self.validate_identity("ANALYZE tbl")
        self.validate_identity("ANALYZE tbl WITH (prop1=val1, prop2=val2)")
tests/fixtures/optimizer/canonicalize.sql
@@ -124,6 +124,12 @@ SELECT CAST(CAST(`t`.`some_col` AS DATE) AS DATETIME) < CAST(CAST(`t`.`other_col
--------------------------------------
-- Remove redundant casts
--------------------------------------
CAST(CAST("foo" AS DECIMAL(4, 2)) AS DECIMAL(8, 4)) AS "x";
CAST(CAST("foo" AS DECIMAL(4, 2)) AS DECIMAL(8, 4)) AS "x";

CAST(CAST("foo" AS DECIMAL(4, 2)) AS DECIMAL(4, 2)) AS "x";
CAST("foo" AS DECIMAL(4, 2)) AS "x";

CAST(CAST('2023-01-01' AS DATE) AS DATE);
CAST('2023-01-01' AS DATE);
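
A small sketch of the redundant-cast removal these fixture lines exercise (not part of the upstream diff; it assumes the canonicalize rule runs inside sqlglot.optimizer.optimize, and the exact identifier quoting of the output may differ):

    from sqlglot import parse_one
    from sqlglot.optimizer import optimize

    # Inner and outer casts agree on DATE, so one of them is redundant
    expr = optimize(parse_one("SELECT CAST(CAST('2023-01-01' AS DATE) AS DATE) AS x"))
    print(expr.sql())  # expected to keep a single CAST('2023-01-01' AS DATE)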
tests/fixtures/optimizer/unnest_subqueries.sql
@@ -67,3 +67,14 @@ SELECT x.a > _u_0.b FROM x CROSS JOIN (SELECT SUM(y.a) AS b FROM y) AS _u_0;

SELECT (SELECT MAX(t2.c1) AS c1 FROM t2 WHERE t2.c2 = t1.c2 AND t2.c3 <= TRUNC(t1.c3)) AS c FROM t1;
SELECT _u_0.c1 AS c FROM t1 LEFT JOIN (SELECT MAX(t2.c1) AS c1, t2.c2 AS _u_1, MAX(t2.c3) AS _u_2 FROM t2 WHERE TRUE AND TRUE GROUP BY t2.c2) AS _u_0 ON _u_0._u_1 = t1.c2 WHERE _u_0._u_2 <= TRUNC(t1.c3);

SELECT s.t AS t FROM s WHERE 1 IN (SELECT t.a AS a FROM t WHERE t.b > 1);
SELECT s.t AS t FROM s LEFT JOIN (SELECT t.a AS a FROM t WHERE t.b > 1 GROUP BY t.a) AS _u_0 ON 1 = _u_0.a WHERE NOT _u_0.a IS NULL;

# title: can't create GROUP BY clause with an aggregate
SELECT s.t FROM s WHERE 1 IN (SELECT MAX(t.a) AS t1 FROM t);
SELECT s.t FROM s LEFT JOIN (SELECT MAX(t.a) AS t1 FROM t) AS _u_0 ON 1 = _u_0.t1 WHERE NOT _u_0.t1 IS NULL;

# title: can't create GROUP BY clause with an aggregate (nested)
SELECT s.t FROM s WHERE 1 IN (SELECT MAX(t.a) + 1 AS t1 FROM t);
SELECT s.t FROM s LEFT JOIN (SELECT MAX(t.a) + 1 AS t1 FROM t) AS _u_0 ON 1 = _u_0.t1 WHERE NOT _u_0.t1 IS NULL
@@ -533,6 +533,11 @@ class TestOptimizer(unittest.TestCase):
    def test_simplify(self):
        self.check_file("simplify", simplify)

        # Ensure simplify mutates the AST properly
        expression = parse_one("SELECT 1 + 2")
        simplify(expression.selects[0])
        self.assertEqual(expression.sql(), "SELECT 3")

        expression = parse_one("SELECT a, c, b FROM table1 WHERE 1 = 1")
        self.assertEqual(simplify(simplify(expression.find(exp.Where))).sql(), "WHERE TRUE")