Adding upstream version 26.15.0.
Signed-off-by: Daniel Baumann <daniel@debian.org>
parent dfe1cec38a
commit 4bfa0e7e53
58 changed files with 4878 additions and 4677 deletions
@@ -202,6 +202,67 @@ class TestAthena(Validator):
             identify=True,
         )
 
+    def test_create_table(self):
+        # There are two CREATE TABLE syntaxes
+        # Both hit Athena's Hive engine but creating an Iceberg table is different from creating a normal Hive table
+
+        table_schema = exp.Schema(
+            this=exp.to_table("foo.bar"),
+            expressions=[
+                exp.ColumnDef(this=exp.to_identifier("a"), kind=exp.DataType.build("int")),
+                exp.ColumnDef(this=exp.to_identifier("b"), kind=exp.DataType.build("varchar")),
+            ],
+        )
+
+        # Hive tables - CREATE EXTERNAL TABLE
+        ct_hive = exp.Create(
+            this=table_schema,
+            kind="TABLE",
+            properties=exp.Properties(
+                expressions=[
+                    exp.ExternalProperty(),
+                    exp.FileFormatProperty(this=exp.Literal.string("parquet")),
+                    exp.LocationProperty(this=exp.Literal.string("s3://foo")),
+                    exp.PartitionedByProperty(
+                        this=exp.Schema(expressions=[exp.to_column("partition_col")])
+                    ),
+                ]
+            ),
+        )
+        self.assertEqual(
+            ct_hive.sql(dialect=self.dialect, identify=True),
+            "CREATE EXTERNAL TABLE `foo`.`bar` (`a` INT, `b` STRING) STORED AS PARQUET LOCATION 's3://foo' PARTITIONED BY (`partition_col`)",
+        )
+
+        # Iceberg tables - CREATE TABLE... TBLPROPERTIES ('table_type'='iceberg')
+        # no EXTERNAL keyword and the 'table_type=iceberg' property must be set
+        # ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning
+        ct_iceberg = exp.Create(
+            this=table_schema,
+            kind="TABLE",
+            properties=exp.Properties(
+                expressions=[
+                    exp.FileFormatProperty(this=exp.Literal.string("parquet")),
+                    exp.LocationProperty(this=exp.Literal.string("s3://foo")),
+                    exp.PartitionedByProperty(
+                        this=exp.Schema(
+                            expressions=[
+                                exp.to_column("partition_col"),
+                                exp.PartitionedByBucket(
+                                    this=exp.to_column("a"), expression=exp.Literal.number(4)
+                                ),
+                            ]
+                        )
+                    ),
+                    exp.Property(this=exp.var("table_type"), value=exp.Literal.string("iceberg")),
+                ]
+            ),
+        )
+        self.assertEqual(
+            ct_iceberg.sql(dialect=self.dialect, identify=True),
+            "CREATE TABLE `foo`.`bar` (`a` INT, `b` STRING) STORED AS PARQUET LOCATION 's3://foo' PARTITIONED BY (`partition_col`, BUCKET(4, `a`)) TBLPROPERTIES ('table_type'='iceberg')",
+        )
+
     def test_ctas(self):
         # Hive tables use 'external_location' to specify the table location, Iceberg tables use 'location' to specify the table location
         # In addition, Hive tables used 'partitioned_by' to specify the partition fields and Iceberg tables use 'partitioning' to specify the partition fields
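
The hunk above adds the two CREATE TABLE flavours the comments describe. As a quick way to see the Hive branch outside the test harness, here is a minimal sketch (not part of the diff): it assumes sqlglot is importable and that the literal dialect name "athena" is what self.dialect resolves to in TestAthena, and it trims the column list and partition property for brevity, so the printed DDL is only a shortened form of the string asserted above.

from sqlglot import exp

table_schema = exp.Schema(
    this=exp.to_table("foo.bar"),
    expressions=[exp.ColumnDef(this=exp.to_identifier("a"), kind=exp.DataType.build("int"))],
)
ct_hive = exp.Create(
    this=table_schema,
    kind="TABLE",
    properties=exp.Properties(
        expressions=[
            exp.ExternalProperty(),  # this property is what produces the EXTERNAL keyword
            exp.FileFormatProperty(this=exp.Literal.string("parquet")),
            exp.LocationProperty(this=exp.Literal.string("s3://foo")),
        ]
    ),
)
# With the full property list from the test, the asserted output is:
# CREATE EXTERNAL TABLE `foo`.`bar` (`a` INT, `b` STRING) STORED AS PARQUET LOCATION 's3://foo' PARTITIONED BY (`partition_col`)
print(ct_hive.sql(dialect="athena", identify=True))
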
@@ -223,7 +284,11 @@ class TestAthena(Validator):
         )
         self.assertEqual(
             ctas_hive.sql(dialect=self.dialect, identify=True),
-            "CREATE TABLE \"foo\".\"bar\" WITH (format='parquet', external_location='s3://foo', partitioned_by=ARRAY['partition_col']) AS SELECT 1",
+            "CREATE TABLE \"foo\".\"bar\" WITH (format='parquet', external_location='s3://foo', partitioned_by=ARRAY['\"partition_col\"']) AS SELECT 1",
         )
+        self.assertEqual(
+            ctas_hive.sql(dialect=self.dialect, identify=False),
+            "CREATE TABLE foo.bar WITH (format='parquet', external_location='s3://foo', partitioned_by=ARRAY['partition_col']) AS SELECT 1",
+        )
 
         ctas_iceberg = exp.Create(
@@ -234,7 +299,14 @@ class TestAthena(Validator):
                     exp.Property(this=exp.var("table_type"), value=exp.Literal.string("iceberg")),
                     exp.LocationProperty(this=exp.Literal.string("s3://foo")),
                     exp.PartitionedByProperty(
-                        this=exp.Schema(expressions=[exp.to_column("partition_col")])
+                        this=exp.Schema(
+                            expressions=[
+                                exp.to_column("partition_col"),
+                                exp.PartitionedByBucket(
+                                    this=exp.to_column("a"), expression=exp.Literal.number(4)
+                                ),
+                            ]
+                        )
                     ),
                 ]
             ),
@@ -242,5 +314,9 @@ class TestAthena(Validator):
         )
         self.assertEqual(
             ctas_iceberg.sql(dialect=self.dialect, identify=True),
-            "CREATE TABLE \"foo\".\"bar\" WITH (table_type='iceberg', location='s3://foo', partitioning=ARRAY['partition_col']) AS SELECT 1",
+            "CREATE TABLE \"foo\".\"bar\" WITH (table_type='iceberg', location='s3://foo', partitioning=ARRAY['\"partition_col\"', 'BUCKET(\"a\", 4)']) AS SELECT 1",
         )
+        self.assertEqual(
+            ctas_iceberg.sql(dialect=self.dialect, identify=False),
+            "CREATE TABLE foo.bar WITH (table_type='iceberg', location='s3://foo', partitioning=ARRAY['partition_col', 'BUCKET(a, 4)']) AS SELECT 1",
+        )
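
The test_ctas hunks change the Iceberg CTAS expectations so the BUCKET(a, 4) transform is carried through the partitioning array. A hedged sketch of what the updated assertion exercises follows; the parts of ctas_iceberg not visible in these hunks (the target table, the kind and the SELECT) are plausible reconstructions, only the property list is quoted from the diff.

from sqlglot import exp

ctas_iceberg = exp.Create(
    this=exp.to_table("foo.bar"),  # assumption: not shown in the visible hunks
    kind="TABLE",                  # assumption: not shown in the visible hunks
    expression=exp.select("1"),    # assumption: the "AS SELECT 1" part of the asserted SQL
    properties=exp.Properties(
        expressions=[
            exp.Property(this=exp.var("table_type"), value=exp.Literal.string("iceberg")),
            exp.LocationProperty(this=exp.Literal.string("s3://foo")),
            exp.PartitionedByProperty(
                this=exp.Schema(
                    expressions=[
                        exp.to_column("partition_col"),
                        exp.PartitionedByBucket(this=exp.to_column("a"), expression=exp.Literal.number(4)),
                    ]
                )
            ),
        ]
    ),
)
# Per the updated assertion, identify=False should now produce:
# CREATE TABLE foo.bar WITH (table_type='iceberg', location='s3://foo', partitioning=ARRAY['partition_col', 'BUCKET(a, 4)']) AS SELECT 1
print(ctas_iceberg.sql(dialect="athena", identify=False))
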
@@ -308,10 +308,6 @@ LANGUAGE js AS
             """SELECT JSON '"foo"' AS json_data""",
             """SELECT PARSE_JSON('"foo"') AS json_data""",
         )
-        self.validate_identity(
-            "SELECT * FROM UNNEST(x) WITH OFFSET EXCEPT DISTINCT SELECT * FROM UNNEST(y) WITH OFFSET",
-            "SELECT * FROM UNNEST(x) WITH OFFSET AS offset EXCEPT DISTINCT SELECT * FROM UNNEST(y) WITH OFFSET AS offset",
-        )
         self.validate_identity(
             "SELECT * FROM (SELECT a, b, c FROM test) PIVOT(SUM(b) d, COUNT(*) e FOR c IN ('x', 'y'))",
             "SELECT * FROM (SELECT a, b, c FROM test) PIVOT(SUM(b) AS d, COUNT(*) AS e FOR c IN ('x', 'y'))",
@@ -1519,8 +1515,8 @@ WHERE
         self.validate_all(
             "SELECT GENERATE_DATE_ARRAY('2016-10-05', '2016-10-08')",
             write={
-                "duckdb": "SELECT CAST(GENERATE_SERIES(CAST('2016-10-05' AS DATE), CAST('2016-10-08' AS DATE), INTERVAL 1 DAY) AS DATE[])",
-                "bigquery": "SELECT GENERATE_DATE_ARRAY('2016-10-05', '2016-10-08', INTERVAL 1 DAY)",
+                "duckdb": "SELECT CAST(GENERATE_SERIES(CAST('2016-10-05' AS DATE), CAST('2016-10-08' AS DATE), INTERVAL '1' DAY) AS DATE[])",
+                "bigquery": "SELECT GENERATE_DATE_ARRAY('2016-10-05', '2016-10-08', INTERVAL '1' DAY)",
             },
         )
         self.validate_all(
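
The only change in this hunk is the quoting of the interval quantity: INTERVAL 1 DAY becomes INTERVAL '1' DAY in both the DuckDB and BigQuery outputs. A small sketch, assuming sqlglot.transpile with these dialect names behaves the same way validate_all does here:

import sqlglot

sql = "SELECT GENERATE_DATE_ARRAY('2016-10-05', '2016-10-08')"
# Per the updated expectation, this should print the DuckDB form with the quoted interval:
# SELECT CAST(GENERATE_SERIES(CAST('2016-10-05' AS DATE), CAST('2016-10-08' AS DATE), INTERVAL '1' DAY) AS DATE[])
print(sqlglot.transpile(sql, read="bigquery", write="duckdb")[0])
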
@@ -2424,3 +2420,16 @@ OPTIONS (
             "SELECT 1 AS x UNION ALL STRICT CORRESPONDING BY (foo, bar) SELECT 2 AS x",
             "SELECT 1 AS x UNION ALL BY NAME ON (foo, bar) SELECT 2 AS x",
         )
+
+    def test_with_offset(self):
+        self.validate_identity(
+            "SELECT * FROM UNNEST(x) WITH OFFSET EXCEPT DISTINCT SELECT * FROM UNNEST(y) WITH OFFSET",
+            "SELECT * FROM UNNEST(x) WITH OFFSET AS offset EXCEPT DISTINCT SELECT * FROM UNNEST(y) WITH OFFSET AS offset",
+        )
+
+        for join_ops in ("LEFT", "RIGHT", "FULL", "NATURAL", "SEMI", "ANTI"):
+            with self.subTest(f"Testing {join_ops} in test_with_offset"):
+                self.validate_identity(
+                    f"SELECT * FROM t1, UNNEST([1, 2]) AS hit WITH OFFSET {join_ops} JOIN foo",
+                    f"SELECT * FROM t1, UNNEST([1, 2]) AS hit WITH OFFSET AS offset {join_ops} JOIN foo",
+                )
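
The WITH OFFSET identity removed in the earlier hunk reappears here in a dedicated test_with_offset, and the new loop also covers UNNEST ... WITH OFFSET immediately followed by a JOIN. A sketch of one of the looped cases, assuming sqlglot.transpile mirrors what validate_identity checks:

import sqlglot

sql = "SELECT * FROM t1, UNNEST([1, 2]) AS hit WITH OFFSET LEFT JOIN foo"
# Expected per the test (join_ops == "LEFT"): the implicit offset alias is made explicit:
# SELECT * FROM t1, UNNEST([1, 2]) AS hit WITH OFFSET AS offset LEFT JOIN foo
print(sqlglot.transpile(sql, read="bigquery", write="bigquery")[0])
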
@@ -1034,7 +1034,7 @@ class TestDuckDB(Validator):
                 "clickhouse": "fromUnixTimestamp64Milli(CAST(x AS Nullable(Int64)))",
                 "duckdb": "EPOCH_MS(x)",
                 "mysql": "FROM_UNIXTIME(x / POWER(10, 3))",
-                "postgres": "TO_TIMESTAMP(CAST(x AS DOUBLE PRECISION) / 10 ^ 3)",
+                "postgres": "TO_TIMESTAMP(CAST(x AS DOUBLE PRECISION) / POWER(10, 3))",
                 "presto": "FROM_UNIXTIME(CAST(x AS DOUBLE) / POW(10, 3))",
                 "spark": "TIMESTAMP_MILLIS(x)",
             },
@@ -568,7 +568,7 @@ FROM json_data, field_ids""",
             "x ^ y",
             write={
                 "": "POWER(x, y)",
-                "postgres": "x ^ y",
+                "postgres": "POWER(x, y)",
             },
         )
         self.validate_all(
@@ -765,7 +765,7 @@ FROM json_data, field_ids""",
             "x / y ^ z",
             write={
                 "": "x / POWER(y, z)",
-                "postgres": "x / y ^ z",
+                "postgres": "x / POWER(y, z)",
             },
         )
         self.validate_all(
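
These Postgres hunks, together with the DuckDB and Snowflake ones around them, all encode the same behaviour change: the Postgres generator now emits POWER(x, y) rather than the ^ operator. A sketch, assuming transpile with read/write "postgres" matches what validate_all checks in these tests:

import sqlglot

# Previously this round-tripped as "x ^ y"; per the updated expectations it should now print:
# POWER(x, y)
print(sqlglot.transpile("x ^ y", read="postgres", write="postgres")[0])
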
@@ -609,7 +609,7 @@ class TestSnowflake(Validator):
                 "hive": "POWER(x, 2)",
                 "mysql": "POWER(x, 2)",
                 "oracle": "POWER(x, 2)",
-                "postgres": "x ^ 2",
+                "postgres": "POWER(x, 2)",
                 "presto": "POWER(x, 2)",
                 "redshift": "POWER(x, 2)",
                 "snowflake": "POWER(x, 2)",
@@ -2563,3 +2563,12 @@ SINGLE = TRUE""",
                     "duckdb": f"SELECT LISTAGG({distinct}col, '|SEPARATOR|' ORDER BY col2) FROM t",
                 },
             )
+
+    def test_rely_options(self):
+        for option in ("NORELY", "RELY"):
+            self.validate_identity(
+                f"CREATE TABLE t (col1 INT PRIMARY KEY {option}, col2 INT UNIQUE {option}, col3 INT NOT NULL FOREIGN KEY REFERENCES other_t (id) {option})"
+            )
+            self.validate_identity(
+                f"CREATE TABLE t (col1 INT, col2 INT, col3 INT, PRIMARY KEY (col1) {option}, UNIQUE (col1, col2) {option}, FOREIGN KEY (col3) REFERENCES other_t (id) {option})"
+            )
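
test_rely_options asserts that RELY / NORELY on both column-level and table-level constraints survive a Snowflake round trip. A sketch of the RELY case, assuming transpile with read/write "snowflake" is equivalent to validate_identity:

import sqlglot

ddl = (
    "CREATE TABLE t (col1 INT PRIMARY KEY RELY, col2 INT UNIQUE RELY, "
    "col3 INT NOT NULL FOREIGN KEY REFERENCES other_t (id) RELY)"
)
# validate_identity with a single argument means the round trip should reproduce the input exactly
assert sqlglot.transpile(ddl, read="snowflake", write="snowflake")[0] == ddl
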
@@ -56,7 +56,7 @@ class TestSpark(Validator):
             "CREATE TABLE x USING ICEBERG PARTITIONED BY (MONTHS(y)) LOCATION 's3://z'",
             write={
                 "duckdb": "CREATE TABLE x",
-                "presto": "CREATE TABLE x WITH (FORMAT='ICEBERG', PARTITIONED_BY=ARRAY['MONTHS'])",
+                "presto": "CREATE TABLE x WITH (FORMAT='ICEBERG', PARTITIONED_BY=ARRAY['MONTHS(y)'])",
                 "hive": "CREATE TABLE x STORED AS ICEBERG PARTITIONED BY (MONTHS(y)) LOCATION 's3://z'",
                 "spark": "CREATE TABLE x USING ICEBERG PARTITIONED BY (MONTHS(y)) LOCATION 's3://z'",
             },
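
The Spark hunk tightens the Presto output so the Iceberg partition transform keeps its argument (MONTHS(y) instead of MONTHS). A sketch, assuming transpile with these dialect names matches validate_all in TestSpark:

import sqlglot

sql = "CREATE TABLE x USING ICEBERG PARTITIONED BY (MONTHS(y)) LOCATION 's3://z'"
# Per the updated expectation this should print:
# CREATE TABLE x WITH (FORMAT='ICEBERG', PARTITIONED_BY=ARRAY['MONTHS(y)'])
print(sqlglot.transpile(sql, read="spark", write="presto")[0])
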
@@ -93,6 +93,16 @@ class TestTrino(Validator):
             "CREATE TABLE foo.bar WITH (LOCATION='s3://bucket/foo/bar') AS SELECT 1"
         )
 
+        # Hive connector syntax (partitioned_by)
+        self.validate_identity(
+            "CREATE TABLE foo (a VARCHAR, b INTEGER, c DATE) WITH (PARTITIONED_BY=ARRAY['a', 'b'])"
+        )
+
+        # Iceberg connector syntax (partitioning, can contain Iceberg transform expressions)
+        self.validate_identity(
+            "CREATE TABLE foo (a VARCHAR, b INTEGER, c DATE) WITH (PARTITIONING=ARRAY['a', 'bucket(4, b)', 'month(c)'])",
+        )
+
     def test_analyze(self):
         self.validate_identity("ANALYZE tbl")
         self.validate_identity("ANALYZE tbl WITH (prop1=val1, prop2=val2)")
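
The Trino additions distinguish the Hive connector's partitioned_by property from the Iceberg connector's partitioning property, which may carry transform expressions such as bucket(4, b) and month(c). A sketch of the Iceberg case, assuming transpile with read/write "trino" matches validate_identity:

import sqlglot

ddl = "CREATE TABLE foo (a VARCHAR, b INTEGER, c DATE) WITH (PARTITIONING=ARRAY['a', 'bucket(4, b)', 'month(c)'])"
# validate_identity with no second argument asserts the statement is reproduced unchanged
assert sqlglot.transpile(ddl, read="trino", write="trino")[0] == ddl
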