1
0
Fork 0

Adding upstream version 26.15.0.

Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
Daniel Baumann 2025-04-21 09:50:00 +02:00
parent dfe1cec38a
commit 4bfa0e7e53
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
58 changed files with 4878 additions and 4677 deletions

View file

@ -202,6 +202,67 @@ class TestAthena(Validator):
identify=True,
)
def test_create_table(self):
    # Athena accepts two flavours of plain CREATE TABLE. Both are handled by its
    # Hive engine, but an Iceberg table is declared differently from a regular
    # Hive table, so each flavour is generated and checked separately below.
    column_defs = [
        exp.ColumnDef(this=exp.to_identifier(col_name), kind=exp.DataType.build(col_type))
        for col_name, col_type in (("a", "int"), ("b", "varchar"))
    ]
    # The same schema node is used for both statements.
    table_schema = exp.Schema(this=exp.to_table("foo.bar"), expressions=column_defs)

    # Flavour 1: Hive table, declared with CREATE EXTERNAL TABLE.
    hive_partitioning = exp.PartitionedByProperty(
        this=exp.Schema(expressions=[exp.to_column("partition_col")])
    )
    hive_properties = exp.Properties(
        expressions=[
            exp.ExternalProperty(),
            exp.FileFormatProperty(this=exp.Literal.string("parquet")),
            exp.LocationProperty(this=exp.Literal.string("s3://foo")),
            hive_partitioning,
        ]
    )
    ct_hive = exp.Create(this=table_schema, kind="TABLE", properties=hive_properties)
    hive_sql = ct_hive.sql(dialect=self.dialect, identify=True)
    self.assertEqual(
        hive_sql,
        "CREATE EXTERNAL TABLE `foo`.`bar` (`a` INT, `b` STRING) STORED AS PARQUET LOCATION 's3://foo' PARTITIONED BY (`partition_col`)",
    )

    # Flavour 2: Iceberg table, declared with
    # CREATE TABLE ... TBLPROPERTIES ('table_type'='iceberg').
    # There is no EXTERNAL keyword and the 'table_type=iceberg' property must be set.
    # ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning
    bucket_transform = exp.PartitionedByBucket(
        this=exp.to_column("a"), expression=exp.Literal.number(4)
    )
    iceberg_partitioning = exp.PartitionedByProperty(
        this=exp.Schema(expressions=[exp.to_column("partition_col"), bucket_transform])
    )
    iceberg_properties = exp.Properties(
        expressions=[
            exp.FileFormatProperty(this=exp.Literal.string("parquet")),
            exp.LocationProperty(this=exp.Literal.string("s3://foo")),
            iceberg_partitioning,
            exp.Property(this=exp.var("table_type"), value=exp.Literal.string("iceberg")),
        ]
    )
    ct_iceberg = exp.Create(this=table_schema, kind="TABLE", properties=iceberg_properties)
    iceberg_sql = ct_iceberg.sql(dialect=self.dialect, identify=True)
    self.assertEqual(
        iceberg_sql,
        "CREATE TABLE `foo`.`bar` (`a` INT, `b` STRING) STORED AS PARQUET LOCATION 's3://foo' PARTITIONED BY (`partition_col`, BUCKET(4, `a`)) TBLPROPERTIES ('table_type'='iceberg')",
    )
def test_ctas(self):
# Hive tables use 'external_location' to specify the table location, Iceberg tables use 'location' to specify the table location
# In addition, Hive tables use 'partitioned_by' to specify the partition fields and Iceberg tables use 'partitioning' to specify the partition fields
@ -223,7 +284,11 @@ class TestAthena(Validator):
)
self.assertEqual(
ctas_hive.sql(dialect=self.dialect, identify=True),
"CREATE TABLE \"foo\".\"bar\" WITH (format='parquet', external_location='s3://foo', partitioned_by=ARRAY['partition_col']) AS SELECT 1",
"CREATE TABLE \"foo\".\"bar\" WITH (format='parquet', external_location='s3://foo', partitioned_by=ARRAY['\"partition_col\"']) AS SELECT 1",
)
self.assertEqual(
ctas_hive.sql(dialect=self.dialect, identify=False),
"CREATE TABLE foo.bar WITH (format='parquet', external_location='s3://foo', partitioned_by=ARRAY['partition_col']) AS SELECT 1",
)
ctas_iceberg = exp.Create(
@ -234,7 +299,14 @@ class TestAthena(Validator):
exp.Property(this=exp.var("table_type"), value=exp.Literal.string("iceberg")),
exp.LocationProperty(this=exp.Literal.string("s3://foo")),
exp.PartitionedByProperty(
this=exp.Schema(expressions=[exp.to_column("partition_col")])
this=exp.Schema(
expressions=[
exp.to_column("partition_col"),
exp.PartitionedByBucket(
this=exp.to_column("a"), expression=exp.Literal.number(4)
),
]
)
),
]
),
@ -242,5 +314,9 @@ class TestAthena(Validator):
)
self.assertEqual(
ctas_iceberg.sql(dialect=self.dialect, identify=True),
"CREATE TABLE \"foo\".\"bar\" WITH (table_type='iceberg', location='s3://foo', partitioning=ARRAY['partition_col']) AS SELECT 1",
"CREATE TABLE \"foo\".\"bar\" WITH (table_type='iceberg', location='s3://foo', partitioning=ARRAY['\"partition_col\"', 'BUCKET(\"a\", 4)']) AS SELECT 1",
)
self.assertEqual(
ctas_iceberg.sql(dialect=self.dialect, identify=False),
"CREATE TABLE foo.bar WITH (table_type='iceberg', location='s3://foo', partitioning=ARRAY['partition_col', 'BUCKET(a, 4)']) AS SELECT 1",
)