1
0
Fork 0

Merging upstream version 15.0.0.

Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
Daniel Baumann 2025-02-13 15:57:23 +01:00
parent 8deb804d23
commit fc63828ee4
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
167 changed files with 58268 additions and 51337 deletions

View file

@@ -1155,8 +1155,9 @@ class TestDataframeFunc(DataFrameValidator):
df, dfs = self.compare_spark_with_sqlglot(df_joined, dfs_joined)
self.assertIn("ResolvedHint (strategy=broadcast)", self.get_explain_plan(df))
self.assertIn("ResolvedHint (strategy=broadcast)", self.get_explain_plan(dfs))
# TODO: Add test to make sure with and without alias are the same once ids are deterministic
self.assertEqual(
"'UnresolvedHint BROADCAST, ['a2]", self.get_explain_plan(dfs).split("\n")[1]
)
def test_broadcast_func(self):
df_joined = self.df_spark_employee.join(
@@ -1188,6 +1189,9 @@ class TestDataframeFunc(DataFrameValidator):
df, dfs = self.compare_spark_with_sqlglot(df_joined, dfs_joined)
self.assertIn("ResolvedHint (strategy=broadcast)", self.get_explain_plan(df))
self.assertIn("ResolvedHint (strategy=broadcast)", self.get_explain_plan(dfs))
self.assertEqual(
"'UnresolvedHint BROADCAST, ['a2]", self.get_explain_plan(dfs).split("\n")[1]
)
def test_repartition_by_num(self):
"""

View file

@@ -70,13 +70,12 @@ class TestDataframeSession(DataFrameSQLValidator):
@mock.patch("sqlglot.schema", MappingSchema())
def test_sql_select_only(self):
# TODO: Do exact matches once CTE names are deterministic
query = "SELECT cola, colb FROM table"
sqlglot.schema.add_table("table", {"cola": "string", "colb": "string"})
df = self.spark.sql(query)
self.assertIn(
self.assertEqual(
"SELECT `table`.`cola` AS `cola`, `table`.`colb` AS `colb` FROM `table` AS `table`",
df.sql(pretty=False),
df.sql(pretty=False)[0],
)
@mock.patch("sqlglot.schema", MappingSchema())
@@ -90,14 +89,13 @@ class TestDataframeSession(DataFrameSQLValidator):
@mock.patch("sqlglot.schema", MappingSchema())
def test_sql_with_aggs(self):
# TODO: Do exact matches once CTE names are deterministic
query = "SELECT cola, colb FROM table"
sqlglot.schema.add_table("table", {"cola": "string", "colb": "string"})
df = self.spark.sql(query).groupBy(F.col("cola")).agg(F.sum("colb"))
result = df.sql(pretty=False, optimize=False)[0]
self.assertIn("SELECT cola, colb FROM table", result)
self.assertIn("SUM(colb)", result)
self.assertIn("GROUP BY cola", result)
self.assertEqual(
"WITH t38189 AS (SELECT cola, colb FROM table), t42330 AS (SELECT cola, colb FROM t38189) SELECT cola, SUM(colb) FROM t42330 GROUP BY cola",
df.sql(pretty=False, optimize=False)[0],
)
@mock.patch("sqlglot.schema", MappingSchema())
def test_sql_create(self):