Merging upstream version 18.2.0.
Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
parent
985db29269
commit
53cf4a81a6
124 changed files with 60313 additions and 50346 deletions
|
@ -1,14 +1,11 @@
|
|||
import typing as t
|
||||
import unittest
|
||||
|
||||
from sqlglot.dataframe.sql import types
|
||||
from sqlglot.dataframe.sql.dataframe import DataFrame
|
||||
from sqlglot.dataframe.sql.session import SparkSession
|
||||
from sqlglot.helper import ensure_list
|
||||
from tests.dataframe.unit.dataframe_test_base import DataFrameTestBase
|
||||
|
||||
|
||||
class DataFrameSQLValidator(unittest.TestCase):
|
||||
class DataFrameSQLValidator(DataFrameTestBase):
|
||||
def setUp(self) -> None:
|
||||
super().setUp()
|
||||
self.spark = SparkSession()
|
||||
self.employee_schema = types.StructType(
|
||||
[
|
||||
|
@ -29,12 +26,3 @@ class DataFrameSQLValidator(unittest.TestCase):
|
|||
self.df_employee = self.spark.createDataFrame(
|
||||
data=employee_data, schema=self.employee_schema
|
||||
)
|
||||
|
||||
def compare_sql(
|
||||
self, df: DataFrame, expected_statements: t.Union[str, t.List[str]], pretty=False
|
||||
):
|
||||
actual_sqls = df.sql(pretty=pretty)
|
||||
expected_statements = ensure_list(expected_statements)
|
||||
self.assertEqual(len(expected_statements), len(actual_sqls))
|
||||
for expected, actual in zip(expected_statements, actual_sqls):
|
||||
self.assertEqual(expected, actual)
|
||||
|
|
23
tests/dataframe/unit/dataframe_test_base.py
Normal file
23
tests/dataframe/unit/dataframe_test_base.py
Normal file
|
@ -0,0 +1,23 @@
|
|||
import typing as t
|
||||
import unittest
|
||||
|
||||
import sqlglot
|
||||
from sqlglot import MappingSchema
|
||||
from sqlglot.dataframe.sql import SparkSession
|
||||
from sqlglot.dataframe.sql.dataframe import DataFrame
|
||||
from sqlglot.helper import ensure_list
|
||||
|
||||
|
||||
class DataFrameTestBase(unittest.TestCase):
    """Shared base class for dataframe unit tests.

    Resets process-global sqlglot state before each test and provides a
    helper for comparing a dataframe's generated SQL against expectations.
    """

    def setUp(self) -> None:
        # Reset module-level singletons so each test starts from a clean slate:
        # the global schema registry and the cached SparkSession instance.
        sqlglot.schema = MappingSchema()
        SparkSession._instance = None

    def compare_sql(
        self, df: DataFrame, expected_statements: t.Union[str, t.List[str]], pretty=False
    ):
        """Assert that *df* renders exactly the expected SQL statement(s).

        A single string is treated as a one-statement expectation; the number
        of generated statements must match the number expected.
        """
        generated = df.sql(pretty=pretty)
        expected = ensure_list(expected_statements)
        self.assertEqual(len(expected), len(generated))
        for want, got in zip(expected, generated):
            self.assertEqual(want, got)
|
|
@ -1,9 +1,6 @@
|
|||
from unittest import mock
|
||||
|
||||
import sqlglot
|
||||
from sqlglot.dataframe.sql import functions as F, types
|
||||
from sqlglot.dataframe.sql.session import SparkSession
|
||||
from sqlglot.schema import MappingSchema
|
||||
from tests.dataframe.unit.dataframe_sql_validator import DataFrameSQLValidator
|
||||
|
||||
|
||||
|
@ -68,7 +65,6 @@ class TestDataframeSession(DataFrameSQLValidator):
|
|||
|
||||
self.compare_sql(df, expected)
|
||||
|
||||
@mock.patch("sqlglot.schema", MappingSchema())
|
||||
def test_sql_select_only(self):
|
||||
query = "SELECT cola, colb FROM table"
|
||||
sqlglot.schema.add_table("table", {"cola": "string", "colb": "string"}, dialect="spark")
|
||||
|
@ -78,16 +74,6 @@ class TestDataframeSession(DataFrameSQLValidator):
|
|||
df.sql(pretty=False)[0],
|
||||
)
|
||||
|
||||
@mock.patch("sqlglot.schema", MappingSchema())
|
||||
def test_select_quoted(self):
|
||||
sqlglot.schema.add_table("`TEST`", {"name": "string"}, dialect="spark")
|
||||
|
||||
self.assertEqual(
|
||||
SparkSession().table("`TEST`").select(F.col("name")).sql(dialect="snowflake")[0],
|
||||
'''SELECT "test"."name" AS "name" FROM "test" AS "test"''',
|
||||
)
|
||||
|
||||
@mock.patch("sqlglot.schema", MappingSchema())
|
||||
def test_sql_with_aggs(self):
|
||||
query = "SELECT cola, colb FROM table"
|
||||
sqlglot.schema.add_table("table", {"cola": "string", "colb": "string"}, dialect="spark")
|
||||
|
@ -97,7 +83,6 @@ class TestDataframeSession(DataFrameSQLValidator):
|
|||
df.sql(pretty=False, optimize=False)[0],
|
||||
)
|
||||
|
||||
@mock.patch("sqlglot.schema", MappingSchema())
|
||||
def test_sql_create(self):
|
||||
query = "CREATE TABLE new_table AS WITH t1 AS (SELECT cola, colb FROM table) SELECT cola, colb, FROM t1"
|
||||
sqlglot.schema.add_table("table", {"cola": "string", "colb": "string"}, dialect="spark")
|
||||
|
@ -105,7 +90,6 @@ class TestDataframeSession(DataFrameSQLValidator):
|
|||
expected = "CREATE TABLE new_table AS SELECT `table`.`cola` AS `cola`, `table`.`colb` AS `colb` FROM `table` AS `table`"
|
||||
self.compare_sql(df, expected)
|
||||
|
||||
@mock.patch("sqlglot.schema", MappingSchema())
|
||||
def test_sql_insert(self):
|
||||
query = "WITH t1 AS (SELECT cola, colb FROM table) INSERT INTO new_table SELECT cola, colb FROM t1"
|
||||
sqlglot.schema.add_table("table", {"cola": "string", "colb": "string"}, dialect="spark")
|
||||
|
@ -114,5 +98,4 @@ class TestDataframeSession(DataFrameSQLValidator):
|
|||
self.compare_sql(df, expected)
|
||||
|
||||
def test_session_create_builder_patterns(self):
|
||||
spark = SparkSession()
|
||||
self.assertEqual(spark.builder.appName("abc").getOrCreate(), spark)
|
||||
self.assertEqual(SparkSession.builder.appName("abc").getOrCreate(), SparkSession())
|
||||
|
|
81
tests/dataframe/unit/test_session_case_sensitivity.py
Normal file
81
tests/dataframe/unit/test_session_case_sensitivity.py
Normal file
|
@ -0,0 +1,81 @@
|
|||
import sqlglot
|
||||
from sqlglot.dataframe.sql import functions as F
|
||||
from sqlglot.dataframe.sql.session import SparkSession
|
||||
from sqlglot.errors import OptimizeError
|
||||
from tests.dataframe.unit.dataframe_test_base import DataFrameTestBase
|
||||
|
||||
|
||||
class TestSessionCaseSensitivity(DataFrameTestBase):
    """Exercises identifier case handling when generating SQL for a
    case-sensitive dialect (Snowflake) from the dataframe API."""

    def setUp(self) -> None:
        super().setUp()
        # Build a session pinned to the Snowflake dialect so its
        # case-sensitivity rules apply to identifier normalization.
        self.spark = SparkSession.builder.config("sqlframe.dialect", "snowflake").getOrCreate()

    # Each entry: (description, table name registered in the schema, table name
    # used in the query, column schema, column name used in the query,
    # expected SQL string — or an OptimizeError instance when resolution
    # is expected to fail).
    tests = [
        (
            "All lower no intention of CS",
            "test",
            "test",
            {"name": "VARCHAR"},
            "name",
            '''SELECT "TEST"."NAME" AS "NAME" FROM "TEST" AS "TEST"''',
        ),
        (
            "Table has CS while column does not",
            '"Test"',
            '"Test"',
            {"name": "VARCHAR"},
            "name",
            '''SELECT "TEST"."NAME" AS "NAME" FROM "Test" AS "TEST"''',
        ),
        (
            "Column has CS while table does not",
            "test",
            "test",
            {'"Name"': "VARCHAR"},
            '"Name"',
            '''SELECT "TEST"."Name" AS "Name" FROM "TEST" AS "TEST"''',
        ),
        (
            "Both Table and column have CS",
            '"Test"',
            '"Test"',
            {'"Name"': "VARCHAR"},
            '"Name"',
            '''SELECT "TEST"."Name" AS "Name" FROM "Test" AS "TEST"''',
        ),
        (
            "Lowercase CS table and column",
            '"test"',
            '"test"',
            {'"name"': "VARCHAR"},
            '"name"',
            '''SELECT "TEST"."name" AS "name" FROM "test" AS "TEST"''',
        ),
        (
            "CS table and column and query table but no CS in query column",
            '"test"',
            '"test"',
            {'"name"': "VARCHAR"},
            "name",
            OptimizeError(),
        ),
        (
            "CS table and column and query column but no CS in query table",
            '"test"',
            "test",
            {'"name"': "VARCHAR"},
            '"name"',
            OptimizeError(),
        ),
    ]

    def test_basic_case_sensitivity(self):
        for description, schema_table, query_table, columns, query_column, outcome in self.tests:
            with self.subTest(description):
                sqlglot.schema.add_table(schema_table, columns, dialect=self.spark.dialect)
                df = self.spark.table(query_table).select(F.col(query_column))
                # An OptimizeError sentinel marks cases expected to fail
                # identifier resolution rather than produce SQL.
                if isinstance(outcome, OptimizeError):
                    with self.assertRaises(OptimizeError):
                        df.sql()
                    continue
                self.compare_sql(df, outcome)
|
|
@ -1,10 +1,9 @@
|
|||
import unittest
|
||||
|
||||
from sqlglot.dataframe.sql import functions as F
|
||||
from sqlglot.dataframe.sql.window import Window, WindowSpec
|
||||
from tests.dataframe.unit.dataframe_test_base import DataFrameTestBase
|
||||
|
||||
|
||||
class TestDataframeWindow(unittest.TestCase):
|
||||
class TestDataframeWindow(DataFrameTestBase):
|
||||
def test_window_spec_partition_by(self):
|
||||
partition_by = WindowSpec().partitionBy(F.col("cola"), F.col("colb"))
|
||||
self.assertEqual("OVER (PARTITION BY cola, colb)", partition_by.sql())
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue