2025-02-13 14:48:46 +01:00
|
|
|
from sqlglot.dataframe.sql import types
|
|
|
|
from sqlglot.dataframe.sql.session import SparkSession
|
2025-02-13 20:58:22 +01:00
|
|
|
from tests.dataframe.unit.dataframe_test_base import DataFrameTestBase
|
2025-02-13 14:48:46 +01:00
|
|
|
|
|
|
|
|
2025-02-13 20:58:22 +01:00
|
|
|
class DataFrameSQLValidator(DataFrameTestBase):
    """Test base that prepares a Spark session and a small employee DataFrame."""

    def setUp(self) -> None:
        """Create the session, employee schema and employee DataFrame fixtures.

        After this runs, the instance exposes ``spark``, ``employee_schema``
        and ``df_employee``.
        """
        super().setUp()
        self.spark = SparkSession()

        # (column name, type class) pairs. Each column gets its own freshly
        # constructed type instance, and ``False`` is passed as the third
        # StructField argument (presumably "nullable" -- confirm against the
        # StructField signature).
        column_specs = (
            ("employee_id", types.IntegerType),
            ("fname", types.StringType),
            ("lname", types.StringType),
            ("age", types.IntegerType),
            ("store_id", types.IntegerType),
        )
        self.employee_schema = types.StructType(
            [types.StructField(column, dtype(), False) for column, dtype in column_specs]
        )

        # One tuple per employee, in the same column order as the schema.
        rows = [
            (1, "Jack", "Shephard", 37, 1),
            (2, "John", "Locke", 65, 1),
            (3, "Kate", "Austen", 37, 2),
            (4, "Claire", "Littleton", 27, 2),
            (5, "Hugo", "Reyes", 29, 100),
        ]
        self.df_employee = self.spark.createDataFrame(
            data=rows,
            schema=self.employee_schema,
        )
|