2025-02-13 14:51:47 +01:00
from tests . dialects . test_dialect import Validator
class TestDatabricks(Validator):
    """Parsing and generation checks for the ``databricks`` dialect.

    Every assertion goes through ``validate_identity`` / ``validate_all``,
    which are inherited from ``Validator`` (imported from
    ``tests.dialects.test_dialect`` and not visible here).  Presumably the
    former checks that a statement round-trips unchanged and the latter checks
    the generated SQL for each target dialect listed in ``write`` -- confirm
    against the base class.

    NOTE(review): the string literals below carry spaces just inside their
    quotes (e.g. ``" databricks "``, ``$ {x}``, ``' start '``).  That looks
    like a tokenisation artifact of how this file was extracted.  They are
    reproduced verbatim here; confirm against the upstream file before relying
    on these assertions.
    """

    dialect = " databricks "

    def test_databricks(self):
        """Check assorted Databricks statements: clones, UDFs, UNPIVOT, generated columns."""
        self.validate_identity(" CREATE TABLE target SHALLOW CLONE source ")
        self.validate_identity(" INSERT INTO a REPLACE WHERE cond VALUES (1), (2) ")
        self.validate_identity(" SELECT c1 : price ")
        self.validate_identity(" CREATE FUNCTION a.b(x INT) RETURNS INT RETURN x + 1 ")
        self.validate_identity(" CREATE FUNCTION a AS b ")
        self.validate_identity(" SELECT $ {x} FROM $ {y} WHERE $ {z} > 1 ")
        self.validate_identity(" CREATE TABLE foo (x DATE GENERATED ALWAYS AS (CAST(y AS DATE))) ")
        self.validate_identity(
            " SELECT * FROM sales UNPIVOT INCLUDE NULLS (sales FOR quarter IN (q1 AS `Jan-Mar`)) "
        )
        self.validate_identity(
            " SELECT * FROM sales UNPIVOT EXCLUDE NULLS (sales FOR quarter IN (q1 AS `Jan-Mar`)) "
        )

        # The expected output wraps the argument of YEAR in TO_DATE.
        self.validate_all(
            " CREATE TABLE foo (x INT GENERATED ALWAYS AS (YEAR(y))) ",
            write={
                " databricks ": " CREATE TABLE foo (x INT GENERATED ALWAYS AS (YEAR(TO_DATE(y)))) ",
            },
        )

    # https://docs.databricks.com/sql/language-manual/functions/colonsign.html
    def test_json(self):
        """Check the ``:`` JSON path operator with keys, indexes and wildcards."""
        self.validate_identity(""" SELECT c1 : price FROM VALUES ( ' { " price " : 5 } ' ) AS T(c1) """)

        # Bracketed string key: the expected output renders it as ARRAY(...).
        self.validate_all(
            """ SELECT c1:[ ' price ' ] FROM VALUES( ' { " price " : 5 } ' ) AS T(c1) """,
            write={
                " databricks ": """ SELECT c1 : ARRAY( ' price ' ) FROM VALUES ( ' { " price " : 5 } ' ) AS T(c1) """,
            },
        )

        # Array index followed by a field access.
        self.validate_all(
            """ SELECT c1:item[1].price FROM VALUES( ' { " item " : [ { " model " : " basic " , " price " : 6.12 }, { " model " : " medium " , " price " : 9.24 } ] } ' ) AS T(c1) """,
            write={
                " databricks ": """ SELECT c1 : item[1].price FROM VALUES ( ' { " item " : [ { " model " : " basic " , " price " : 6.12 }, { " model " : " medium " , " price " : 9.24 } ] } ' ) AS T(c1) """,
            },
        )

        # Wildcard index.
        self.validate_all(
            """ SELECT c1:item[*].price FROM VALUES( ' { " item " : [ { " model " : " basic " , " price " : 6.12 }, { " model " : " medium " , " price " : 9.24 } ] } ' ) AS T(c1) """,
            write={
                " databricks ": """ SELECT c1 : item[*].price FROM VALUES ( ' { " item " : [ { " model " : " basic " , " price " : 6.12 }, { " model " : " medium " , " price " : 9.24 } ] } ' ) AS T(c1) """,
            },
        )

        # JSON path used as a function argument; function names come back upper-cased.
        self.validate_all(
            """ SELECT from_json(c1:item[*].price, ' ARRAY<DOUBLE> ' )[0] FROM VALUES( ' { " item " : [ { " model " : " basic " , " price " : 6.12 }, { " model " : " medium " , " price " : 9.24 } ] } ' ) AS T(c1) """,
            write={
                " databricks ": """ SELECT FROM_JSON(c1 : item[*].price, ' ARRAY<DOUBLE> ' )[0] FROM VALUES ( ' { " item " : [ { " model " : " basic " , " price " : 6.12 }, { " model " : " medium " , " price " : 9.24 } ] } ' ) AS T(c1) """,
            },
        )
        self.validate_all(
            """ SELECT inline(from_json(c1:item[*], ' ARRAY<STRUCT<model STRING, price DOUBLE>> ' )) FROM VALUES( ' { " item " : [ { " model " : " basic " , " price " : 6.12 }, { " model " : " medium " , " price " : 9.24 } ] } ' ) AS T(c1) """,
            write={
                " databricks ": """ SELECT INLINE(FROM_JSON(c1 : item[*], ' ARRAY<STRUCT<model STRING, price DOUBLE>> ' )) FROM VALUES ( ' { " item " : [ { " model " : " basic " , " price " : 6.12 }, { " model " : " medium " , " price " : 9.24 } ] } ' ) AS T(c1) """,
            },
        )

    def test_datediff(self):
        """Check three-argument ``DATEDIFF`` for each unit and its expected translations."""
        self.validate_all(
            " SELECT DATEDIFF(year, ' start ' , ' end ' ) ",
            write={
                " tsql ": " SELECT DATEDIFF(year, ' start ' , ' end ' ) ",
                " databricks ": " SELECT DATEDIFF(year, ' start ' , ' end ' ) ",
            },
        )

        # microsecond .. day: the expected Postgres output is built from
        # EXTRACT(epoch FROM end - start), scaled by a per-unit factor.
        self.validate_all(
            " SELECT DATEDIFF(microsecond, ' start ' , ' end ' ) ",
            write={
                " databricks ": " SELECT DATEDIFF(microsecond, ' start ' , ' end ' ) ",
                " postgres ": " SELECT CAST(EXTRACT(epoch FROM CAST( ' end ' AS TIMESTAMP) - CAST( ' start ' AS TIMESTAMP)) * 1000000 AS BIGINT) ",
            },
        )
        self.validate_all(
            " SELECT DATEDIFF(millisecond, ' start ' , ' end ' ) ",
            write={
                " databricks ": " SELECT DATEDIFF(millisecond, ' start ' , ' end ' ) ",
                " postgres ": " SELECT CAST(EXTRACT(epoch FROM CAST( ' end ' AS TIMESTAMP) - CAST( ' start ' AS TIMESTAMP)) * 1000 AS BIGINT) ",
            },
        )
        self.validate_all(
            " SELECT DATEDIFF(second, ' start ' , ' end ' ) ",
            write={
                " databricks ": " SELECT DATEDIFF(second, ' start ' , ' end ' ) ",
                " postgres ": " SELECT CAST(EXTRACT(epoch FROM CAST( ' end ' AS TIMESTAMP) - CAST( ' start ' AS TIMESTAMP)) AS BIGINT) ",
            },
        )
        self.validate_all(
            " SELECT DATEDIFF(minute, ' start ' , ' end ' ) ",
            write={
                " databricks ": " SELECT DATEDIFF(minute, ' start ' , ' end ' ) ",
                " postgres ": " SELECT CAST(EXTRACT(epoch FROM CAST( ' end ' AS TIMESTAMP) - CAST( ' start ' AS TIMESTAMP)) / 60 AS BIGINT) ",
            },
        )
        self.validate_all(
            " SELECT DATEDIFF(hour, ' start ' , ' end ' ) ",
            write={
                " databricks ": " SELECT DATEDIFF(hour, ' start ' , ' end ' ) ",
                " postgres ": " SELECT CAST(EXTRACT(epoch FROM CAST( ' end ' AS TIMESTAMP) - CAST( ' start ' AS TIMESTAMP)) / 3600 AS BIGINT) ",
            },
        )
        self.validate_all(
            " SELECT DATEDIFF(day, ' start ' , ' end ' ) ",
            write={
                " databricks ": " SELECT DATEDIFF(day, ' start ' , ' end ' ) ",
                " postgres ": " SELECT CAST(EXTRACT(epoch FROM CAST( ' end ' AS TIMESTAMP) - CAST( ' start ' AS TIMESTAMP)) / 86400 AS BIGINT) ",
            },
        )

        # week .. year: the expected Postgres output combines the year/month/day
        # components of AGE(end, start).
        # NOTE(review): the week expression scales years by 48 and months by 4,
        # which is an approximation -- confirm this is the intended output.
        self.validate_all(
            " SELECT DATEDIFF(week, ' start ' , ' end ' ) ",
            write={
                " databricks ": " SELECT DATEDIFF(week, ' start ' , ' end ' ) ",
                " postgres ": " SELECT CAST(EXTRACT(year FROM AGE(CAST( ' end ' AS TIMESTAMP), CAST( ' start ' AS TIMESTAMP))) * 48 + EXTRACT(month FROM AGE(CAST( ' end ' AS TIMESTAMP), CAST( ' start ' AS TIMESTAMP))) * 4 + EXTRACT(day FROM AGE(CAST( ' end ' AS TIMESTAMP), CAST( ' start ' AS TIMESTAMP))) / 7 AS BIGINT) ",
            },
        )
        self.validate_all(
            " SELECT DATEDIFF(month, ' start ' , ' end ' ) ",
            write={
                " databricks ": " SELECT DATEDIFF(month, ' start ' , ' end ' ) ",
                " postgres ": " SELECT CAST(EXTRACT(year FROM AGE(CAST( ' end ' AS TIMESTAMP), CAST( ' start ' AS TIMESTAMP))) * 12 + EXTRACT(month FROM AGE(CAST( ' end ' AS TIMESTAMP), CAST( ' start ' AS TIMESTAMP))) AS BIGINT) ",
            },
        )
        self.validate_all(
            " SELECT DATEDIFF(quarter, ' start ' , ' end ' ) ",
            write={
                " databricks ": " SELECT DATEDIFF(quarter, ' start ' , ' end ' ) ",
                " postgres ": " SELECT CAST(EXTRACT(year FROM AGE(CAST( ' end ' AS TIMESTAMP), CAST( ' start ' AS TIMESTAMP))) * 4 + EXTRACT(month FROM AGE(CAST( ' end ' AS TIMESTAMP), CAST( ' start ' AS TIMESTAMP))) / 3 AS BIGINT) ",
            },
        )
        self.validate_all(
            " SELECT DATEDIFF(year, ' start ' , ' end ' ) ",
            write={
                " databricks ": " SELECT DATEDIFF(year, ' start ' , ' end ' ) ",
                " postgres ": " SELECT CAST(EXTRACT(year FROM AGE(CAST( ' end ' AS TIMESTAMP), CAST( ' start ' AS TIMESTAMP))) AS BIGINT) ",
            },
        )

    def test_add_date(self):
        """Check ``DATEADD`` / ``DATE_ADD`` plus the two-argument ``DATEDIFF`` form."""
        self.validate_all(
            " SELECT DATEADD(year, 1, ' 2020-01-01 ' ) ",
            write={
                " tsql ": " SELECT DATEADD(year, 1, ' 2020-01-01 ' ) ",
                " databricks ": " SELECT DATEADD(year, 1, ' 2020-01-01 ' ) ",
            },
        )

        # Two-argument DATEDIFF(end, start): the expected output is the
        # three-argument form with an explicit DAY unit and swapped operands.
        self.validate_all(
            " SELECT DATEDIFF( ' end ' , ' start ' ) ",
            write={" databricks ": " SELECT DATEDIFF(DAY, ' start ' , ' end ' ) "},
        )

        # DATE_ADD(date, n): the expected output is DATEADD(DAY, n, date).
        self.validate_all(
            " SELECT DATE_ADD( ' 2020-01-01 ' , 1) ",
            write={
                " tsql ": " SELECT DATEADD(DAY, 1, ' 2020-01-01 ' ) ",
                " databricks ": " SELECT DATEADD(DAY, 1, ' 2020-01-01 ' ) ",
            },
        )

    def test_without_as(self):
        """Check that the ``AS`` keyword omitted before a subquery is added on output."""
        self.validate_all(
            " CREATE TABLE x (SELECT 1) ",
            write={
                " databricks ": " CREATE TABLE x AS (SELECT 1) ",
            },
        )
        self.validate_all(
            " WITH x (select 1) SELECT * FROM x ",
            write={
                " databricks ": " WITH x AS (SELECT 1) SELECT * FROM x ",
            },
        )