from sqlglot import transpile
from sqlglot.errors import ParseError

from tests.dialects.test_dialect import Validator


class TestDatabricks(Validator):
    dialect = "databricks"

    def test_databricks(self):
        self.validate_identity("DESCRIBE HISTORY a.b")
        self.validate_identity("DESCRIBE history.tbl")
        self.validate_identity("CREATE TABLE t (c STRUCT<interval: DOUBLE COMMENT 'aaa'>)")
        self.validate_identity("CREATE TABLE my_table TBLPROPERTIES (a.b=15)")
        self.validate_identity("CREATE TABLE my_table TBLPROPERTIES ('a.b'=15)")
        self.validate_identity("SELECT CAST('11 23:4:0' AS INTERVAL DAY TO HOUR)")
        self.validate_identity("SELECT CAST('11 23:4:0' AS INTERVAL DAY TO MINUTE)")
        self.validate_identity("SELECT CAST('11 23:4:0' AS INTERVAL DAY TO SECOND)")
        self.validate_identity("SELECT CAST('23:00:00' AS INTERVAL HOUR TO MINUTE)")
        self.validate_identity("SELECT CAST('23:00:00' AS INTERVAL HOUR TO SECOND)")
        self.validate_identity("SELECT CAST('23:00:00' AS INTERVAL MINUTE TO SECOND)")
        self.validate_identity("CREATE TABLE target SHALLOW CLONE source")
        self.validate_identity("INSERT INTO a REPLACE WHERE cond VALUES (1), (2)")
        self.validate_identity("SELECT c1 : price")
        self.validate_identity("CREATE FUNCTION a.b(x INT) RETURNS INT RETURN x + 1")
        self.validate_identity("CREATE FUNCTION a AS b")
        self.validate_identity("SELECT ${x} FROM ${y} WHERE ${z} > 1")
        self.validate_identity("CREATE TABLE foo (x DATE GENERATED ALWAYS AS (CAST(y AS DATE)))")
        self.validate_identity(
            "SELECT DATE_FORMAT(CAST(FROM_UTC_TIMESTAMP(CAST(foo AS TIMESTAMP), 'America/Los_Angeles') AS TIMESTAMP), 'yyyy-MM-dd HH:mm:ss') AS foo FROM t"
        )
        self.validate_identity(
            "SELECT * FROM sales UNPIVOT INCLUDE NULLS (sales FOR quarter IN (q1 AS `Jan-Mar`))"
        )
        self.validate_identity(
            "SELECT * FROM sales UNPIVOT EXCLUDE NULLS (sales FOR quarter IN (q1 AS `Jan-Mar`))"
        )
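        # Dollar-quoted bodies ($$ ... $$ or $tag$ ... $tag$) for Python UDFs should round-trip unchanged.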
        self.validate_identity(
            "CREATE FUNCTION add_one(x INT) RETURNS INT LANGUAGE PYTHON AS $$def add_one(x):\n return x+1$$"
        )
        self.validate_identity(
            "CREATE FUNCTION add_one(x INT) RETURNS INT LANGUAGE PYTHON AS $FOO$def add_one(x):\n return x+1$FOO$"
        )
        self.validate_identity("TRUNCATE TABLE t1 PARTITION(age = 10, name = 'test', address)")
        self.validate_identity(
            "TRUNCATE TABLE t1 PARTITION(age = 10, name = 'test', city LIKE 'LA')"
        )
        self.validate_all(
            "CREATE TABLE foo (x INT GENERATED ALWAYS AS (YEAR(y)))",
            write={
                "databricks": "CREATE TABLE foo (x INT GENERATED ALWAYS AS (YEAR(TO_DATE(y))))",
                "tsql": "CREATE TABLE foo (x AS YEAR(CAST(y AS DATE)))",
            },
        )
        self.validate_all(
            "CREATE TABLE t1 AS (SELECT c FROM t2)",
            read={
                "teradata": "CREATE TABLE t1 AS (SELECT c FROM t2) WITH DATA",
            },
        )
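        # Mismatched or malformed dollar-quote tags must raise a ParseError.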
        with self.assertRaises(ParseError):
            transpile(
                "CREATE FUNCTION add_one(x INT) RETURNS INT LANGUAGE PYTHON AS $foo$def add_one(x):\n return x+1$$",
                read="databricks",
            )
        with self.assertRaises(ParseError):
            transpile(
                "CREATE FUNCTION add_one(x INT) RETURNS INT LANGUAGE PYTHON AS $foo bar$def add_one(x):\n return x+1$foo bar$",
                read="databricks",
            )

    # https://docs.databricks.com/sql/language-manual/functions/colonsign.html
    def test_json(self):
        self.validate_identity("""SELECT c1 : price FROM VALUES ('{ "price": 5 }') AS T(c1)""")
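        # A bracketed string key such as c1:['price'] is generated as c1 : ARRAY('price').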
        self.validate_all(
            """SELECT c1:['price'] FROM VALUES('{ "price": 5 }') AS T(c1)""",
            write={
                "databricks": """SELECT c1 : ARRAY('price') FROM VALUES ('{ "price": 5 }') AS T(c1)""",
            },
        )
        self.validate_all(
            """SELECT c1:item[1].price FROM VALUES('{ "item": [ { "model": "basic", "price": 6.12 }, { "model": "medium", "price": 9.24 } ] }') AS T(c1)""",
            write={
                "databricks": """SELECT c1 : item[1].price FROM VALUES ('{ "item": [ { "model": "basic", "price": 6.12 }, { "model": "medium", "price": 9.24 } ] }') AS T(c1)""",
            },
        )
        self.validate_all(
            """SELECT c1:item[*].price FROM VALUES('{ "item": [ { "model": "basic", "price": 6.12 }, { "model": "medium", "price": 9.24 } ] }') AS T(c1)""",
            write={
                "databricks": """SELECT c1 : item[*].price FROM VALUES ('{ "item": [ { "model": "basic", "price": 6.12 }, { "model": "medium", "price": 9.24 } ] }') AS T(c1)""",
            },
        )
        self.validate_all(
            """SELECT from_json(c1:item[*].price, 'ARRAY<DOUBLE>')[0] FROM VALUES('{ "item": [ { "model": "basic", "price": 6.12 }, { "model": "medium", "price": 9.24 } ] }') AS T(c1)""",
            write={
                "databricks": """SELECT FROM_JSON(c1 : item[*].price, 'ARRAY<DOUBLE>')[0] FROM VALUES ('{ "item": [ { "model": "basic", "price": 6.12 }, { "model": "medium", "price": 9.24 } ] }') AS T(c1)""",
            },
        )
        self.validate_all(
            """SELECT inline(from_json(c1:item[*], 'ARRAY<STRUCT<model STRING, price DOUBLE>>')) FROM VALUES('{ "item": [ { "model": "basic", "price": 6.12 }, { "model": "medium", "price": 9.24 } ] }') AS T(c1)""",
            write={
                "databricks": """SELECT INLINE(FROM_JSON(c1 : item[*], 'ARRAY<STRUCT<model STRING, price DOUBLE>>')) FROM VALUES ('{ "item": [ { "model": "basic", "price": 6.12 }, { "model": "medium", "price": 9.24 } ] }') AS T(c1)""",
            },
        )

    def test_datediff(self):
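        # Unit names are uppercased on output. Postgres has no DATEDIFF, so the expected
        # SQL computes the difference with EXTRACT(epoch ...) and AGE() arithmetic.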
        self.validate_all(
            "SELECT DATEDIFF(year, 'start', 'end')",
            write={
                "tsql": "SELECT DATEDIFF(YEAR, 'start', 'end')",
                "databricks": "SELECT DATEDIFF(YEAR, 'start', 'end')",
            },
        )
        self.validate_all(
            "SELECT DATEDIFF(microsecond, 'start', 'end')",
            write={
                "databricks": "SELECT DATEDIFF(MICROSECOND, 'start', 'end')",
                "postgres": "SELECT CAST(EXTRACT(epoch FROM CAST('end' AS TIMESTAMP) - CAST('start' AS TIMESTAMP)) * 1000000 AS BIGINT)",
            },
        )
        self.validate_all(
            "SELECT DATEDIFF(millisecond, 'start', 'end')",
            write={
                "databricks": "SELECT DATEDIFF(MILLISECOND, 'start', 'end')",
                "postgres": "SELECT CAST(EXTRACT(epoch FROM CAST('end' AS TIMESTAMP) - CAST('start' AS TIMESTAMP)) * 1000 AS BIGINT)",
            },
        )
        self.validate_all(
            "SELECT DATEDIFF(second, 'start', 'end')",
            write={
                "databricks": "SELECT DATEDIFF(SECOND, 'start', 'end')",
                "postgres": "SELECT CAST(EXTRACT(epoch FROM CAST('end' AS TIMESTAMP) - CAST('start' AS TIMESTAMP)) AS BIGINT)",
            },
        )
        self.validate_all(
            "SELECT DATEDIFF(minute, 'start', 'end')",
            write={
                "databricks": "SELECT DATEDIFF(MINUTE, 'start', 'end')",
                "postgres": "SELECT CAST(EXTRACT(epoch FROM CAST('end' AS TIMESTAMP) - CAST('start' AS TIMESTAMP)) / 60 AS BIGINT)",
            },
        )
        self.validate_all(
            "SELECT DATEDIFF(hour, 'start', 'end')",
            write={
                "databricks": "SELECT DATEDIFF(HOUR, 'start', 'end')",
                "postgres": "SELECT CAST(EXTRACT(epoch FROM CAST('end' AS TIMESTAMP) - CAST('start' AS TIMESTAMP)) / 3600 AS BIGINT)",
            },
        )
        self.validate_all(
            "SELECT DATEDIFF(day, 'start', 'end')",
            write={
                "databricks": "SELECT DATEDIFF(DAY, 'start', 'end')",
                "postgres": "SELECT CAST(EXTRACT(epoch FROM CAST('end' AS TIMESTAMP) - CAST('start' AS TIMESTAMP)) / 86400 AS BIGINT)",
            },
        )
        self.validate_all(
            "SELECT DATEDIFF(week, 'start', 'end')",
            write={
                "databricks": "SELECT DATEDIFF(WEEK, 'start', 'end')",
                "postgres": "SELECT CAST(EXTRACT(days FROM (CAST('end' AS TIMESTAMP) - CAST('start' AS TIMESTAMP))) / 7 AS BIGINT)",
            },
        )
        self.validate_all(
            "SELECT DATEDIFF(month, 'start', 'end')",
            write={
                "databricks": "SELECT DATEDIFF(MONTH, 'start', 'end')",
                "postgres": "SELECT CAST(EXTRACT(year FROM AGE(CAST('end' AS TIMESTAMP), CAST('start' AS TIMESTAMP))) * 12 + EXTRACT(month FROM AGE(CAST('end' AS TIMESTAMP), CAST('start' AS TIMESTAMP))) AS BIGINT)",
            },
        )
        self.validate_all(
            "SELECT DATEDIFF(quarter, 'start', 'end')",
            write={
                "databricks": "SELECT DATEDIFF(QUARTER, 'start', 'end')",
                "postgres": "SELECT CAST(EXTRACT(year FROM AGE(CAST('end' AS TIMESTAMP), CAST('start' AS TIMESTAMP))) * 4 + EXTRACT(month FROM AGE(CAST('end' AS TIMESTAMP), CAST('start' AS TIMESTAMP))) / 3 AS BIGINT)",
            },
        )
        self.validate_all(
            "SELECT DATEDIFF(year, 'start', 'end')",
            write={
                "databricks": "SELECT DATEDIFF(YEAR, 'start', 'end')",
                "postgres": "SELECT CAST(EXTRACT(year FROM AGE(CAST('end' AS TIMESTAMP), CAST('start' AS TIMESTAMP))) AS BIGINT)",
            },
        )

    def test_add_date(self):
        self.validate_all(
            "SELECT DATEADD(year, 1, '2020-01-01')",
            write={
                "tsql": "SELECT DATEADD(YEAR, 1, '2020-01-01')",
                "databricks": "SELECT DATEADD(YEAR, 1, '2020-01-01')",
            },
        )
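        # Two-argument DATEDIFF('end', 'start') defaults to a DAY difference with the arguments reordered.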
        self.validate_all(
            "SELECT DATEDIFF('end', 'start')",
            write={"databricks": "SELECT DATEDIFF(DAY, 'start', 'end')"},
        )
        self.validate_all(
            "SELECT DATE_ADD('2020-01-01', 1)",
            write={
                "tsql": "SELECT DATEADD(DAY, 1, '2020-01-01')",
                "databricks": "SELECT DATEADD(DAY, 1, '2020-01-01')",
            },
        )

    def test_without_as(self):
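        # A missing AS keyword is added before parenthesized queries in CTAS statements and CTE definitions.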
        self.validate_all(
            "CREATE TABLE x (SELECT 1)",
            write={
                "databricks": "CREATE TABLE x AS (SELECT 1)",
            },
        )
        self.validate_all(
            "WITH x (select 1) SELECT * FROM x",
            write={
                "databricks": "WITH x AS (SELECT 1) SELECT * FROM x",
            },
        )