from tests.dialects.test_dialect import Validator


class TestHive(Validator):
    dialect = "hive"
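
    # `validate_all` parses its first argument with this class's `dialect`,
    # checks that each `read` entry parses to the same expression, and checks
    # that generating SQL for each `write` dialect yields the given string;
    # `validate_identity` asserts a round-trip (an optional second argument is
    # the expected canonical output). A minimal sketch of the same behavior,
    # assuming nothing beyond the public `sqlglot.transpile` helper:
    #
    #   import sqlglot
    #   sqlglot.transpile("x & 1", read="hive", write="presto")
    #   # -> ['BITWISE_AND(x, 1)']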
    def test_bits(self):
        self.validate_all(
            "x & 1",
            read={
                "duckdb": "x & 1",
                "presto": "BITWISE_AND(x, 1)",
                "spark": "x & 1",
            },
            write={
                "duckdb": "x & 1",
                "hive": "x & 1",
                "presto": "BITWISE_AND(x, 1)",
                "spark": "x & 1",
            },
        )
        self.validate_all(
            "x & 1 > 0",
            read={
                "duckdb": "x & 1 > 0",
                "presto": "BITWISE_AND(x, 1) > 0",
                "spark": "x & 1 > 0",
            },
            write={
                "duckdb": "x & 1 > 0",
                "presto": "BITWISE_AND(x, 1) > 0",
                "hive": "x & 1 > 0",
                "spark": "x & 1 > 0",
            },
        )
        self.validate_all(
            "~x",
            read={
                "duckdb": "~x",
                "presto": "BITWISE_NOT(x)",
                "spark": "~x",
            },
            write={
                "duckdb": "~x",
                "hive": "~x",
                "presto": "BITWISE_NOT(x)",
                "spark": "~x",
            },
        )
        self.validate_all(
            "x | 1",
            read={
                "duckdb": "x | 1",
                "presto": "BITWISE_OR(x, 1)",
                "spark": "x | 1",
            },
            write={
                "duckdb": "x | 1",
                "hive": "x | 1",
                "presto": "BITWISE_OR(x, 1)",
                "spark": "x | 1",
            },
        )
        self.validate_all(
            "x << 1",
            read={
                "spark": "SHIFTLEFT(x, 1)",
            },
            write={
                "duckdb": "x << 1",
                "presto": "BITWISE_ARITHMETIC_SHIFT_LEFT(x, 1)",
                "hive": "x << 1",
                "spark": "SHIFTLEFT(x, 1)",
            },
        )
        self.validate_all(
            "x >> 1",
            read={
                "spark": "SHIFTRIGHT(x, 1)",
            },
            write={
                "duckdb": "x >> 1",
                "presto": "BITWISE_ARITHMETIC_SHIFT_RIGHT(x, 1)",
                "hive": "x >> 1",
                "spark": "SHIFTRIGHT(x, 1)",
            },
        )
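
    # Hive's numeric literal suffixes (1S smallint, 1Y tinyint, 1L bigint,
    # BD for fixed-point decimals) are exercised below; dialects without such
    # suffixes get an explicit CAST instead (TRY_CAST where the target
    # dialect prefers it).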
    def test_cast(self):
        self.validate_all(
            "1s",
            write={
                "duckdb": "TRY_CAST(1 AS SMALLINT)",
                "presto": "TRY_CAST(1 AS SMALLINT)",
                "hive": "CAST(1 AS SMALLINT)",
                "spark": "CAST(1 AS SMALLINT)",
            },
        )
        self.validate_all(
            "1S",
            write={
                "duckdb": "TRY_CAST(1 AS SMALLINT)",
                "presto": "TRY_CAST(1 AS SMALLINT)",
                "hive": "CAST(1 AS SMALLINT)",
                "spark": "CAST(1 AS SMALLINT)",
            },
        )
        self.validate_all(
            "1Y",
            write={
                "duckdb": "TRY_CAST(1 AS TINYINT)",
                "presto": "TRY_CAST(1 AS TINYINT)",
                "hive": "CAST(1 AS TINYINT)",
                "spark": "CAST(1 AS TINYINT)",
            },
        )
        self.validate_all(
            "1L",
            write={
                "duckdb": "TRY_CAST(1 AS BIGINT)",
                "presto": "TRY_CAST(1 AS BIGINT)",
                "hive": "CAST(1 AS BIGINT)",
                "spark": "CAST(1 AS BIGINT)",
            },
        )
        self.validate_all(
            "1.0bd",
            write={
                "duckdb": "TRY_CAST(1.0 AS DECIMAL)",
                "presto": "TRY_CAST(1.0 AS DECIMAL)",
                "hive": "CAST(1.0 AS DECIMAL)",
                "spark": "CAST(1.0 AS DECIMAL)",
            },
        )
        self.validate_all(
            "CAST(1 AS INT)",
            read={
                "presto": "TRY_CAST(1 AS INT)",
            },
            write={
                "duckdb": "TRY_CAST(1 AS INT)",
                "presto": "TRY_CAST(1 AS INTEGER)",
                "hive": "CAST(1 AS INT)",
                "spark": "CAST(1 AS INT)",
            },
        )
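
    # Hive declares partition columns outside the main column list; the tests
    # below check how that maps to each dialect (Presto folds them back into
    # the column list plus a PARTITIONED_BY property, Spark keeps them in the
    # schema and partitions by name, DuckDB has no equivalent and drops them).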
    def test_ddl(self):
        self.validate_all(
            "CREATE TABLE x (w STRING) PARTITIONED BY (y INT, z INT)",
            write={
                "duckdb": "CREATE TABLE x (w TEXT)",  # Partition columns should exist in table
                "presto": "CREATE TABLE x (w VARCHAR, y INTEGER, z INTEGER) WITH (PARTITIONED_BY=ARRAY['y', 'z'])",
                "hive": "CREATE TABLE x (w STRING) PARTITIONED BY (y INT, z INT)",
                "spark": "CREATE TABLE x (w STRING, y INT, z INT) PARTITIONED BY (y, z)",
            },
        )
        self.validate_all(
            "CREATE TABLE test STORED AS parquet TBLPROPERTIES ('x'='1', 'Z'='2') AS SELECT 1",
            write={
                "duckdb": "CREATE TABLE test AS SELECT 1",
                "presto": "CREATE TABLE test WITH (FORMAT='PARQUET', x='1', Z='2') AS SELECT 1",
                "hive": "CREATE TABLE test STORED AS PARQUET TBLPROPERTIES ('x'='1', 'Z'='2') AS SELECT 1",
                "spark": "CREATE TABLE test USING PARQUET TBLPROPERTIES ('x'='1', 'Z'='2') AS SELECT 1",
            },
        )

        self.validate_identity(
            """CREATE EXTERNAL TABLE x (y INT) ROW FORMAT SERDE 'serde' ROW FORMAT DELIMITED FIELDS TERMINATED BY '1' WITH SERDEPROPERTIES ('input.regex'='')""",
        )
        self.validate_identity(
            """CREATE EXTERNAL TABLE `my_table` (`a7` ARRAY<DATE>) ROW FORMAT SERDE 'a' STORED AS INPUTFORMAT 'b' OUTPUTFORMAT 'c' LOCATION 'd' TBLPROPERTIES ('e'='f')"""
        )
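
    # Hive's LATERAL VIEW EXPLODE/POSEXPLODE has no direct Presto equivalent;
    # it is rewritten as CROSS JOIN UNNEST (WITH ORDINALITY for POSEXPLODE).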
    def test_lateral_view(self):
        self.validate_all(
            "SELECT a, b FROM x LATERAL VIEW EXPLODE(y) t AS a LATERAL VIEW EXPLODE(z) u AS b",
            write={
                "presto": "SELECT a, b FROM x CROSS JOIN UNNEST(y) AS t(a) CROSS JOIN UNNEST(z) AS u(b)",
                "hive": "SELECT a, b FROM x LATERAL VIEW EXPLODE(y) t AS a LATERAL VIEW EXPLODE(z) u AS b",
                "spark": "SELECT a, b FROM x LATERAL VIEW EXPLODE(y) t AS a LATERAL VIEW EXPLODE(z) u AS b",
            },
        )
        self.validate_all(
            "SELECT a FROM x LATERAL VIEW EXPLODE(y) t AS a",
            write={
                "presto": "SELECT a FROM x CROSS JOIN UNNEST(y) AS t(a)",
                "hive": "SELECT a FROM x LATERAL VIEW EXPLODE(y) t AS a",
                "spark": "SELECT a FROM x LATERAL VIEW EXPLODE(y) t AS a",
            },
        )
        self.validate_all(
            "SELECT a FROM x LATERAL VIEW POSEXPLODE(y) t AS a",
            write={
                "presto": "SELECT a FROM x CROSS JOIN UNNEST(y) WITH ORDINALITY AS t(a)",
                "hive": "SELECT a FROM x LATERAL VIEW POSEXPLODE(y) t AS a",
                "spark": "SELECT a FROM x LATERAL VIEW POSEXPLODE(y) t AS a",
            },
        )
        self.validate_all(
            "SELECT a FROM x LATERAL VIEW EXPLODE(ARRAY(y)) t AS a",
            write={
                "presto": "SELECT a FROM x CROSS JOIN UNNEST(ARRAY[y]) AS t(a)",
                "hive": "SELECT a FROM x LATERAL VIEW EXPLODE(ARRAY(y)) t AS a",
                "spark": "SELECT a FROM x LATERAL VIEW EXPLODE(ARRAY(y)) t AS a",
            },
        )
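
    # Quoting rules differ: Hive and Spark escape a single quote with a
    # backslash, while DuckDB and Presto follow the ANSI convention of
    # doubling it; double-quoted Hive strings are rewritten to single quotes.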
    def test_quotes(self):
        self.validate_all(
            "'\\''",
            write={
                "duckdb": "''''",
                "presto": "''''",
                "hive": "'\\''",
                "spark": "'\\''",
            },
        )
        self.validate_all(
            "'\"x\"'",
            write={
                "duckdb": "'\"x\"'",
                "presto": "'\"x\"'",
                "hive": "'\"x\"'",
                "spark": "'\"x\"'",
            },
        )
        self.validate_all(
            "\"'x'\"",
            write={
                "duckdb": "'''x'''",
                "presto": "'''x'''",
                "hive": "'\\'x\\''",
                "spark": "'\\'x\\''",
            },
        )
        self.validate_all(
            "'\\\\\\\\a'",
            read={
                "drill": "'\\\\\\\\a'",
                "duckdb": "'\\\\a'",
                "presto": "'\\\\a'",
            },
            write={
                "drill": "'\\\\\\\\a'",
                "duckdb": "'\\\\a'",
                "hive": "'\\\\\\\\a'",
                "presto": "'\\\\a'",
                "spark": "'\\\\\\\\a'",
            },
        )
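
    # RLIKE and REGEXP are synonyms in Hive; both normalize to RLIKE and map
    # to the regex predicates of the other dialects.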
    def test_regex(self):
        self.validate_all(
            "a RLIKE 'x'",
            write={
                "duckdb": "REGEXP_MATCHES(a, 'x')",
                "presto": "REGEXP_LIKE(a, 'x')",
                "hive": "a RLIKE 'x'",
                "spark": "a RLIKE 'x'",
            },
        )
        self.validate_all(
            "a REGEXP 'x'",
            write={
                "duckdb": "REGEXP_MATCHES(a, 'x')",
                "presto": "REGEXP_LIKE(a, 'x')",
                "hive": "a RLIKE 'x'",
                "spark": "a RLIKE 'x'",
            },
        )
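
    # Presto's DATE_DIFF supports sub-day units directly; Hive does not, so
    # those units are lowered to UNIX_TIMESTAMP arithmetic when reading from
    # Presto, as the first four assertions below show.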
    def test_time(self):
        self.validate_all(
            "(UNIX_TIMESTAMP(y) - UNIX_TIMESTAMP(x)) * 1000",
            read={
                "presto": "DATE_DIFF('millisecond', x, y)",
            },
        )
        self.validate_all(
            "UNIX_TIMESTAMP(y) - UNIX_TIMESTAMP(x)",
            read={
                "presto": "DATE_DIFF('second', x, y)",
            },
        )
        self.validate_all(
            "(UNIX_TIMESTAMP(y) - UNIX_TIMESTAMP(x)) / 60",
            read={
                "presto": "DATE_DIFF('minute', x, y)",
            },
        )
        self.validate_all(
            "(UNIX_TIMESTAMP(y) - UNIX_TIMESTAMP(x)) / 3600",
            read={
                "presto": "DATE_DIFF('hour', x, y)",
            },
        )
        self.validate_all(
            "DATEDIFF(a, b)",
            write={
                "duckdb": "DATE_DIFF('DAY', CAST(b AS DATE), CAST(a AS DATE))",
                "presto": "DATE_DIFF('DAY', CAST(CAST(b AS TIMESTAMP) AS DATE), CAST(CAST(a AS TIMESTAMP) AS DATE))",
                "hive": "DATEDIFF(TO_DATE(a), TO_DATE(b))",
                "spark": "DATEDIFF(TO_DATE(a), TO_DATE(b))",
                "": "DATEDIFF(CAST(a AS DATE), CAST(b AS DATE))",
            },
        )
        self.validate_all(
            """from_unixtime(x, "yyyy-MM-dd'T'HH")""",
            write={
                "duckdb": "STRFTIME(TO_TIMESTAMP(x), '%Y-%m-%d''T''%H')",
                "presto": "DATE_FORMAT(FROM_UNIXTIME(x), '%Y-%m-%d''T''%H')",
                "hive": "FROM_UNIXTIME(x, 'yyyy-MM-dd\\'T\\'HH')",
                "spark": "FROM_UNIXTIME(x, 'yyyy-MM-dd\\'T\\'HH')",
            },
        )
        self.validate_all(
            "DATE_FORMAT('2020-01-01', 'yyyy-MM-dd HH:mm:ss')",
            write={
                "bigquery": "FORMAT_DATE('%Y-%m-%d %H:%M:%S', CAST('2020-01-01' AS DATETIME))",
                "duckdb": "STRFTIME(CAST('2020-01-01' AS TIMESTAMP), '%Y-%m-%d %H:%M:%S')",
                "presto": "DATE_FORMAT(CAST('2020-01-01' AS TIMESTAMP), '%Y-%m-%d %T')",
                "hive": "DATE_FORMAT(CAST('2020-01-01' AS TIMESTAMP), 'yyyy-MM-dd HH:mm:ss')",
                "spark": "DATE_FORMAT(CAST('2020-01-01' AS TIMESTAMP), 'yyyy-MM-dd HH:mm:ss')",
            },
        )
        self.validate_all(
            "DATE_ADD('2020-01-01', 1)",
            write={
                "": "TS_OR_DS_ADD('2020-01-01', 1, DAY)",
                "bigquery": "DATE_ADD(CAST(CAST('2020-01-01' AS DATETIME) AS DATE), INTERVAL 1 DAY)",
                "duckdb": "CAST('2020-01-01' AS DATE) + INTERVAL 1 DAY",
                "hive": "DATE_ADD('2020-01-01', 1)",
                "presto": "DATE_ADD('DAY', 1, CAST(CAST('2020-01-01' AS TIMESTAMP) AS DATE))",
                "redshift": "DATEADD(DAY, 1, '2020-01-01')",
                "snowflake": "DATEADD(DAY, 1, CAST(CAST('2020-01-01' AS TIMESTAMP) AS DATE))",
                "spark": "DATE_ADD('2020-01-01', 1)",
                "tsql": "DATEADD(DAY, 1, CAST(CAST('2020-01-01' AS DATETIME2) AS DATE))",
            },
        )
        self.validate_all(
            "DATE_SUB('2020-01-01', 1)",
            write={
                "": "TS_OR_DS_ADD('2020-01-01', 1 * -1, DAY)",
                "bigquery": "DATE_ADD(CAST(CAST('2020-01-01' AS DATETIME) AS DATE), INTERVAL (1 * -1) DAY)",
                "duckdb": "CAST('2020-01-01' AS DATE) + INTERVAL (1 * -1) DAY",
                "hive": "DATE_ADD('2020-01-01', 1 * -1)",
                "presto": "DATE_ADD('DAY', 1 * -1, CAST(CAST('2020-01-01' AS TIMESTAMP) AS DATE))",
                "redshift": "DATEADD(DAY, 1 * -1, '2020-01-01')",
                "snowflake": "DATEADD(DAY, 1 * -1, CAST(CAST('2020-01-01' AS TIMESTAMP) AS DATE))",
                "spark": "DATE_ADD('2020-01-01', 1 * -1)",
                "tsql": "DATEADD(DAY, 1 * -1, CAST(CAST('2020-01-01' AS DATETIME2) AS DATE))",
            },
        )
        self.validate_all("DATE_ADD('2020-01-01', -1)", read={"": "DATE_SUB('2020-01-01', 1)"})
        self.validate_all("DATE_ADD(a, b * -1)", read={"": "DATE_SUB(a, b)"})
        self.validate_all(
            "ADD_MONTHS('2020-01-01', -2)", read={"": "DATE_SUB('2020-01-01', 2, month)"}
        )
        self.validate_all(
            "DATEDIFF(TO_DATE(y), x)",
            write={
                "duckdb": "DATE_DIFF('DAY', CAST(x AS DATE), CAST(y AS DATE))",
                "presto": "DATE_DIFF('DAY', CAST(CAST(x AS TIMESTAMP) AS DATE), CAST(CAST(CAST(CAST(y AS TIMESTAMP) AS DATE) AS TIMESTAMP) AS DATE))",
                "hive": "DATEDIFF(TO_DATE(y), TO_DATE(x))",
                "spark": "DATEDIFF(TO_DATE(y), TO_DATE(x))",
                "": "DATEDIFF(CAST(y AS DATE), CAST(x AS DATE))",
            },
        )
        self.validate_all(
            "UNIX_TIMESTAMP(x)",
            write={
                "duckdb": "EPOCH(STRPTIME(x, '%Y-%m-%d %H:%M:%S'))",
                "presto": "TO_UNIXTIME(COALESCE(TRY(DATE_PARSE(CAST(x AS VARCHAR), '%Y-%m-%d %T')), PARSE_DATETIME(CAST(x AS VARCHAR), 'yyyy-MM-dd HH:mm:ss')))",
                "hive": "UNIX_TIMESTAMP(x)",
                "spark": "UNIX_TIMESTAMP(x)",
                "": "STR_TO_UNIX(x, '%Y-%m-%d %H:%M:%S')",
            },
        )

        for unit in ("DAY", "MONTH", "YEAR"):
            self.validate_all(
                f"{unit}(x)",
                write={
                    "duckdb": f"{unit}(CAST(x AS DATE))",
                    "presto": f"{unit}(CAST(CAST(x AS TIMESTAMP) AS DATE))",
                    "hive": f"{unit}(TO_DATE(x))",
                    "spark": f"{unit}(TO_DATE(x))",
                },
            )
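
    # Dialects disagree on default NULL ordering, so qualifiers that match a
    # target's default can be dropped (or must be made explicit) on write.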
    def test_order_by(self):
        self.validate_all(
            "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname ASC NULLS LAST, lname",
            write={
                "duckdb": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname ASC, lname NULLS FIRST",
                "presto": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname ASC, lname NULLS FIRST",
                "hive": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname ASC NULLS LAST, lname",
                "spark": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname ASC NULLS LAST, lname",
            },
        )
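
    # Grab bag of Hive-specific syntax that should survive a round-trip:
    # DISTRIBUTE/SORT/CLUSTER BY, time-travel clauses, INSERT OVERWRITE,
    # GROUPING SETS, and identifiers that start with digits.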
    def test_hive(self):
        self.validate_identity("SET hiveconf:some_var = 5", check_command_warning=True)
        self.validate_identity("(VALUES (1 AS a, 2 AS b, 3))")
        self.validate_identity("SELECT * FROM my_table TIMESTAMP AS OF DATE_ADD(CURRENT_DATE, -1)")
        self.validate_identity("SELECT * FROM my_table VERSION AS OF DATE_ADD(CURRENT_DATE, -1)")
        self.validate_identity(
            "SELECT ROW() OVER (DISTRIBUTE BY x SORT BY y)",
            "SELECT ROW() OVER (PARTITION BY x ORDER BY y)",
        )
        self.validate_identity("SELECT transform")
        self.validate_identity("SELECT * FROM test DISTRIBUTE BY y SORT BY x DESC ORDER BY l")
        self.validate_identity(
            "SELECT * FROM test WHERE RAND() <= 0.1 DISTRIBUTE BY RAND() SORT BY RAND()"
        )
        self.validate_identity("(SELECT 1 UNION SELECT 2) DISTRIBUTE BY z")
        self.validate_identity("(SELECT 1 UNION SELECT 2) DISTRIBUTE BY z SORT BY x")
        self.validate_identity("(SELECT 1 UNION SELECT 2) CLUSTER BY y DESC")
        self.validate_identity("SELECT * FROM test CLUSTER BY y")
        self.validate_identity("(SELECT 1 UNION SELECT 2) SORT BY z")
        self.validate_identity(
            "INSERT OVERWRITE TABLE zipcodes PARTITION(state = '0') VALUES (896, 'US', 'TAMPA', 33607)"
        )
        self.validate_identity(
            "INSERT OVERWRITE TABLE zipcodes PARTITION(state = 0) VALUES (896, 'US', 'TAMPA', 33607)"
        )
        self.validate_identity(
            "INSERT OVERWRITE DIRECTORY 'x' ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' COLLECTION ITEMS TERMINATED BY ',' MAP KEYS TERMINATED BY ':' LINES TERMINATED BY '' STORED AS TEXTFILE SELECT * FROM `a`.`b`"
        )
        self.validate_identity(
            "SELECT a, b, SUM(c) FROM tabl AS t GROUP BY a, b, GROUPING SETS ((a, b), a)"
        )
        self.validate_identity(
            "SELECT a, b, SUM(c) FROM tabl AS t GROUP BY a, b, GROUPING SETS ((t.a, b), a)"
        )
        self.validate_identity(
            "SELECT a, b, SUM(c) FROM tabl AS t GROUP BY a, FOO(b), GROUPING SETS ((a, FOO(b)), a)"
        )
        self.validate_identity(
            "SELECT key, value, GROUPING__ID, COUNT(*) FROM T1 GROUP BY key, value WITH CUBE"
        )
        self.validate_identity(
            "SELECT key, value, GROUPING__ID, COUNT(*) FROM T1 GROUP BY key, value WITH ROLLUP"
        )
        self.validate_identity(
            "TRUNCATE TABLE t1 PARTITION(age = 10, name = 'test', address = 'abc')"
        )
        self.validate_all(
            "SELECT ${hiveconf:some_var}",
            write={
                "hive": "SELECT ${hiveconf:some_var}",
                "spark": "SELECT ${hiveconf:some_var}",
            },
        )
        self.validate_all(
            "SELECT A.1a AS b FROM test_a AS A",
            write={
                "spark": "SELECT A.1a AS b FROM test_a AS A",
            },
        )
        self.validate_all(
            "SELECT 1_a AS a FROM test_table",
            write={
                "spark": "SELECT 1_a AS a FROM test_table",
                "trino": 'SELECT "1_a" AS a FROM test_table',
            },
        )
        self.validate_all(
            "SELECT a_b AS 1_a FROM test_table",
            write={
                "spark": "SELECT a_b AS 1_a FROM test_table",
            },
        )
        self.validate_all(
            "SELECT 1a_1a FROM test_a",
            write={
                "spark": "SELECT 1a_1a FROM test_a",
            },
        )
        self.validate_all(
            "SELECT 1a AS 1a_1a FROM test_a",
            write={
                "spark": "SELECT 1a AS 1a_1a FROM test_a",
            },
        )
        self.validate_all(
            "CREATE TABLE test_table (1a STRING)",
            write={
                "spark": "CREATE TABLE test_table (1a STRING)",
            },
        )
        self.validate_all(
            "CREATE TABLE test_table2 (1a_1a STRING)",
            write={
                "spark": "CREATE TABLE test_table2 (1a_1a STRING)",
            },
        )
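
        # Function-name mappings: the same operation is spelled differently
        # per dialect (e.g. Presto's APPROX_DISTINCT vs. everyone else's
        # APPROX_COUNT_DISTINCT), and argument order can differ too, as in
        # Snowflake's ARRAY_CONTAINS(value, array).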
        self.validate_all(
            "PERCENTILE(x, 0.5)",
            write={
                "duckdb": "QUANTILE(x, 0.5)",
                "presto": "APPROX_PERCENTILE(x, 0.5)",
                "hive": "PERCENTILE(x, 0.5)",
                "spark": "PERCENTILE(x, 0.5)",
            },
        )
        self.validate_all(
            "PERCENTILE_APPROX(x, 0.5)",
            read={
                "hive": "PERCENTILE_APPROX(x, 0.5)",
                "presto": "APPROX_PERCENTILE(x, 0.5)",
                "duckdb": "APPROX_QUANTILE(x, 0.5)",
                "spark": "PERCENTILE_APPROX(x, 0.5)",
            },
            write={
                "hive": "PERCENTILE_APPROX(x, 0.5)",
                "presto": "APPROX_PERCENTILE(x, 0.5)",
                "duckdb": "APPROX_QUANTILE(x, 0.5)",
                "spark": "PERCENTILE_APPROX(x, 0.5)",
            },
        )
        self.validate_all(
            "APPROX_COUNT_DISTINCT(a)",
            write={
                "bigquery": "APPROX_COUNT_DISTINCT(a)",
                "duckdb": "APPROX_COUNT_DISTINCT(a)",
                "presto": "APPROX_DISTINCT(a)",
                "hive": "APPROX_COUNT_DISTINCT(a)",
                "snowflake": "APPROX_COUNT_DISTINCT(a)",
                "spark": "APPROX_COUNT_DISTINCT(a)",
            },
        )
        self.validate_all(
            "ARRAY_CONTAINS(x, 1)",
            read={
                "duckdb": "LIST_HAS(x, 1)",
                "snowflake": "ARRAY_CONTAINS(1, x)",
            },
            write={
                "duckdb": "ARRAY_CONTAINS(x, 1)",
                "presto": "CONTAINS(x, 1)",
                "hive": "ARRAY_CONTAINS(x, 1)",
                "spark": "ARRAY_CONTAINS(x, 1)",
                "snowflake": "ARRAY_CONTAINS(1, x)",
            },
        )
        self.validate_all(
            "SIZE(x)",
            write={
                "duckdb": "ARRAY_LENGTH(x)",
                "presto": "CARDINALITY(x)",
                "hive": "SIZE(x)",
                "spark": "SIZE(x)",
            },
        )
        self.validate_all(
            "LOCATE('a', x)",
            write={
                "duckdb": "STRPOS(x, 'a')",
                "presto": "STRPOS(x, 'a')",
                "hive": "LOCATE('a', x)",
                "spark": "LOCATE('a', x)",
            },
        )
        self.validate_all(
            "LOCATE('a', x, 3)",
            write={
                "duckdb": "STRPOS(SUBSTR(x, 3), 'a') + 3 - 1",
                "presto": "STRPOS(SUBSTR(x, 3), 'a') + 3 - 1",
                "hive": "LOCATE('a', x, 3)",
                "spark": "LOCATE('a', x, 3)",
            },
        )
        self.validate_all(
            "INITCAP('new york')",
            write={
                "duckdb": "INITCAP('new york')",
                "presto": r"REGEXP_REPLACE('new york', '(\w)(\w*)', x -> UPPER(x[1]) || LOWER(x[2]))",
                "hive": "INITCAP('new york')",
                "spark": "INITCAP('new york')",
            },
        )
        self.validate_all(
            "SELECT * FROM x.z TABLESAMPLE(10 PERCENT) y",
            write={
                "hive": "SELECT * FROM x.z TABLESAMPLE (10 PERCENT) AS y",
                "spark": "SELECT * FROM x.z TABLESAMPLE (10 PERCENT) AS y",
            },
        )
        self.validate_all(
            "SELECT SORT_ARRAY(x)",
            write={
                "duckdb": "SELECT ARRAY_SORT(x)",
                "presto": "SELECT ARRAY_SORT(x)",
                "hive": "SELECT SORT_ARRAY(x)",
                "spark": "SELECT SORT_ARRAY(x)",
            },
        )
        self.validate_all(
            "SELECT SORT_ARRAY(x, FALSE)",
            read={
                "duckdb": "SELECT ARRAY_REVERSE_SORT(x)",
                "spark": "SELECT SORT_ARRAY(x, FALSE)",
            },
            write={
                "duckdb": "SELECT ARRAY_REVERSE_SORT(x)",
                "presto": "SELECT ARRAY_SORT(x, (a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END)",
                "hive": "SELECT SORT_ARRAY(x, FALSE)",
                "spark": "SELECT SORT_ARRAY(x, FALSE)",
            },
        )
        self.validate_all(
            "GET_JSON_OBJECT(x, '$.name')",
            write={
                "presto": "JSON_EXTRACT_SCALAR(x, '$.name')",
                "hive": "GET_JSON_OBJECT(x, '$.name')",
                "spark": "GET_JSON_OBJECT(x, '$.name')",
            },
        )
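
        # Hive's variadic MAP(k1, v1, k2, v2, ...) becomes a keys/values pair
        # of arrays in Presto and DuckDB and an OBJECT_CONSTRUCT in Snowflake.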
        self.validate_all(
            "MAP(a, b, c, d)",
            read={
                "": "VAR_MAP(a, b, c, d)",
                "clickhouse": "map(a, b, c, d)",
                "duckdb": "MAP([a, c], [b, d])",
                "hive": "MAP(a, b, c, d)",
                "presto": "MAP(ARRAY[a, c], ARRAY[b, d])",
                "spark": "MAP(a, b, c, d)",
            },
            write={
                "": "MAP(ARRAY(a, c), ARRAY(b, d))",
                "clickhouse": "map(a, b, c, d)",
                "duckdb": "MAP([a, c], [b, d])",
                "presto": "MAP(ARRAY[a, c], ARRAY[b, d])",
                "hive": "MAP(a, b, c, d)",
                "spark": "MAP(a, b, c, d)",
                "snowflake": "OBJECT_CONSTRUCT(a, b, c, d)",
            },
        )
        self.validate_all(
            "MAP(a, b)",
            write={
                "duckdb": "MAP([a], [b])",
                "presto": "MAP(ARRAY[a], ARRAY[b])",
                "hive": "MAP(a, b)",
                "spark": "MAP(a, b)",
                "snowflake": "OBJECT_CONSTRUCT(a, b)",
            },
        )
        self.validate_all(
            "LOG(10)",
            write={
                "duckdb": "LN(10)",
                "presto": "LN(10)",
                "hive": "LN(10)",
                "spark": "LN(10)",
            },
        )
        self.validate_all(
            'ds = "2020-01-01"',
            write={
                "duckdb": "ds = '2020-01-01'",
                "presto": "ds = '2020-01-01'",
                "hive": "ds = '2020-01-01'",
                "spark": "ds = '2020-01-01'",
            },
        )
        self.validate_all(
            "ds = \"1''2\"",
            write={
                "duckdb": "ds = '1''''2'",
                "presto": "ds = '1''''2'",
                "hive": "ds = '1\\'\\'2'",
                "spark": "ds = '1\\'\\'2'",
            },
        )
        self.validate_all(
            "x == 1",
            write={
                "duckdb": "x = 1",
                "presto": "x = 1",
                "hive": "x = 1",
                "spark": "x = 1",
            },
        )
        self.validate_all(
            "x div y",
            write={
                "duckdb": "x // y",
                "presto": "CAST(CAST(x AS DOUBLE) / y AS INTEGER)",
                "hive": "CAST(x / y AS INT)",
                "spark": "CAST(x / y AS INT)",
            },
        )
        self.validate_all(
            "COLLECT_LIST(x)",
            read={
                "presto": "ARRAY_AGG(x)",
            },
            write={
                "duckdb": "ARRAY_AGG(x)",
                "presto": "ARRAY_AGG(x)",
                "hive": "COLLECT_LIST(x)",
                "spark": "COLLECT_LIST(x)",
            },
        )
        self.validate_all(
            "COLLECT_SET(x)",
            read={
                "doris": "COLLECT_SET(x)",
                "presto": "SET_AGG(x)",
                "snowflake": "ARRAY_UNIQUE_AGG(x)",
            },
            write={
                "doris": "COLLECT_SET(x)",
                "hive": "COLLECT_SET(x)",
                "presto": "SET_AGG(x)",
                "snowflake": "ARRAY_UNIQUE_AGG(x)",
                "spark": "COLLECT_SET(x)",
            },
        )
        self.validate_all(
            "SELECT * FROM x TABLESAMPLE (1 PERCENT) AS foo",
            read={
                "presto": "SELECT * FROM x AS foo TABLESAMPLE BERNOULLI (1)",
                "snowflake": "SELECT * FROM x AS foo TABLESAMPLE (1)",
            },
            write={
                "hive": "SELECT * FROM x TABLESAMPLE (1 PERCENT) AS foo",
                "snowflake": "SELECT * FROM x AS foo TABLESAMPLE (1)",
                "spark": "SELECT * FROM x TABLESAMPLE (1 PERCENT) AS foo",
            },
        )
        self.validate_all(
            "SELECT a, SUM(c) FROM t GROUP BY a, DATE_FORMAT(b, 'yyyy'), GROUPING SETS ((a, DATE_FORMAT(b, 'yyyy')), a)",
            write={
                "hive": "SELECT a, SUM(c) FROM t GROUP BY a, DATE_FORMAT(CAST(b AS TIMESTAMP), 'yyyy'), GROUPING SETS ((a, DATE_FORMAT(CAST(b AS TIMESTAMP), 'yyyy')), a)",
            },
        )
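
    # Round-trips for backslash escapes: where a second argument is given,
    # it is the canonical form the raw input should normalize to.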
    def test_escapes(self) -> None:
        self.validate_identity("'\n'", "'\\n'")
        self.validate_identity("'\\n'")
        self.validate_identity("'\\\n'", "'\\\\\\n'")
        self.validate_identity("'\\\\n'")
        self.validate_identity("''")
        self.validate_identity("'\\\\'")
        self.validate_identity("'\\\\z'")
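
    # Hive has no BIT type; the closest match is BOOLEAN.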
    def test_data_type(self):
        self.validate_all(
            "CAST(a AS BIT)",
            write={
                "hive": "CAST(a AS BOOLEAN)",
            },
        )