from sqlglot import UnsupportedError, exp, parse_one
from sqlglot.helper import logger as helper_logger
from tests.dialects.test_dialect import Validator


class TestPresto(Validator):
    dialect = "presto"

    def test_cast(self):
        self.validate_identity("DEALLOCATE PREPARE my_query", check_command_warning=True)
        self.validate_identity("DESCRIBE INPUT x", check_command_warning=True)
        self.validate_identity("DESCRIBE OUTPUT x", check_command_warning=True)
        self.validate_identity(
            "RESET SESSION hive.optimized_reader_enabled", check_command_warning=True
        )
        self.validate_identity("SELECT * FROM x qualify", "SELECT * FROM x AS qualify")
        self.validate_identity("CAST(x AS IPADDRESS)")
        self.validate_identity("CAST(x AS IPPREFIX)")
        self.validate_identity("CAST(TDIGEST_AGG(1) AS TDIGEST)")
        self.validate_identity("CAST(x AS HYPERLOGLOG)")

        self.validate_all(
            "CAST(x AS BOOLEAN)",
            read={
                "tsql": "CAST(x AS BIT)",
            },
            write={
                "presto": "CAST(x AS BOOLEAN)",
                "tsql": "CAST(x AS BIT)",
            },
        )
        self.validate_all(
            "SELECT FROM_ISO8601_TIMESTAMP('2020-05-11T11:15:05')",
            write={
                "duckdb": "SELECT CAST('2020-05-11T11:15:05' AS TIMESTAMPTZ)",
                "presto": "SELECT FROM_ISO8601_TIMESTAMP('2020-05-11T11:15:05')",
            },
        )
        self.validate_all(
            "CAST(x AS INTERVAL YEAR TO MONTH)",
            write={
                "oracle": "CAST(x AS INTERVAL YEAR TO MONTH)",
                "presto": "CAST(x AS INTERVAL YEAR TO MONTH)",
            },
        )
        self.validate_all(
            "CAST(x AS INTERVAL DAY TO SECOND)",
            write={
                "oracle": "CAST(x AS INTERVAL DAY TO SECOND)",
                "presto": "CAST(x AS INTERVAL DAY TO SECOND)",
            },
        )
        self.validate_all(
            "SELECT CAST('10C' AS INTEGER)",
            read={
                "postgres": "SELECT CAST('10C' AS INTEGER)",
                "presto": "SELECT CAST('10C' AS INTEGER)",
                "redshift": "SELECT CAST('10C' AS INTEGER)",
            },
        )
        self.validate_all(
            "SELECT CAST('1970-01-01 00:00:00' AS TIMESTAMP)",
            read={"postgres": "SELECT 'epoch'::TIMESTAMP"},
        )
        self.validate_all(
            "FROM_BASE64(x)",
            read={
                "hive": "UNBASE64(x)",
            },
            write={
                "hive": "UNBASE64(x)",
                "presto": "FROM_BASE64(x)",
            },
        )
        self.validate_all(
            "TO_BASE64(x)",
            read={
                "hive": "BASE64(x)",
            },
            write={
                "hive": "BASE64(x)",
                "presto": "TO_BASE64(x)",
            },
        )
        self.validate_all(
            "CAST(a AS ARRAY(INT))",
            write={
                "bigquery": "CAST(a AS ARRAY<INT64>)",
                "duckdb": "CAST(a AS INT[])",
                "presto": "CAST(a AS ARRAY(INTEGER))",
                "spark": "CAST(a AS ARRAY<INT>)",
                "snowflake": "CAST(a AS ARRAY(INT))",
            },
        )
        self.validate_all(
            "CAST(a AS VARCHAR)",
            write={
                "bigquery": "CAST(a AS STRING)",
                "duckdb": "CAST(a AS TEXT)",
                "presto": "CAST(a AS VARCHAR)",
                "spark": "CAST(a AS STRING)",
            },
        )
        self.validate_all(
            "CAST(ARRAY[1, 2] AS ARRAY(BIGINT))",
            write={
                "bigquery": "ARRAY<INT64>[1, 2]",
                "duckdb": "CAST([1, 2] AS BIGINT[])",
                "presto": "CAST(ARRAY[1, 2] AS ARRAY(BIGINT))",
                "spark": "CAST(ARRAY(1, 2) AS ARRAY<BIGINT>)",
                "snowflake": "CAST([1, 2] AS ARRAY(BIGINT))",
            },
        )
        self.validate_all(
            "CAST(MAP(ARRAY['key'], ARRAY[1]) AS MAP(VARCHAR, INT))",
            write={
                "duckdb": "CAST(MAP(['key'], [1]) AS MAP(TEXT, INT))",
                "presto": "CAST(MAP(ARRAY['key'], ARRAY[1]) AS MAP(VARCHAR, INTEGER))",
                "hive": "CAST(MAP('key', 1) AS MAP<STRING, INT>)",
                "snowflake": "CAST(OBJECT_CONSTRUCT('key', 1) AS MAP(VARCHAR, INT))",
                "spark": "CAST(MAP_FROM_ARRAYS(ARRAY('key'), ARRAY(1)) AS MAP<STRING, INT>)",
            },
        )
        self.validate_all(
            "CAST(MAP(ARRAY['a', 'b', 'c'], ARRAY[ARRAY[1], ARRAY[2], ARRAY[3]]) AS MAP(VARCHAR, ARRAY(INT)))",
            write={
                "bigquery": "CAST(MAP(['a', 'b', 'c'], [[1], [2], [3]]) AS MAP<STRING, ARRAY<INT64>>)",
                "duckdb": "CAST(MAP(['a', 'b', 'c'], [[1], [2], [3]]) AS MAP(TEXT, INT[]))",
                "presto": "CAST(MAP(ARRAY['a', 'b', 'c'], ARRAY[ARRAY[1], ARRAY[2], ARRAY[3]]) AS MAP(VARCHAR, ARRAY(INTEGER)))",
                "hive": "CAST(MAP('a', ARRAY(1), 'b', ARRAY(2), 'c', ARRAY(3)) AS MAP<STRING, ARRAY<INT>>)",
                "spark": "CAST(MAP_FROM_ARRAYS(ARRAY('a', 'b', 'c'), ARRAY(ARRAY(1), ARRAY(2), ARRAY(3))) AS MAP<STRING, ARRAY<INT>>)",
                "snowflake": "CAST(OBJECT_CONSTRUCT('a', [1], 'b', [2], 'c', [3]) AS MAP(VARCHAR, ARRAY(INT)))",
            },
        )
        self.validate_all(
            "CAST(x AS TIME(5) WITH TIME ZONE)",
            write={
                "duckdb": "CAST(x AS TIMETZ)",
                "postgres": "CAST(x AS TIMETZ(5))",
                "presto": "CAST(x AS TIME(5) WITH TIME ZONE)",
                "redshift": "CAST(x AS TIME(5) WITH TIME ZONE)",
            },
        )
        self.validate_all(
            "CAST(x AS TIMESTAMP(9) WITH TIME ZONE)",
            write={
                "bigquery": "CAST(x AS TIMESTAMP)",
                "duckdb": "CAST(x AS TIMESTAMPTZ)",
                "presto": "CAST(x AS TIMESTAMP(9) WITH TIME ZONE)",
                "hive": "CAST(x AS TIMESTAMP)",
                "spark": "CAST(x AS TIMESTAMP)",
            },
        )

    def test_regex(self):
        self.validate_all(
            "REGEXP_REPLACE('abcd', '[ab]')",
            write={
                "presto": "REGEXP_REPLACE('abcd', '[ab]', '')",
                "spark": "REGEXP_REPLACE('abcd', '[ab]', '')",
            },
        )
        self.validate_all(
            "REGEXP_LIKE(a, 'x')",
            write={
                "duckdb": "REGEXP_MATCHES(a, 'x')",
                "presto": "REGEXP_LIKE(a, 'x')",
                "hive": "a RLIKE 'x'",
                "spark": "a RLIKE 'x'",
            },
        )
        self.validate_all(
            "SPLIT(x, 'a.')",
            write={
                "duckdb": "STR_SPLIT(x, 'a.')",
                "presto": "SPLIT(x, 'a.')",
                "hive": "SPLIT(x, CONCAT('\\\\Q', 'a.', '\\\\E'))",
                "spark": "SPLIT(x, CONCAT('\\\\Q', 'a.', '\\\\E'))",
            },
        )
        self.validate_all(
            "REGEXP_SPLIT(x, 'a.')",
            write={
                "duckdb": "STR_SPLIT_REGEX(x, 'a.')",
                "presto": "REGEXP_SPLIT(x, 'a.')",
                "hive": "SPLIT(x, 'a.')",
                "spark": "SPLIT(x, 'a.')",
            },
        )
        self.validate_all(
            "CARDINALITY(x)",
            write={
                "duckdb": "ARRAY_LENGTH(x)",
                "presto": "CARDINALITY(x)",
                "hive": "SIZE(x)",
                "spark": "SIZE(x)",
            },
        )
        self.validate_all(
            "ARRAY_JOIN(x, '-', 'a')",
            write={
                "hive": "CONCAT_WS('-', x)",
                "spark": "ARRAY_JOIN(x, '-', 'a')",
            },
        )
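
        # Hive's CONCAT_WS has no null-replacement argument, so ARRAY_JOIN's
        # third argument ('a') is dropped in the Hive output above.
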
        self.validate_all(
            "STRPOS(haystack, needle, occurrence)",
            write={
                "bigquery": "INSTR(haystack, needle, 1, occurrence)",
                "oracle": "INSTR(haystack, needle, 1, occurrence)",
                "presto": "STRPOS(haystack, needle, occurrence)",
                "tableau": "FINDNTH(haystack, needle, occurrence)",
                "trino": "STRPOS(haystack, needle, occurrence)",
                "teradata": "INSTR(haystack, needle, 1, occurrence)",
            },
        )

    def test_interval_plural_to_singular(self):
        # Microseconds, weeks and quarters are not supported in Presto/Trino INTERVAL literals
        unit_to_expected = {
            "SeCoNds": "SECOND",
            "minutes": "MINUTE",
            "hours": "HOUR",
            "days": "DAY",
            "months": "MONTH",
            "years": "YEAR",
        }

        for unit, expected in unit_to_expected.items():
            self.validate_all(
                f"SELECT INTERVAL '1' {unit}",
                write={
                    "bigquery": f"SELECT INTERVAL '1' {expected}",
                    "presto": f"SELECT INTERVAL '1' {expected}",
                    "trino": f"SELECT INTERVAL '1' {expected}",
                    "mysql": f"SELECT INTERVAL '1' {expected}",
                    "doris": f"SELECT INTERVAL '1' {expected}",
                },
            )
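
        # Weeks have no INTERVAL unit in Presto/Trino; as also asserted in
        # test_presto below, a week interval falls back to day arithmetic:
        self.validate_all("(5 * INTERVAL '7' DAY)", read={"": "INTERVAL '5' WEEK"})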

    def test_time(self):
        expr = parse_one("TIME(7) WITH TIME ZONE", into=exp.DataType, read="presto")
        self.assertEqual(expr.this, exp.DataType.Type.TIMETZ)

        self.validate_identity("FROM_UNIXTIME(a, b)")
        self.validate_identity("FROM_UNIXTIME(a, b, c)")
        self.validate_identity("TRIM(a, b)")
        self.validate_identity("VAR_POP(a)")

        self.validate_all(
            "SELECT FROM_UNIXTIME(col) FROM tbl",
            write={
                "presto": "SELECT FROM_UNIXTIME(col) FROM tbl",
                "spark": "SELECT CAST(FROM_UNIXTIME(col) AS TIMESTAMP) FROM tbl",
                "trino": "SELECT FROM_UNIXTIME(col) FROM tbl",
            },
        )
        self.validate_all(
            "DATE_FORMAT(x, '%Y-%m-%d %H:%i:%S')",
            write={
                "bigquery": "FORMAT_DATE('%Y-%m-%d %H:%M:%S', x)",
                "duckdb": "STRFTIME(x, '%Y-%m-%d %H:%M:%S')",
                "presto": "DATE_FORMAT(x, '%Y-%m-%d %T')",
                "hive": "DATE_FORMAT(x, 'yyyy-MM-dd HH:mm:ss')",
                "spark": "DATE_FORMAT(x, 'yyyy-MM-dd HH:mm:ss')",
            },
        )
        self.validate_all(
            "DATE_PARSE(x, '%Y-%m-%d %H:%i:%S')",
            write={
                "duckdb": "STRPTIME(x, '%Y-%m-%d %H:%M:%S')",
                "presto": "DATE_PARSE(x, '%Y-%m-%d %T')",
                "hive": "CAST(x AS TIMESTAMP)",
                "spark": "TO_TIMESTAMP(x, 'yyyy-MM-dd HH:mm:ss')",
            },
        )
        self.validate_all(
            "DATE_PARSE(x, '%Y-%m-%d')",
            write={
                "duckdb": "STRPTIME(x, '%Y-%m-%d')",
                "presto": "DATE_PARSE(x, '%Y-%m-%d')",
                "hive": "CAST(x AS TIMESTAMP)",
                "spark": "TO_TIMESTAMP(x, 'yyyy-MM-dd')",
            },
        )
        self.validate_all(
            "DATE_FORMAT(x, '%T')",
            write={
                "hive": "DATE_FORMAT(x, 'HH:mm:ss')",
            },
        )
        self.validate_all(
            "DATE_PARSE(SUBSTR(x, 1, 10), '%Y-%m-%d')",
            write={
                "duckdb": "STRPTIME(SUBSTRING(x, 1, 10), '%Y-%m-%d')",
                "presto": "DATE_PARSE(SUBSTRING(x, 1, 10), '%Y-%m-%d')",
                "hive": "CAST(SUBSTRING(x, 1, 10) AS TIMESTAMP)",
                "spark": "TO_TIMESTAMP(SUBSTRING(x, 1, 10), 'yyyy-MM-dd')",
            },
        )
        self.validate_all(
            "DATE_PARSE(SUBSTRING(x, 1, 10), '%Y-%m-%d')",
            write={
                "duckdb": "STRPTIME(SUBSTRING(x, 1, 10), '%Y-%m-%d')",
                "presto": "DATE_PARSE(SUBSTRING(x, 1, 10), '%Y-%m-%d')",
                "hive": "CAST(SUBSTRING(x, 1, 10) AS TIMESTAMP)",
                "spark": "TO_TIMESTAMP(SUBSTRING(x, 1, 10), 'yyyy-MM-dd')",
            },
        )
        self.validate_all(
            "FROM_UNIXTIME(x)",
            write={
                "duckdb": "TO_TIMESTAMP(x)",
                "presto": "FROM_UNIXTIME(x)",
                "hive": "FROM_UNIXTIME(x)",
                "spark": "CAST(FROM_UNIXTIME(x) AS TIMESTAMP)",
            },
        )
        self.validate_all(
            "TO_UNIXTIME(x)",
            write={
                "duckdb": "EPOCH(x)",
                "presto": "TO_UNIXTIME(x)",
                "hive": "UNIX_TIMESTAMP(x)",
                "spark": "UNIX_TIMESTAMP(x)",
            },
        )
        self.validate_all(
            "DATE_ADD('DAY', 1, x)",
            write={
                "duckdb": "x + INTERVAL 1 DAY",
                "presto": "DATE_ADD('DAY', 1, x)",
                "hive": "DATE_ADD(x, 1)",
                "spark": "DATE_ADD(x, 1)",
            },
        )
        self.validate_all(
            "DATE_ADD('DAY', 1 * -1, x)",
            write={
                "presto": "DATE_ADD('DAY', 1 * -1, x)",
            },
        )
        self.validate_all(
            "NOW()",
            write={
                "presto": "CURRENT_TIMESTAMP",
                "hive": "CURRENT_TIMESTAMP()",
            },
        )
        self.validate_all(
            "SELECT DATE_ADD('DAY', 1, CAST(CURRENT_DATE AS TIMESTAMP))",
            read={
                "redshift": "SELECT DATEADD(DAY, 1, CURRENT_DATE)",
            },
        )
        self.validate_all(
            "((DAY_OF_WEEK(CAST(CAST(TRY_CAST('2012-08-08 01:00:00' AS TIMESTAMP WITH TIME ZONE) AS TIMESTAMP) AS DATE)) % 7) + 1)",
            read={
                "spark": "DAYOFWEEK(CAST('2012-08-08 01:00:00' AS TIMESTAMP))",
            },
        )
        self.validate_all(
            "DAY_OF_WEEK(CAST('2012-08-08 01:00:00' AS TIMESTAMP))",
            read={
                "duckdb": "ISODOW(CAST('2012-08-08 01:00:00' AS TIMESTAMP))",
            },
            write={
                "spark": "((DAYOFWEEK(CAST('2012-08-08 01:00:00' AS TIMESTAMP)) % 7) + 1)",
                "presto": "DAY_OF_WEEK(CAST('2012-08-08 01:00:00' AS TIMESTAMP))",
                "duckdb": "ISODOW(CAST('2012-08-08 01:00:00' AS TIMESTAMP))",
            },
        )
        self.validate_all(
            "DAY_OF_MONTH(timestamp '2012-08-08 01:00:00')",
            write={
                "spark": "DAYOFMONTH(CAST('2012-08-08 01:00:00' AS TIMESTAMP))",
                "presto": "DAY_OF_MONTH(CAST('2012-08-08 01:00:00' AS TIMESTAMP))",
                "duckdb": "DAYOFMONTH(CAST('2012-08-08 01:00:00' AS TIMESTAMP))",
            },
        )
        self.validate_all(
            "DAY_OF_YEAR(timestamp '2012-08-08 01:00:00')",
            write={
                "spark": "DAYOFYEAR(CAST('2012-08-08 01:00:00' AS TIMESTAMP))",
                "presto": "DAY_OF_YEAR(CAST('2012-08-08 01:00:00' AS TIMESTAMP))",
                "duckdb": "DAYOFYEAR(CAST('2012-08-08 01:00:00' AS TIMESTAMP))",
            },
        )
        self.validate_all(
            "WEEK_OF_YEAR(timestamp '2012-08-08 01:00:00')",
            write={
                "spark": "WEEKOFYEAR(CAST('2012-08-08 01:00:00' AS TIMESTAMP))",
                "presto": "WEEK_OF_YEAR(CAST('2012-08-08 01:00:00' AS TIMESTAMP))",
                "duckdb": "WEEKOFYEAR(CAST('2012-08-08 01:00:00' AS TIMESTAMP))",
            },
        )
        self.validate_all(
            "SELECT CAST('2012-10-31 00:00' AS TIMESTAMP) AT TIME ZONE 'America/Sao_Paulo'",
            write={
                "spark": "SELECT FROM_UTC_TIMESTAMP(CAST('2012-10-31 00:00' AS TIMESTAMP), 'America/Sao_Paulo')",
                "presto": "SELECT AT_TIMEZONE(CAST('2012-10-31 00:00' AS TIMESTAMP), 'America/Sao_Paulo')",
            },
        )
        self.validate_all(
            "SELECT AT_TIMEZONE(CAST('2012-10-31 00:00' AS TIMESTAMP WITH TIME ZONE), 'America/Sao_Paulo')",
            read={
                "spark": "SELECT FROM_UTC_TIMESTAMP(TIMESTAMP '2012-10-31 00:00', 'America/Sao_Paulo')",
            },
        )
        self.validate_all(
            "CAST(x AS TIMESTAMP)",
            write={"presto": "CAST(x AS TIMESTAMP)"},
            read={"mysql": "CAST(x AS DATETIME)", "clickhouse": "CAST(x AS DATETIME64)"},
        )
        self.validate_all(
            "CAST(x AS TIMESTAMP)",
            read={"mysql": "TIMESTAMP(x)"},
        )

        # This case isn't really correct, but it's a fallback for MySQL's version
        self.validate_all(
            "TIMESTAMP(x, '12:00:00')",
            write={
                "duckdb": "TIMESTAMP(x, '12:00:00')",
                "presto": "TIMESTAMP(x, '12:00:00')",
            },
        )
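
        # (MySQL's two-argument TIMESTAMP(expr, time) adds a time value to a
        # date/datetime expression; Presto has no direct equivalent, so the call
        # above is preserved verbatim rather than raising UnsupportedError.)
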
        self.validate_all(
            "DATE_ADD('DAY', CAST(x AS BIGINT), y)",
            write={
                "presto": "DATE_ADD('DAY', CAST(x AS BIGINT), y)",
            },
            read={
                "presto": "DATE_ADD('DAY', x, y)",
            },
        )
        self.validate_identity("DATE_ADD('DAY', 1, y)")

        self.validate_all(
            "SELECT DATE_ADD('MINUTE', 30, col)",
            write={
                "presto": "SELECT DATE_ADD('MINUTE', 30, col)",
                "trino": "SELECT DATE_ADD('MINUTE', 30, col)",
            },
        )

        self.validate_identity("DATE_ADD('DAY', FLOOR(5), y)")
        self.validate_identity(
            """SELECT DATE_ADD('DAY', MOD(5, 2.5), y), DATE_ADD('DAY', CEIL(5.5), y)""",
            """SELECT DATE_ADD('DAY', CAST(5 % 2.5 AS BIGINT), y), DATE_ADD('DAY', CAST(CEIL(5.5) AS BIGINT), y)""",
        )
        self.validate_all(
            "DATE_ADD('MINUTE', CAST(FLOOR(CAST(EXTRACT(MINUTE FROM CURRENT_TIMESTAMP) AS DOUBLE) / NULLIF(30, 0)) * 30 AS BIGINT), col)",
            read={
                "spark": "TIMESTAMPADD(MINUTE, FLOOR(EXTRACT(MINUTE FROM CURRENT_TIMESTAMP)/30)*30, col)",
            },
        )

    def test_ddl(self):
        self.validate_all(
            "CREATE TABLE test WITH (FORMAT = 'PARQUET') AS SELECT 1",
            write={
                "duckdb": "CREATE TABLE test AS SELECT 1",
                "presto": "CREATE TABLE test WITH (FORMAT='PARQUET') AS SELECT 1",
                "hive": "CREATE TABLE test STORED AS PARQUET AS SELECT 1",
                "spark": "CREATE TABLE test USING PARQUET AS SELECT 1",
            },
        )
        self.validate_all(
            "CREATE TABLE test STORED AS 'PARQUET' AS SELECT 1",
            write={
                "duckdb": "CREATE TABLE test AS SELECT 1",
                "presto": "CREATE TABLE test WITH (FORMAT='PARQUET') AS SELECT 1",
                "hive": "CREATE TABLE test STORED AS PARQUET AS SELECT 1",
                "spark": "CREATE TABLE test USING PARQUET AS SELECT 1",
            },
        )
        self.validate_all(
            "CREATE TABLE test WITH (FORMAT = 'PARQUET', X = '1', Z = '2') AS SELECT 1",
            write={
                "duckdb": "CREATE TABLE test AS SELECT 1",
                "presto": "CREATE TABLE test WITH (FORMAT='PARQUET', X='1', Z='2') AS SELECT 1",
                "hive": "CREATE TABLE test STORED AS PARQUET TBLPROPERTIES ('X'='1', 'Z'='2') AS SELECT 1",
                "spark": "CREATE TABLE test USING PARQUET TBLPROPERTIES ('X'='1', 'Z'='2') AS SELECT 1",
            },
        )
        self.validate_all(
            "CREATE TABLE x (w VARCHAR, y INTEGER, z INTEGER) WITH (PARTITIONED_BY=ARRAY['y', 'z'])",
            write={
                "duckdb": "CREATE TABLE x (w TEXT, y INT, z INT)",
                "presto": "CREATE TABLE x (w VARCHAR, y INTEGER, z INTEGER) WITH (PARTITIONED_BY=ARRAY['y', 'z'])",
                "hive": "CREATE TABLE x (w STRING) PARTITIONED BY (y INT, z INT)",
                "spark": "CREATE TABLE x (w STRING, y INT, z INT) PARTITIONED BY (y, z)",
            },
        )
        self.validate_all(
            "CREATE TABLE x WITH (bucket_by = ARRAY['y'], bucket_count = 64) AS SELECT 1 AS y",
            write={
                "duckdb": "CREATE TABLE x AS SELECT 1 AS y",
                "presto": "CREATE TABLE x WITH (bucket_by=ARRAY['y'], bucket_count=64) AS SELECT 1 AS y",
                "hive": "CREATE TABLE x TBLPROPERTIES ('bucket_by'=ARRAY('y'), 'bucket_count'=64) AS SELECT 1 AS y",
                "spark": "CREATE TABLE x TBLPROPERTIES ('bucket_by'=ARRAY('y'), 'bucket_count'=64) AS SELECT 1 AS y",
            },
        )
        self.validate_all(
            "CREATE TABLE db.example_table (col_a ROW(struct_col_a INTEGER, struct_col_b VARCHAR))",
            write={
                "duckdb": "CREATE TABLE db.example_table (col_a STRUCT(struct_col_a INT, struct_col_b TEXT))",
                "presto": "CREATE TABLE db.example_table (col_a ROW(struct_col_a INTEGER, struct_col_b VARCHAR))",
                "hive": "CREATE TABLE db.example_table (col_a STRUCT<struct_col_a INT, struct_col_b STRING>)",
                "spark": "CREATE TABLE db.example_table (col_a STRUCT<struct_col_a: INT, struct_col_b: STRING>)",
            },
        )
        self.validate_all(
            "CREATE TABLE db.example_table (col_a ROW(struct_col_a INTEGER, struct_col_b ROW(nested_col_a VARCHAR, nested_col_b VARCHAR)))",
            write={
                "duckdb": "CREATE TABLE db.example_table (col_a STRUCT(struct_col_a INT, struct_col_b STRUCT(nested_col_a TEXT, nested_col_b TEXT)))",
                "presto": "CREATE TABLE db.example_table (col_a ROW(struct_col_a INTEGER, struct_col_b ROW(nested_col_a VARCHAR, nested_col_b VARCHAR)))",
                "hive": "CREATE TABLE db.example_table (col_a STRUCT<struct_col_a INT, struct_col_b STRUCT<nested_col_a STRING, nested_col_b STRING>>)",
                "spark": "CREATE TABLE db.example_table (col_a STRUCT<struct_col_a: INT, struct_col_b: STRUCT<nested_col_a: STRING, nested_col_b: STRING>>)",
            },
        )
        self.validate_all(
            "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname ASC NULLS LAST, lname",
            write={
                "presto": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname ASC, lname",
                "spark": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname ASC NULLS LAST, lname NULLS LAST",
            },
        )
        self.validate_all(
            "CREATE OR REPLACE VIEW x (cola) SELECT 1 as cola",
            write={
                "spark": "CREATE OR REPLACE VIEW x (cola) AS SELECT 1 AS cola",
                "presto": "CREATE OR REPLACE VIEW x AS SELECT 1 AS cola",
            },
        )
        self.validate_all(
            """CREATE TABLE IF NOT EXISTS x ("cola" INTEGER, "ds" TEXT) COMMENT 'comment' WITH (PARTITIONED BY=("ds"))""",
            write={
                "spark": "CREATE TABLE IF NOT EXISTS x (`cola` INT, `ds` STRING) COMMENT 'comment' PARTITIONED BY (`ds`)",
                "presto": """CREATE TABLE IF NOT EXISTS x ("cola" INTEGER, "ds" VARCHAR) COMMENT 'comment' WITH (PARTITIONED_BY=ARRAY['ds'])""",
            },
        )

        self.validate_identity("""CREATE OR REPLACE VIEW v SECURITY DEFINER AS SELECT id FROM t""")
        self.validate_identity("""CREATE OR REPLACE VIEW v SECURITY INVOKER AS SELECT id FROM t""")

    def test_quotes(self):
        self.validate_all(
            "''''",
            write={
                "duckdb": "''''",
                "presto": "''''",
                "hive": "'\\''",
                "spark": "'\\''",
            },
        )
        self.validate_all(
            "'x'",
            write={
                "duckdb": "'x'",
                "presto": "'x'",
                "hive": "'x'",
                "spark": "'x'",
            },
        )
        self.validate_all(
            "'''x'''",
            write={
                "duckdb": "'''x'''",
                "presto": "'''x'''",
                "hive": "'\\'x\\''",
                "spark": "'\\'x\\''",
            },
        )
        self.validate_all(
            "'''x'",
            write={
                "duckdb": "'''x'",
                "presto": "'''x'",
                "hive": "'\\'x'",
                "spark": "'\\'x'",
            },
        )
        self.validate_all(
            "x IN ('a', 'a''b')",
            write={
                "duckdb": "x IN ('a', 'a''b')",
                "presto": "x IN ('a', 'a''b')",
                "hive": "x IN ('a', 'a\\'b')",
                "spark": "x IN ('a', 'a\\'b')",
            },
        )

    def test_unnest(self):
        self.validate_all(
            "SELECT a FROM x CROSS JOIN UNNEST(ARRAY(y)) AS t (a)",
            write={
                "presto": "SELECT a FROM x CROSS JOIN UNNEST(ARRAY[y]) AS t(a)",
                "hive": "SELECT a FROM x LATERAL VIEW EXPLODE(ARRAY(y)) t AS a",
                "spark": "SELECT a FROM x LATERAL VIEW EXPLODE(ARRAY(y)) t AS a",
            },
        )
        self.validate_all(
            "SELECT a FROM x CROSS JOIN UNNEST(ARRAY(y)) AS t (a) CROSS JOIN b",
            write={
                "presto": "SELECT a FROM x CROSS JOIN UNNEST(ARRAY[y]) AS t(a) CROSS JOIN b",
                "hive": "SELECT a FROM x CROSS JOIN b LATERAL VIEW EXPLODE(ARRAY(y)) t AS a",
            },
        )

    def test_unicode_string(self):
        for prefix in ("u&", "U&"):
            self.validate_all(
                f"{prefix}'Hello winter \\2603 !'",
                write={
                    "oracle": "U'Hello winter \\2603 !'",
                    "presto": "U&'Hello winter \\2603 !'",
                    "snowflake": "'Hello winter \\u2603 !'",
                    "spark": "'Hello winter \\u2603 !'",
                },
            )
            self.validate_all(
                f"{prefix}'Hello winter #2603 !' UESCAPE '#'",
                write={
                    "oracle": "U'Hello winter \\2603 !'",
                    "presto": "U&'Hello winter #2603 !' UESCAPE '#'",
                    "snowflake": "'Hello winter \\u2603 !'",
                    "spark": "'Hello winter \\u2603 !'",
                },
            )
2025-02-13 21:17:51 +01:00
def test_presto ( self ) :
2025-02-13 21:41:14 +01:00
self . assertEqual (
exp . func ( " md5 " , exp . func ( " concat " , exp . cast ( " x " , " text " ) , exp . Literal . string ( " s " ) ) ) . sql (
dialect = " presto "
) ,
" LOWER(TO_HEX(MD5(TO_UTF8(CONCAT(CAST(x AS VARCHAR), CAST( ' s ' AS VARCHAR)))))) " ,
)
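
        # The ELEMENT_AT rewrites below emit warnings through sqlglot's helper
        # logger (e.g. when an array index offset is applied for a target
        # dialect), hence the assertLogs guard.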
        with self.assertLogs(helper_logger):
            self.validate_all(
                "SELECT COALESCE(ELEMENT_AT(MAP_FROM_ENTRIES(ARRAY[(51, '1')]), id), quantity) FROM my_table",
                write={
                    "postgres": UnsupportedError,
                    "presto": "SELECT COALESCE(ELEMENT_AT(MAP_FROM_ENTRIES(ARRAY[(51, '1')]), id), quantity) FROM my_table",
                },
            )
            self.validate_all(
                "SELECT ELEMENT_AT(ARRAY[1, 2, 3], 4)",
                write={
                    "": "SELECT ARRAY(1, 2, 3)[3]",
                    "bigquery": "SELECT [1, 2, 3][SAFE_ORDINAL(4)]",
                    "postgres": "SELECT (ARRAY[1, 2, 3])[4]",
                    "presto": "SELECT ELEMENT_AT(ARRAY[1, 2, 3], 4)",
                },
            )

        self.validate_identity("SELECT a FROM t GROUP BY a, ROLLUP (b), ROLLUP (c), ROLLUP (d)")
        self.validate_identity("SELECT a FROM test TABLESAMPLE BERNOULLI (50)")
        self.validate_identity("SELECT a FROM test TABLESAMPLE SYSTEM (75)")
        self.validate_identity("string_agg(x, ',')", "ARRAY_JOIN(ARRAY_AGG(x), ',')")
        self.validate_identity("SELECT * FROM x OFFSET 1 LIMIT 1")
        self.validate_identity("SELECT * FROM x OFFSET 1 FETCH FIRST 1 ROWS ONLY")
        self.validate_identity("SELECT BOOL_OR(a > 10) FROM asd AS T(a)")
        self.validate_identity("SELECT * FROM (VALUES (1))")
        self.validate_identity("START TRANSACTION READ WRITE, ISOLATION LEVEL SERIALIZABLE")
        self.validate_identity("START TRANSACTION ISOLATION LEVEL REPEATABLE READ")
        self.validate_identity("APPROX_PERCENTILE(a, b, c, d)")
        self.validate_identity(
            "SELECT SPLIT_TO_MAP('a:1;b:2;a:3', ';', ':', (k, v1, v2) -> CONCAT(v1, v2))"
        )
        self.validate_identity(
            "SELECT * FROM example.testdb.customer_orders FOR VERSION AS OF 8954597067493422955"
        )
        self.validate_identity(
            "SELECT * FROM example.testdb.customer_orders FOR TIMESTAMP AS OF CAST('2022-03-23 09:59:29.803 Europe/Vienna' AS TIMESTAMP)"
        )
        self.validate_identity(
            "SELECT origin_state, destination_state, origin_zip, SUM(package_weight) FROM shipping GROUP BY ALL CUBE (origin_state, destination_state), ROLLUP (origin_state, origin_zip)"
        )
        self.validate_identity(
            "SELECT origin_state, destination_state, origin_zip, SUM(package_weight) FROM shipping GROUP BY DISTINCT CUBE (origin_state, destination_state), ROLLUP (origin_state, origin_zip)"
        )
        self.validate_identity(
            "SELECT JSON_EXTRACT_SCALAR(CAST(extra AS JSON), '$.value_b'), COUNT(*) FROM table_a GROUP BY DISTINCT (JSON_EXTRACT_SCALAR(CAST(extra AS JSON), '$.value_b'))"
        )

        self.validate_all(
            "SELECT LAST_DAY_OF_MONTH(CAST('2008-11-25' AS DATE))",
            read={
                "duckdb": "SELECT LAST_DAY(CAST('2008-11-25' AS DATE))",
            },
            write={
                "duckdb": "SELECT LAST_DAY(CAST('2008-11-25' AS DATE))",
                "presto": "SELECT LAST_DAY_OF_MONTH(CAST('2008-11-25' AS DATE))",
            },
        )
        self.validate_all(
            "SELECT MAX_BY(a.id, a.timestamp) FROM a",
            read={
                "bigquery": "SELECT MAX_BY(a.id, a.timestamp) FROM a",
                "clickhouse": "SELECT argMax(a.id, a.timestamp) FROM a",
                "duckdb": "SELECT MAX_BY(a.id, a.timestamp) FROM a",
                "snowflake": "SELECT MAX_BY(a.id, a.timestamp) FROM a",
                "spark": "SELECT MAX_BY(a.id, a.timestamp) FROM a",
                "teradata": "SELECT MAX_BY(a.id, a.timestamp) FROM a",
            },
            write={
                "bigquery": "SELECT MAX_BY(a.id, a.timestamp) FROM a",
                "clickhouse": "SELECT argMax(a.id, a.timestamp) FROM a",
                "duckdb": "SELECT ARG_MAX(a.id, a.timestamp) FROM a",
                "presto": "SELECT MAX_BY(a.id, a.timestamp) FROM a",
                "snowflake": "SELECT MAX_BY(a.id, a.timestamp) FROM a",
                "spark": "SELECT MAX_BY(a.id, a.timestamp) FROM a",
                "teradata": "SELECT MAX_BY(a.id, a.timestamp) FROM a",
            },
        )
        self.validate_all(
            "SELECT MIN_BY(a.id, a.timestamp, 3) FROM a",
            write={
                "clickhouse": "SELECT argMin(a.id, a.timestamp) FROM a",
                "duckdb": "SELECT ARG_MIN(a.id, a.timestamp, 3) FROM a",
                "presto": "SELECT MIN_BY(a.id, a.timestamp, 3) FROM a",
                "snowflake": "SELECT MIN_BY(a.id, a.timestamp, 3) FROM a",
                "spark": "SELECT MIN_BY(a.id, a.timestamp) FROM a",
                "teradata": "SELECT MIN_BY(a.id, a.timestamp, 3) FROM a",
            },
        )
        self.validate_all(
            """JSON '"foo"'""",
            write={
                "bigquery": """PARSE_JSON('"foo"')""",
                "postgres": """CAST('"foo"' AS JSON)""",
                "presto": """JSON_PARSE('"foo"')""",
                "snowflake": """PARSE_JSON('"foo"')""",
            },
        )
        self.validate_all(
            "SELECT ROW(1, 2)",
            write={
                "presto": "SELECT ROW(1, 2)",
                "spark": "SELECT STRUCT(1, 2)",
            },
        )
        self.validate_all(
            "ARBITRARY(x)",
            read={
                "bigquery": "ANY_VALUE(x)",
                "clickhouse": "any(x)",
                "databricks": "ANY_VALUE(x)",
                "doris": "ANY_VALUE(x)",
                "drill": "ANY_VALUE(x)",
                "duckdb": "ANY_VALUE(x)",
                "hive": "FIRST(x)",
                "mysql": "ANY_VALUE(x)",
                "oracle": "ANY_VALUE(x)",
                "redshift": "ANY_VALUE(x)",
                "snowflake": "ANY_VALUE(x)",
                "spark": "ANY_VALUE(x)",
                "spark2": "FIRST(x)",
            },
            write={
                "bigquery": "ANY_VALUE(x)",
                "clickhouse": "any(x)",
                "databricks": "ANY_VALUE(x)",
                "doris": "ANY_VALUE(x)",
                "drill": "ANY_VALUE(x)",
                "duckdb": "ANY_VALUE(x)",
                "hive": "FIRST(x)",
                "mysql": "ANY_VALUE(x)",
                "oracle": "ANY_VALUE(x)",
                "postgres": "MAX(x)",
                "presto": "ARBITRARY(x)",
                "redshift": "ANY_VALUE(x)",
                "snowflake": "ANY_VALUE(x)",
                "spark": "ANY_VALUE(x)",
                "spark2": "FIRST(x)",
                "sqlite": "MAX(x)",
                "tsql": "MAX(x)",
            },
        )
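
        # Dialects lacking an ANY_VALUE-style aggregate (postgres, sqlite and
        # tsql above) fall back to MAX(x) as an approximation of ARBITRARY.
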
        self.validate_all(
            "STARTS_WITH('abc', 'a')",
            read={"spark": "STARTSWITH('abc', 'a')"},
            write={
                "presto": "STARTS_WITH('abc', 'a')",
                "snowflake": "STARTSWITH('abc', 'a')",
                "spark": "STARTSWITH('abc', 'a')",
            },
        )
        self.validate_all(
            "IS_NAN(x)",
            read={
                "spark": "ISNAN(x)",
            },
            write={
                "presto": "IS_NAN(x)",
                "spark": "ISNAN(x)",
                "spark2": "ISNAN(x)",
            },
        )
        self.validate_all("VALUES 1, 2, 3", write={"presto": "VALUES (1), (2), (3)"})
        self.validate_all("INTERVAL '1 day'", write={"trino": "INTERVAL '1' DAY"})
        self.validate_all("(5 * INTERVAL '7' DAY)", read={"": "INTERVAL '5' WEEK"})
        self.validate_all("(5 * INTERVAL '7' DAY)", read={"": "INTERVAL '5' WEEKS"})
        self.validate_all(
            "SELECT SUBSTRING(a, 1, 3), SUBSTRING(a, LENGTH(a) - (3 - 1))",
            read={
                "redshift": "SELECT LEFT(a, 3), RIGHT(a, 3)",
            },
        )
        self.validate_all(
            "WITH RECURSIVE t(n) AS (SELECT 1 AS n UNION ALL SELECT n + 1 AS n FROM t WHERE n < 4) SELECT SUM(n) FROM t",
            read={
                "postgres": "WITH RECURSIVE t AS (SELECT 1 AS n UNION ALL SELECT n + 1 AS n FROM t WHERE n < 4) SELECT SUM(n) FROM t",
            },
        )
        self.validate_all(
            "WITH RECURSIVE t(n, k) AS (SELECT 1 AS n, 2 AS k) SELECT SUM(n) FROM t",
            read={
                "postgres": "WITH RECURSIVE t AS (SELECT 1 AS n, 2 as k) SELECT SUM(n) FROM t",
            },
        )
        self.validate_all(
            "WITH RECURSIVE t1(n) AS (SELECT 1 AS n), t2(n) AS (SELECT 2 AS n) SELECT SUM(t1.n), SUM(t2.n) FROM t1, t2",
            read={
                "postgres": "WITH RECURSIVE t1 AS (SELECT 1 AS n), t2 AS (SELECT 2 AS n) SELECT SUM(t1.n), SUM(t2.n) FROM t1, t2",
            },
        )
        self.validate_all(
            "WITH RECURSIVE t(n, _c_0) AS (SELECT 1 AS n, (1 + 2)) SELECT * FROM t",
            read={
                "postgres": "WITH RECURSIVE t AS (SELECT 1 AS n, (1 + 2)) SELECT * FROM t",
            },
        )
        self.validate_all(
            'WITH RECURSIVE t(n, "1") AS (SELECT n, 1 FROM tbl) SELECT * FROM t',
            read={
                "postgres": "WITH RECURSIVE t AS (SELECT n, 1 FROM tbl) SELECT * FROM t",
            },
        )
        self.validate_all(
            "SELECT JSON_OBJECT(KEY 'key1' VALUE 1, KEY 'key2' VALUE TRUE)",
            write={
                "presto": "SELECT JSON_OBJECT('key1': 1, 'key2': TRUE)",
            },
        )
        self.validate_all(
            "ARRAY_AGG(x ORDER BY y DESC)",
            write={
                "hive": "COLLECT_LIST(x)",
                "presto": "ARRAY_AGG(x ORDER BY y DESC)",
                "spark": "COLLECT_LIST(x)",
                "trino": "ARRAY_AGG(x ORDER BY y DESC)",
            },
        )
        self.validate_all(
            'SELECT a."b" FROM "foo"',
            write={
                "duckdb": 'SELECT a."b" FROM "foo"',
                "presto": 'SELECT a."b" FROM "foo"',
                "spark": "SELECT a.`b` FROM `foo`",
            },
        )
        self.validate_all(
            "SELECT ARRAY[1, 2]",
            write={
                "bigquery": "SELECT [1, 2]",
                "duckdb": "SELECT [1, 2]",
                "presto": "SELECT ARRAY[1, 2]",
                "spark": "SELECT ARRAY(1, 2)",
            },
        )
        self.validate_all(
            "SELECT APPROX_DISTINCT(a) FROM foo",
            write={
                "duckdb": "SELECT APPROX_COUNT_DISTINCT(a) FROM foo",
                "presto": "SELECT APPROX_DISTINCT(a) FROM foo",
                "hive": "SELECT APPROX_COUNT_DISTINCT(a) FROM foo",
                "spark": "SELECT APPROX_COUNT_DISTINCT(a) FROM foo",
            },
        )
        self.validate_all(
            "SELECT APPROX_DISTINCT(a, 0.1) FROM foo",
            write={
                "duckdb": "SELECT APPROX_COUNT_DISTINCT(a) FROM foo",
                "presto": "SELECT APPROX_DISTINCT(a, 0.1) FROM foo",
                "hive": "SELECT APPROX_COUNT_DISTINCT(a) FROM foo",
                "spark": "SELECT APPROX_COUNT_DISTINCT(a, 0.1) FROM foo",
            },
        )
        self.validate_all(
            "SELECT APPROX_DISTINCT(a, 0.1) FROM foo",
            write={
                "presto": "SELECT APPROX_DISTINCT(a, 0.1) FROM foo",
                "hive": UnsupportedError,
                "spark": "SELECT APPROX_COUNT_DISTINCT(a, 0.1) FROM foo",
            },
        )
        self.validate_all(
            "SELECT JSON_EXTRACT(x, '$.name')",
            write={
                "presto": "SELECT JSON_EXTRACT(x, '$.name')",
                "hive": "SELECT GET_JSON_OBJECT(x, '$.name')",
                "spark": "SELECT GET_JSON_OBJECT(x, '$.name')",
            },
        )
        self.validate_all(
            "SELECT JSON_EXTRACT_SCALAR(x, '$.name')",
            write={
                "presto": "SELECT JSON_EXTRACT_SCALAR(x, '$.name')",
                "hive": "SELECT GET_JSON_OBJECT(x, '$.name')",
                "spark": "SELECT GET_JSON_OBJECT(x, '$.name')",
            },
        )
        self.validate_all(
            "'\u6bdb'",
            write={
                "presto": "'\u6bdb'",
                "hive": "'\u6bdb'",
                "spark": "'\u6bdb'",
            },
        )
        self.validate_all(
            "SELECT ARRAY_SORT(x, (left, right) -> -1)",
            write={
                "duckdb": "SELECT ARRAY_SORT(x)",
                "presto": 'SELECT ARRAY_SORT(x, ("left", "right") -> -1)',
                "hive": "SELECT SORT_ARRAY(x)",
                "spark": "SELECT ARRAY_SORT(x, (left, right) -> -1)",
            },
        )
        self.validate_all(
            "SELECT ARRAY_SORT(x)",
            write={
                "presto": "SELECT ARRAY_SORT(x)",
                "hive": "SELECT SORT_ARRAY(x)",
                "spark": "SELECT ARRAY_SORT(x)",
            },
        )
        self.validate_all(
            "SELECT ARRAY_SORT(x, (left, right) -> -1)",
            write={
                "hive": UnsupportedError,
            },
        )
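
        # Hive's SORT_ARRAY takes no comparator lambda, so the custom
        # comparator above is unsupported there.
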
        self.validate_all(
            "MAP(a, b)",
            write={
                "hive": UnsupportedError,
                "spark": "MAP_FROM_ARRAYS(a, b)",
                "snowflake": UnsupportedError,
            },
        )
        self.validate_all(
            "MAP(ARRAY(a, b), ARRAY(c, d))",
            write={
                "hive": "MAP(a, c, b, d)",
                "presto": "MAP(ARRAY[a, b], ARRAY[c, d])",
                "spark": "MAP_FROM_ARRAYS(ARRAY(a, b), ARRAY(c, d))",
                "snowflake": "OBJECT_CONSTRUCT(a, c, b, d)",
            },
        )
        self.validate_all(
            "MAP(ARRAY('a'), ARRAY('b'))",
            write={
                "hive": "MAP('a', 'b')",
                "presto": "MAP(ARRAY['a'], ARRAY['b'])",
                "spark": "MAP_FROM_ARRAYS(ARRAY('a'), ARRAY('b'))",
                "snowflake": "OBJECT_CONSTRUCT('a', 'b')",
            },
        )
        self.validate_all(
            "SELECT * FROM UNNEST(ARRAY['7', '14']) AS x",
            write={
                "bigquery": "SELECT * FROM UNNEST(['7', '14'])",
                "presto": "SELECT * FROM UNNEST(ARRAY['7', '14']) AS x",
                "hive": "SELECT * FROM EXPLODE(ARRAY('7', '14')) AS x",
                "spark": "SELECT * FROM EXPLODE(ARRAY('7', '14')) AS x",
            },
        )
        self.validate_all(
            "SELECT * FROM UNNEST(ARRAY['7', '14']) AS x(y)",
            write={
                "bigquery": "SELECT * FROM UNNEST(['7', '14']) AS y",
                "presto": "SELECT * FROM UNNEST(ARRAY['7', '14']) AS x(y)",
                "hive": "SELECT * FROM EXPLODE(ARRAY('7', '14')) AS x(y)",
                "spark": "SELECT * FROM EXPLODE(ARRAY('7', '14')) AS x(y)",
            },
        )
        self.validate_all(
            "WITH RECURSIVE t(n) AS (VALUES (1) UNION ALL SELECT n+1 FROM t WHERE n < 100 ) SELECT sum(n) FROM t",
            write={
                "presto": "WITH RECURSIVE t(n) AS (VALUES (1) UNION ALL SELECT n + 1 FROM t WHERE n < 100) SELECT SUM(n) FROM t",
                "spark": UnsupportedError,
            },
        )
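
        # Recursive CTEs can't be generated for Spark here, hence the
        # UnsupportedError above.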

        self.validate_all(
            "SELECT a, b, c, d, sum(y) FROM z GROUP BY CUBE(a) ROLLUP(a), GROUPING SETS((b, c)), d",
            write={
                "presto": "SELECT a, b, c, d, SUM(y) FROM z GROUP BY d, GROUPING SETS ((b, c)), CUBE (a), ROLLUP (a)",
                "hive": "SELECT a, b, c, d, SUM(y) FROM z GROUP BY d, GROUPING SETS ((b, c)), CUBE (a), ROLLUP (a)",
            },
        )
        self.validate_all(
            "JSON_FORMAT(CAST(MAP_FROM_ENTRIES(ARRAY[('action_type', 'at')]) AS JSON))",
            write={
                "presto": "JSON_FORMAT(CAST(MAP_FROM_ENTRIES(ARRAY[('action_type', 'at')]) AS JSON))",
                "spark": "TO_JSON(MAP_FROM_ENTRIES(ARRAY(('action_type', 'at'))))",
            },
        )
        self.validate_all(
            "JSON_FORMAT(x)",
            write={
                "bigquery": "TO_JSON_STRING(x)",
                "duckdb": "CAST(TO_JSON(x) AS TEXT)",
                "presto": "JSON_FORMAT(x)",
                "spark": "TO_JSON(x)",
            },
        )
        self.validate_all(
            """JSON_FORMAT(JSON '"x"')""",
            write={
                "bigquery": """TO_JSON_STRING(PARSE_JSON('"x"'))""",
                "duckdb": """CAST(TO_JSON(JSON('"x"')) AS TEXT)""",
                "presto": """JSON_FORMAT(JSON_PARSE('"x"'))""",
                "spark": """REGEXP_EXTRACT(TO_JSON(FROM_JSON('["x"]', SCHEMA_OF_JSON('["x"]'))), '^.(.*).$', 1)""",
            },
        )
        self.validate_all(
            """SELECT JSON_FORMAT(JSON '{"a": 1, "b": "c"}')""",
            write={
                "spark": """SELECT REGEXP_EXTRACT(TO_JSON(FROM_JSON('[{"a": 1, "b": "c"}]', SCHEMA_OF_JSON('[{"a": 1, "b": "c"}]'))), '^.(.*).$', 1)""",
            },
        )
        self.validate_all(
            """SELECT JSON_FORMAT(JSON '[1, 2, 3]')""",
            write={
                "spark": "SELECT REGEXP_EXTRACT(TO_JSON(FROM_JSON('[[1, 2, 3]]', SCHEMA_OF_JSON('[[1, 2, 3]]'))), '^.(.*).$', 1)",
            },
        )
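
        # Spark's TO_JSON only accepts structured input, so the scalar JSON
        # values above are wrapped in a one-element array and unwrapped again
        # with REGEXP_EXTRACT.
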
        self.validate_all(
            "REGEXP_EXTRACT('abc', '(a)(b)(c)')",
            read={
                "presto": "REGEXP_EXTRACT('abc', '(a)(b)(c)')",
                "trino": "REGEXP_EXTRACT('abc', '(a)(b)(c)')",
                "duckdb": "REGEXP_EXTRACT('abc', '(a)(b)(c)')",
                "snowflake": "REGEXP_SUBSTR('abc', '(a)(b)(c)')",
            },
            write={
                "presto": "REGEXP_EXTRACT('abc', '(a)(b)(c)')",
                "trino": "REGEXP_EXTRACT('abc', '(a)(b)(c)')",
                "duckdb": "REGEXP_EXTRACT('abc', '(a)(b)(c)')",
                "snowflake": "REGEXP_SUBSTR('abc', '(a)(b)(c)')",
                "hive": "REGEXP_EXTRACT('abc', '(a)(b)(c)', 0)",
                "spark2": "REGEXP_EXTRACT('abc', '(a)(b)(c)', 0)",
                "spark": "REGEXP_EXTRACT('abc', '(a)(b)(c)', 0)",
                "databricks": "REGEXP_EXTRACT('abc', '(a)(b)(c)', 0)",
            },
        )
        self.validate_all(
            "CURRENT_USER",
            read={
                "presto": "CURRENT_USER",
                "trino": "CURRENT_USER",
                "snowflake": "CURRENT_USER()",  # Although the ANSI standard is CURRENT_USER
            },
            write={
                "trino": "CURRENT_USER",
                "snowflake": "CURRENT_USER()",
            },
        )
        self.validate_identity(
            "SELECT id, FIRST_VALUE(is_deleted) OVER (PARTITION BY id) AS first_is_deleted, NTH_VALUE(is_deleted, 2) OVER (PARTITION BY id) AS nth_is_deleted, LAST_VALUE(is_deleted) OVER (PARTITION BY id) AS last_is_deleted FROM my_table"
        )

    def test_encode_decode(self):
        self.validate_identity("FROM_UTF8(x, y)")

        self.validate_all(
            "TO_UTF8(x)",
            read={
                "duckdb": "ENCODE(x)",
                "spark": "ENCODE(x, 'utf-8')",
            },
            write={
                "duckdb": "ENCODE(x)",
                "presto": "TO_UTF8(x)",
                "spark": "ENCODE(x, 'utf-8')",
            },
        )
        self.validate_all(
            "FROM_UTF8(x)",
            read={
                "duckdb": "DECODE(x)",
                "spark": "DECODE(x, 'utf-8')",
            },
            write={
                "duckdb": "DECODE(x)",
                "presto": "FROM_UTF8(x)",
                "spark": "DECODE(x, 'utf-8')",
            },
        )
        self.validate_all(
            "ENCODE(x, 'invalid')",
            write={
                "presto": UnsupportedError,
                "duckdb": UnsupportedError,
            },
        )
        self.validate_all(
            "DECODE(x, 'invalid')",
            write={
                "presto": UnsupportedError,
                "duckdb": UnsupportedError,
            },
        )

    def test_hex_unhex(self):
        self.validate_all(
            "TO_HEX(x)",
            write={
                "spark": "HEX(x)",
            },
        )
        self.validate_all(
            "FROM_HEX(x)",
            write={
                "spark": "UNHEX(x)",
            },
        )
        self.validate_all(
            "HEX(x)",
            write={
                "presto": "TO_HEX(x)",
            },
        )
        self.validate_all(
            "UNHEX(x)",
            write={
                "presto": "FROM_HEX(x)",
            },
        )

    def test_json(self):
        with self.assertLogs(helper_logger):
            self.validate_all(
                """SELECT JSON_EXTRACT_SCALAR(TRY(FILTER(CAST(JSON_EXTRACT('{"k1": [{"k2": "{\\"k3\\": 1}", "k4": "v"}]}', '$.k1') AS ARRAY(MAP(VARCHAR, VARCHAR))), x -> x['k4'] = 'v')[1]['k2']), '$.k3')""",
                write={
                    "presto": """SELECT JSON_EXTRACT_SCALAR(TRY(FILTER(CAST(JSON_EXTRACT('{"k1": [{"k2": "{\\"k3\\": 1}", "k4": "v"}]}', '$.k1') AS ARRAY(MAP(VARCHAR, VARCHAR))), x -> x['k4'] = 'v')[1]['k2']), '$.k3')""",
                    "spark": """SELECT GET_JSON_OBJECT(FILTER(FROM_JSON(GET_JSON_OBJECT('{"k1": [{"k2": "{\\\\"k3\\\\": 1}", "k4": "v"}]}', '$.k1'), 'ARRAY<MAP<STRING, STRING>>'), x -> x['k4'] = 'v')[0]['k2'], '$.k3')""",
                },
            )

        self.validate_all(
            "SELECT CAST(JSON '[1,23,456]' AS ARRAY(INTEGER))",
            write={
                "spark": "SELECT FROM_JSON('[1,23,456]', 'ARRAY<INT>')",
                "presto": "SELECT CAST(JSON_PARSE('[1,23,456]') AS ARRAY(INTEGER))",
            },
        )
        self.validate_all(
            """SELECT CAST(JSON '{"k1":1, "k2":23, "k3":456}' AS MAP(VARCHAR, INTEGER))""",
            write={
                "spark": 'SELECT FROM_JSON(\'{"k1":1, "k2":23, "k3":456}\', \'MAP<STRING, INT>\')',
                "presto": 'SELECT CAST(JSON_PARSE(\'{"k1":1, "k2":23, "k3":456}\') AS MAP(VARCHAR, INTEGER))',
            },
        )
        self.validate_all(
            "SELECT CAST(ARRAY [1, 23, 456] AS JSON)",
            write={
                "spark": "SELECT TO_JSON(ARRAY(1, 23, 456))",
                "presto": "SELECT CAST(ARRAY[1, 23, 456] AS JSON)",
            },
        )

    def test_match_recognize(self):
        self.validate_identity(
            """SELECT
  *
FROM orders
MATCH_RECOGNIZE (
  PARTITION BY custkey
  ORDER BY
    orderdate
  MEASURES
    A.totalprice AS starting_price,
    LAST(B.totalprice) AS bottom_price,
    LAST(C.totalprice) AS top_price
  ONE ROW PER MATCH
  AFTER MATCH SKIP PAST LAST ROW
  PATTERN (A B+ C+ D+)
  DEFINE
    B AS totalprice < PREV(totalprice),
    C AS totalprice > PREV(totalprice) AND totalprice <= A.totalprice,
    D AS totalprice > PREV(totalprice),
    E AS MAX(foo) >= NEXT(bar)
)""",
            pretty=True,
        )

    def test_to_char(self):
        self.validate_all(
            "TO_CHAR(ts, 'dd')",
            write={
                "bigquery": "FORMAT_DATE('%d', ts)",
                "presto": "DATE_FORMAT(ts, '%d')",
            },
        )
        self.validate_all(
            "TO_CHAR(ts, 'hh')",
            write={
                "bigquery": "FORMAT_DATE('%H', ts)",
                "presto": "DATE_FORMAT(ts, '%H')",
            },
        )
        self.validate_all(
            "TO_CHAR(ts, 'hh24')",
            write={
                "bigquery": "FORMAT_DATE('%H', ts)",
                "presto": "DATE_FORMAT(ts, '%H')",
            },
        )
        self.validate_all(
            "TO_CHAR(ts, 'mi')",
            write={
                "bigquery": "FORMAT_DATE('%M', ts)",
                "presto": "DATE_FORMAT(ts, '%i')",
            },
        )
        self.validate_all(
            "TO_CHAR(ts, 'mm')",
            write={
                "bigquery": "FORMAT_DATE('%m', ts)",
                "presto": "DATE_FORMAT(ts, '%m')",
            },
        )
        self.validate_all(
            "TO_CHAR(ts, 'ss')",
            write={
                "bigquery": "FORMAT_DATE('%S', ts)",
                "presto": "DATE_FORMAT(ts, '%s')",
            },
        )
        self.validate_all(
            "TO_CHAR(ts, 'yyyy')",
            write={
                "bigquery": "FORMAT_DATE('%Y', ts)",
                "presto": "DATE_FORMAT(ts, '%Y')",
            },
        )
        self.validate_all(
            "TO_CHAR(ts, 'yy')",
            write={
                "bigquery": "FORMAT_DATE('%y', ts)",
                "presto": "DATE_FORMAT(ts, '%y')",
            },
        )

    def test_signum(self):
        self.validate_all(
            "SIGN(x)",
            read={
                "presto": "SIGN(x)",
                "spark": "SIGNUM(x)",
                "starrocks": "SIGN(x)",
            },
            write={
                "presto": "SIGN(x)",
                "spark": "SIGN(x)",
                "starrocks": "SIGN(x)",
            },
        )

    def test_json_vs_row_extract(self):
        for dialect in ("trino", "presto"):
            s = parse_one('SELECT col:x:y."special string"', read="snowflake")

            dialect_json_extract_setting = f"{dialect}, variant_extract_is_json_extract=True"
            dialect_row_access_setting = f"{dialect}, variant_extract_is_json_extract=False"

            # By default, Snowflake VARIANT will generate JSON_EXTRACT() in Presto/Trino
            json_extract_result = """SELECT JSON_EXTRACT(col, '$.x.y["special string"]')"""
            self.assertEqual(s.sql(dialect), json_extract_result)
            self.assertEqual(s.sql(dialect_json_extract_setting), json_extract_result)

            # If the setting is overridden to False, then generate ROW access (dot notation)
            self.assertEqual(s.sql(dialect_row_access_setting), 'SELECT col.x.y."special string"')

    def test_analyze(self):
        self.validate_identity("ANALYZE tbl")
        self.validate_identity("ANALYZE tbl WITH (prop1=val1, prop2=val2)")