import unittest
from sqlglot import Dialect, Dialects, ErrorLevel, UnsupportedError, parse_one


class Validator(unittest.TestCase):
    dialect = None

    def parse_one(self, sql):
        return parse_one(sql, read=self.dialect)

    def validate_identity(self, sql, write_sql=None):
        expression = self.parse_one(sql)
        self.assertEqual(write_sql or sql, expression.sql(dialect=self.dialect))
        return expression
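    # Usage sketch (hypothetical SQL): a dialect-specific subclass sets `dialect`
    # and asserts a parse/generate round trip, optionally against a normalized form:
    #   self.validate_identity("SELECT 1")
    #   self.validate_identity("select 1 as x", "SELECT 1 AS x")  # keywords uppercased on output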

    def validate_all(self, sql, read=None, write=None, pretty=False):
        """
        Validate that:
        1. Everything in `read` transpiles to `sql`
        2. `sql` transpiles to everything in `write`

        Args:
            sql (str): Main SQL expression
            read (dict): Mapping of dialect -> SQL
            write (dict): Mapping of dialect -> SQL
            pretty (bool): prettify both read and write
        """
        expression = self.parse_one(sql)

        for read_dialect, read_sql in (read or {}).items():
            with self.subTest(f"{read_dialect} -> {sql}"):
                self.assertEqual(
                    parse_one(read_sql, read_dialect).sql(
                        self.dialect, unsupported_level=ErrorLevel.IGNORE, pretty=pretty
                    ),
                    sql,
                )

        for write_dialect, write_sql in (write or {}).items():
            with self.subTest(f"{sql} -> {write_dialect}"):
                if write_sql is UnsupportedError:
                    with self.assertRaises(UnsupportedError):
                        expression.sql(write_dialect, unsupported_level=ErrorLevel.RAISE)
                else:
                    self.assertEqual(
                        expression.sql(
                            write_dialect,
                            unsupported_level=ErrorLevel.IGNORE,
                            pretty=pretty,
                        ),
                        write_sql,
                    )


class TestDialect(Validator):
    maxDiff = None

    def test_enum(self):
        for dialect in Dialects:
            self.assertIsNotNone(Dialect[dialect])
            self.assertIsNotNone(Dialect.get(dialect))
            self.assertIsNotNone(Dialect.get_or_raise(dialect))
            self.assertIsNotNone(Dialect[dialect.value])
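        # The loop exercises every registered dialect: lookups accept both enum
        # members and their string values, Dialect.get finds a match here, and
        # Dialect.get_or_raise would presumably raise for an unknown name.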

    def test_cast(self):
        self.validate_all(
            "CAST(a AS TEXT)",
            write={
                "bigquery": "CAST(a AS STRING)",
                "clickhouse": "CAST(a AS TEXT)",
                "drill": "CAST(a AS VARCHAR)",
                "duckdb": "CAST(a AS TEXT)",
                "mysql": "CAST(a AS TEXT)",
                "hive": "CAST(a AS STRING)",
                "oracle": "CAST(a AS CLOB)",
                "postgres": "CAST(a AS TEXT)",
                "presto": "CAST(a AS VARCHAR)",
                "redshift": "CAST(a AS TEXT)",
                "snowflake": "CAST(a AS TEXT)",
                "spark": "CAST(a AS STRING)",
                "starrocks": "CAST(a AS STRING)",
            },
        )
        self.validate_all(
            "CAST(a AS BINARY(4))",
            write={
                "bigquery": "CAST(a AS BINARY(4))",
                "clickhouse": "CAST(a AS BINARY(4))",
                "drill": "CAST(a AS VARBINARY(4))",
                "duckdb": "CAST(a AS BINARY(4))",
                "mysql": "CAST(a AS BINARY(4))",
                "hive": "CAST(a AS BINARY(4))",
                "oracle": "CAST(a AS BLOB(4))",
                "postgres": "CAST(a AS BYTEA(4))",
                "presto": "CAST(a AS VARBINARY(4))",
                "redshift": "CAST(a AS VARBYTE(4))",
                "snowflake": "CAST(a AS BINARY(4))",
                "sqlite": "CAST(a AS BLOB(4))",
                "spark": "CAST(a AS BINARY(4))",
                "starrocks": "CAST(a AS BINARY(4))",
            },
        )
        self.validate_all(
            "CAST(a AS VARBINARY(4))",
            write={
                "bigquery": "CAST(a AS VARBINARY(4))",
                "clickhouse": "CAST(a AS VARBINARY(4))",
                "duckdb": "CAST(a AS VARBINARY(4))",
                "mysql": "CAST(a AS VARBINARY(4))",
                "hive": "CAST(a AS BINARY(4))",
                "oracle": "CAST(a AS BLOB(4))",
                "postgres": "CAST(a AS BYTEA(4))",
                "presto": "CAST(a AS VARBINARY(4))",
                "redshift": "CAST(a AS VARBYTE(4))",
                "snowflake": "CAST(a AS VARBINARY(4))",
                "sqlite": "CAST(a AS BLOB(4))",
                "spark": "CAST(a AS BINARY(4))",
                "starrocks": "CAST(a AS VARBINARY(4))",
            },
        )
        self.validate_all(
            "CAST(MAP('a', '1') AS MAP(TEXT, TEXT))",
            write={
                "clickhouse": "CAST(map('a', '1') AS Map(TEXT, TEXT))",
            },
        )
        self.validate_all(
            "CAST(ARRAY(1, 2) AS ARRAY<TINYINT>)",
            write={
                "clickhouse": "CAST([1, 2] AS Array(Int8))",
            },
        )
        self.validate_all(
            "CAST((1, 2) AS STRUCT<a: TINYINT, b: SMALLINT, c: INT, d: BIGINT>)",
            write={
                "clickhouse": "CAST((1, 2) AS Tuple(a Int8, b Int16, c Int32, d Int64))",
            },
        )
        self.validate_all(
            "CAST(a AS DATETIME)",
            write={
                "postgres": "CAST(a AS TIMESTAMP)",
                "sqlite": "CAST(a AS DATETIME)",
            },
        )
        self.validate_all(
            "CAST(a AS STRING)",
            write={
                "bigquery": "CAST(a AS STRING)",
                "drill": "CAST(a AS VARCHAR)",
                "duckdb": "CAST(a AS TEXT)",
                "mysql": "CAST(a AS TEXT)",
                "hive": "CAST(a AS STRING)",
                "oracle": "CAST(a AS CLOB)",
                "postgres": "CAST(a AS TEXT)",
                "presto": "CAST(a AS VARCHAR)",
                "redshift": "CAST(a AS TEXT)",
                "snowflake": "CAST(a AS TEXT)",
                "spark": "CAST(a AS STRING)",
                "starrocks": "CAST(a AS STRING)",
            },
        )
        self.validate_all(
            "CAST(a AS VARCHAR)",
            write={
                "bigquery": "CAST(a AS STRING)",
                "drill": "CAST(a AS VARCHAR)",
                "duckdb": "CAST(a AS TEXT)",
                "mysql": "CAST(a AS VARCHAR)",
                "hive": "CAST(a AS STRING)",
                "oracle": "CAST(a AS VARCHAR2)",
                "postgres": "CAST(a AS VARCHAR)",
                "presto": "CAST(a AS VARCHAR)",
                "redshift": "CAST(a AS VARCHAR)",
                "snowflake": "CAST(a AS VARCHAR)",
                "spark": "CAST(a AS STRING)",
                "starrocks": "CAST(a AS VARCHAR)",
            },
        )
        self.validate_all(
            "CAST(a AS VARCHAR(3))",
            write={
                "bigquery": "CAST(a AS STRING(3))",
                "drill": "CAST(a AS VARCHAR(3))",
                "duckdb": "CAST(a AS TEXT(3))",
                "mysql": "CAST(a AS VARCHAR(3))",
                "hive": "CAST(a AS VARCHAR(3))",
                "oracle": "CAST(a AS VARCHAR2(3))",
                "postgres": "CAST(a AS VARCHAR(3))",
                "presto": "CAST(a AS VARCHAR(3))",
                "redshift": "CAST(a AS VARCHAR(3))",
                "snowflake": "CAST(a AS VARCHAR(3))",
                "spark": "CAST(a AS VARCHAR(3))",
                "starrocks": "CAST(a AS VARCHAR(3))",
            },
        )
        self.validate_all(
            "CAST(a AS SMALLINT)",
            write={
                "bigquery": "CAST(a AS INT64)",
                "drill": "CAST(a AS INTEGER)",
                "duckdb": "CAST(a AS SMALLINT)",
                "mysql": "CAST(a AS SMALLINT)",
                "hive": "CAST(a AS SMALLINT)",
                "oracle": "CAST(a AS NUMBER)",
                "postgres": "CAST(a AS SMALLINT)",
                "presto": "CAST(a AS SMALLINT)",
                "redshift": "CAST(a AS SMALLINT)",
                "snowflake": "CAST(a AS SMALLINT)",
                "spark": "CAST(a AS SHORT)",
                "sqlite": "CAST(a AS INTEGER)",
                "starrocks": "CAST(a AS SMALLINT)",
            },
        )
        self.validate_all(
            "TRY_CAST(a AS DOUBLE)",
            read={
                "postgres": "CAST(a AS DOUBLE PRECISION)",
                "redshift": "CAST(a AS DOUBLE PRECISION)",
            },
            write={
                "duckdb": "TRY_CAST(a AS DOUBLE)",
                "drill": "CAST(a AS DOUBLE)",
                "postgres": "CAST(a AS DOUBLE PRECISION)",
                "redshift": "CAST(a AS DOUBLE PRECISION)",
            },
        )
        self.validate_all(
            "CAST(a AS DOUBLE)",
            write={
                "bigquery": "CAST(a AS FLOAT64)",
                "clickhouse": "CAST(a AS Float64)",
                "drill": "CAST(a AS DOUBLE)",
                "duckdb": "CAST(a AS DOUBLE)",
                "mysql": "CAST(a AS DOUBLE)",
                "hive": "CAST(a AS DOUBLE)",
                "oracle": "CAST(a AS DOUBLE PRECISION)",
                "postgres": "CAST(a AS DOUBLE PRECISION)",
                "presto": "CAST(a AS DOUBLE)",
                "redshift": "CAST(a AS DOUBLE PRECISION)",
                "snowflake": "CAST(a AS DOUBLE)",
                "spark": "CAST(a AS DOUBLE)",
                "starrocks": "CAST(a AS DOUBLE)",
            },
        )
        self.validate_all(
            "CAST('1 DAY' AS INTERVAL)",
            write={
                "postgres": "CAST('1 DAY' AS INTERVAL)",
                "redshift": "CAST('1 DAY' AS INTERVAL)",
            },
        )
        self.validate_all(
            "CAST(a AS TIMESTAMP)",
            write={
                "starrocks": "CAST(a AS DATETIME)",
                "redshift": "CAST(a AS TIMESTAMP)",
            },
        )
        self.validate_all(
            "CAST(a AS TIMESTAMPTZ)",
            write={
                "starrocks": "CAST(a AS DATETIME)",
                "redshift": "CAST(a AS TIMESTAMPTZ)",
            },
        )
        self.validate_all("CAST(a AS TINYINT)", write={"oracle": "CAST(a AS NUMBER)"})
        self.validate_all("CAST(a AS SMALLINT)", write={"oracle": "CAST(a AS NUMBER)"})
        self.validate_all("CAST(a AS BIGINT)", write={"oracle": "CAST(a AS NUMBER)"})
        self.validate_all("CAST(a AS INT)", write={"oracle": "CAST(a AS NUMBER)"})
        self.validate_all(
            "CAST(a AS DECIMAL)",
            read={"oracle": "CAST(a AS NUMBER)"},
            write={"oracle": "CAST(a AS NUMBER)"},
        )

    def test_time(self):
        self.validate_all(
            "STR_TO_TIME(x, '%Y-%m-%dT%H:%M:%S')",
            read={
                "duckdb": "STRPTIME(x, '%Y-%m-%dT%H:%M:%S')",
            },
            write={
                "mysql": "STR_TO_DATE(x, '%Y-%m-%dT%H:%i:%S')",
                "duckdb": "STRPTIME(x, '%Y-%m-%dT%H:%M:%S')",
                "hive": "CAST(FROM_UNIXTIME(UNIX_TIMESTAMP(x, 'yyyy-MM-ddTHH:mm:ss')) AS TIMESTAMP)",
                "presto": "DATE_PARSE(x, '%Y-%m-%dT%H:%i:%S')",
                "drill": "TO_TIMESTAMP(x, 'yyyy-MM-dd''T''HH:mm:ss')",
                "redshift": "TO_TIMESTAMP(x, 'YYYY-MM-DDTHH:MI:SS')",
                "spark": "TO_TIMESTAMP(x, 'yyyy-MM-ddTHH:mm:ss')",
            },
        )
        self.validate_all(
            "STR_TO_TIME('2020-01-01', '%Y-%m-%d')",
            write={
                "drill": "TO_TIMESTAMP('2020-01-01', 'yyyy-MM-dd')",
                "duckdb": "STRPTIME('2020-01-01', '%Y-%m-%d')",
                "hive": "CAST('2020-01-01' AS TIMESTAMP)",
                "oracle": "TO_TIMESTAMP('2020-01-01', 'YYYY-MM-DD')",
                "postgres": "TO_TIMESTAMP('2020-01-01', 'YYYY-MM-DD')",
                "presto": "DATE_PARSE('2020-01-01', '%Y-%m-%d')",
                "redshift": "TO_TIMESTAMP('2020-01-01', 'YYYY-MM-DD')",
                "spark": "TO_TIMESTAMP('2020-01-01', 'yyyy-MM-dd')",
            },
        )
        self.validate_all(
            "STR_TO_TIME(x, '%y')",
            write={
                "drill": "TO_TIMESTAMP(x, 'yy')",
                "duckdb": "STRPTIME(x, '%y')",
                "hive": "CAST(FROM_UNIXTIME(UNIX_TIMESTAMP(x, 'yy')) AS TIMESTAMP)",
                "presto": "DATE_PARSE(x, '%y')",
                "oracle": "TO_TIMESTAMP(x, 'YY')",
                "postgres": "TO_TIMESTAMP(x, 'YY')",
                "redshift": "TO_TIMESTAMP(x, 'YY')",
                "spark": "TO_TIMESTAMP(x, 'yy')",
            },
        )
        self.validate_all(
            "STR_TO_UNIX('2020-01-01', '%Y-%M-%d')",
            write={
                "duckdb": "EPOCH(STRPTIME('2020-01-01', '%Y-%M-%d'))",
                "hive": "UNIX_TIMESTAMP('2020-01-01', 'yyyy-mm-dd')",
                "presto": "TO_UNIXTIME(DATE_PARSE('2020-01-01', '%Y-%i-%d'))",
                "starrocks": "UNIX_TIMESTAMP('2020-01-01', '%Y-%i-%d')",
            },
        )
        self.validate_all(
            "TIME_STR_TO_DATE('2020-01-01')",
            write={
                "drill": "CAST('2020-01-01' AS DATE)",
                "duckdb": "CAST('2020-01-01' AS DATE)",
                "hive": "TO_DATE('2020-01-01')",
                "presto": "DATE_PARSE('2020-01-01', '%Y-%m-%d %H:%i:%s')",
                "starrocks": "TO_DATE('2020-01-01')",
            },
        )
        self.validate_all(
            "TIME_STR_TO_TIME('2020-01-01')",
            write={
                "drill": "CAST('2020-01-01' AS TIMESTAMP)",
                "duckdb": "CAST('2020-01-01' AS TIMESTAMP)",
                "hive": "CAST('2020-01-01' AS TIMESTAMP)",
                "presto": "DATE_PARSE('2020-01-01', '%Y-%m-%d %H:%i:%s')",
            },
        )
        self.validate_all(
            "TIME_STR_TO_UNIX('2020-01-01')",
            write={
                "duckdb": "EPOCH(CAST('2020-01-01' AS TIMESTAMP))",
                "hive": "UNIX_TIMESTAMP('2020-01-01')",
                "presto": "TO_UNIXTIME(DATE_PARSE('2020-01-01', '%Y-%m-%d %H:%i:%S'))",
            },
        )
        self.validate_all(
            "TIME_TO_STR(x, '%Y-%m-%d')",
            write={
                "drill": "TO_CHAR(x, 'yyyy-MM-dd')",
                "duckdb": "STRFTIME(x, '%Y-%m-%d')",
                "hive": "DATE_FORMAT(x, 'yyyy-MM-dd')",
                "oracle": "TO_CHAR(x, 'YYYY-MM-DD')",
                "postgres": "TO_CHAR(x, 'YYYY-MM-DD')",
                "presto": "DATE_FORMAT(x, '%Y-%m-%d')",
                "redshift": "TO_CHAR(x, 'YYYY-MM-DD')",
            },
        )
        self.validate_all(
            "TIME_TO_TIME_STR(x)",
            write={
                "drill": "CAST(x AS VARCHAR)",
                "duckdb": "CAST(x AS TEXT)",
                "hive": "CAST(x AS STRING)",
                "presto": "CAST(x AS VARCHAR)",
                "redshift": "CAST(x AS TEXT)",
            },
        )
        self.validate_all(
            "TIME_TO_UNIX(x)",
            write={
                "drill": "UNIX_TIMESTAMP(x)",
                "duckdb": "EPOCH(x)",
                "hive": "UNIX_TIMESTAMP(x)",
                "presto": "TO_UNIXTIME(x)",
            },
        )
        self.validate_all(
            "TS_OR_DS_TO_DATE_STR(x)",
            write={
                "duckdb": "SUBSTRING(CAST(x AS TEXT), 1, 10)",
                "hive": "SUBSTRING(CAST(x AS STRING), 1, 10)",
                "presto": "SUBSTRING(CAST(x AS VARCHAR), 1, 10)",
            },
        )
        self.validate_all(
            "TS_OR_DS_TO_DATE(x)",
            write={
                "duckdb": "CAST(x AS DATE)",
                "hive": "TO_DATE(x)",
                "presto": "CAST(SUBSTR(CAST(x AS VARCHAR), 1, 10) AS DATE)",
            },
        )
        self.validate_all(
            "TS_OR_DS_TO_DATE(x, '%-d')",
            write={
                "duckdb": "CAST(STRPTIME(x, '%-d') AS DATE)",
                "hive": "TO_DATE(x, 'd')",
                "presto": "CAST(DATE_PARSE(x, '%e') AS DATE)",
                "spark": "TO_DATE(x, 'd')",
            },
        )
        self.validate_all(
            "UNIX_TO_STR(x, y)",
            write={
                "duckdb": "STRFTIME(TO_TIMESTAMP(CAST(x AS BIGINT)), y)",
                "hive": "FROM_UNIXTIME(x, y)",
                "presto": "DATE_FORMAT(FROM_UNIXTIME(x), y)",
                "starrocks": "FROM_UNIXTIME(x, y)",
            },
        )
        self.validate_all(
            "UNIX_TO_TIME(x)",
            write={
                "duckdb": "TO_TIMESTAMP(CAST(x AS BIGINT))",
                "hive": "FROM_UNIXTIME(x)",
                "oracle": "TO_DATE('1970-01-01', 'YYYY-MM-DD') + (x / 86400)",
                "postgres": "TO_TIMESTAMP(x)",
                "presto": "FROM_UNIXTIME(x)",
                "starrocks": "FROM_UNIXTIME(x)",
            },
        )
        self.validate_all(
            "UNIX_TO_TIME_STR(x)",
            write={
                "duckdb": "CAST(TO_TIMESTAMP(CAST(x AS BIGINT)) AS TEXT)",
                "hive": "FROM_UNIXTIME(x)",
                "presto": "CAST(FROM_UNIXTIME(x) AS VARCHAR)",
            },
        )
        self.validate_all(
            "DATE_TO_DATE_STR(x)",
            write={
                "drill": "CAST(x AS VARCHAR)",
                "duckdb": "CAST(x AS TEXT)",
                "hive": "CAST(x AS STRING)",
                "presto": "CAST(x AS VARCHAR)",
            },
        )
        self.validate_all(
            "DATE_TO_DI(x)",
            write={
                "drill": "CAST(TO_DATE(x, 'yyyyMMdd') AS INT)",
                "duckdb": "CAST(STRFTIME(x, '%Y%m%d') AS INT)",
                "hive": "CAST(DATE_FORMAT(x, 'yyyyMMdd') AS INT)",
                "presto": "CAST(DATE_FORMAT(x, '%Y%m%d') AS INT)",
            },
        )
        self.validate_all(
            "DI_TO_DATE(x)",
            write={
                "drill": "TO_DATE(CAST(x AS VARCHAR), 'yyyyMMdd')",
                "duckdb": "CAST(STRPTIME(CAST(x AS TEXT), '%Y%m%d') AS DATE)",
                "hive": "TO_DATE(CAST(x AS STRING), 'yyyyMMdd')",
                "presto": "CAST(DATE_PARSE(CAST(x AS VARCHAR), '%Y%m%d') AS DATE)",
            },
        )
        self.validate_all(
            "TS_OR_DI_TO_DI(x)",
            write={
                "duckdb": "CAST(SUBSTR(REPLACE(CAST(x AS TEXT), '-', ''), 1, 8) AS INT)",
                "hive": "CAST(SUBSTR(REPLACE(CAST(x AS STRING), '-', ''), 1, 8) AS INT)",
                "presto": "CAST(SUBSTR(REPLACE(CAST(x AS VARCHAR), '-', ''), 1, 8) AS INT)",
                "spark": "CAST(SUBSTR(REPLACE(CAST(x AS STRING), '-', ''), 1, 8) AS INT)",
            },
        )
        self.validate_all(
            "DATE_ADD(x, 1, 'day')",
            read={
                "mysql": "DATE_ADD(x, INTERVAL 1 DAY)",
                "starrocks": "DATE_ADD(x, INTERVAL 1 DAY)",
            },
            write={
                "bigquery": "DATE_ADD(x, INTERVAL 1 'day')",
                "drill": "DATE_ADD(x, INTERVAL '1' DAY)",
                "duckdb": "x + INTERVAL 1 day",
                "hive": "DATE_ADD(x, 1)",
                "mysql": "DATE_ADD(x, INTERVAL 1 DAY)",
                "postgres": "x + INTERVAL '1' 'day'",
                "presto": "DATE_ADD('day', 1, x)",
                "spark": "DATE_ADD(x, 1)",
                "starrocks": "DATE_ADD(x, INTERVAL 1 DAY)",
                "tsql": "DATEADD(day, 1, x)",
            },
        )
        self.validate_all(
            "DATE_ADD(x, 1)",
            write={
                "bigquery": "DATE_ADD(x, INTERVAL 1 'day')",
                "drill": "DATE_ADD(x, INTERVAL '1' DAY)",
                "duckdb": "x + INTERVAL 1 DAY",
                "hive": "DATE_ADD(x, 1)",
                "mysql": "DATE_ADD(x, INTERVAL 1 DAY)",
                "presto": "DATE_ADD('day', 1, x)",
                "spark": "DATE_ADD(x, 1)",
                "starrocks": "DATE_ADD(x, INTERVAL 1 DAY)",
            },
        )
        self.validate_all(
            "DATE_TRUNC('day', x)",
            write={
                "mysql": "DATE(x)",
            },
        )
        self.validate_all(
            "DATE_TRUNC('week', x)",
            write={
                "mysql": "STR_TO_DATE(CONCAT(YEAR(x), ' ', WEEK(x, 1), ' 1'), '%Y %u %w')",
            },
        )
        self.validate_all(
            "DATE_TRUNC('month', x)",
            write={
                "mysql": "STR_TO_DATE(CONCAT(YEAR(x), ' ', MONTH(x), ' 1'), '%Y %c %e')",
            },
        )
        self.validate_all(
            "DATE_TRUNC('quarter', x)",
            write={
                "mysql": "STR_TO_DATE(CONCAT(YEAR(x), ' ', QUARTER(x) * 3 - 2, ' 1'), '%Y %c %e')",
            },
        )
        self.validate_all(
            "DATE_TRUNC('year', x)",
            write={
                "mysql": "STR_TO_DATE(CONCAT(YEAR(x), ' 1 1'), '%Y %c %e')",
            },
        )
        self.validate_all(
            "DATE_TRUNC('millenium', x)",
            write={
                "mysql": UnsupportedError,
            },
        )
        self.validate_all(
            "DATE_TRUNC('year', x)",
            read={
                "starrocks": "DATE_TRUNC('year', x)",
            },
            write={
                "starrocks": "DATE_TRUNC('year', x)",
            },
        )
        self.validate_all(
            "DATE_TRUNC(x, year)",
            read={
                "bigquery": "DATE_TRUNC(x, year)",
            },
            write={
                "bigquery": "DATE_TRUNC(x, year)",
            },
        )
        self.validate_all(
            "STR_TO_DATE(x, '%Y-%m-%dT%H:%M:%S')",
            read={
                "mysql": "STR_TO_DATE(x, '%Y-%m-%dT%H:%i:%S')",
                "starrocks": "STR_TO_DATE(x, '%Y-%m-%dT%H:%i:%S')",
            },
            write={
                "drill": "TO_DATE(x, 'yyyy-MM-dd''T''HH:mm:ss')",
                "mysql": "STR_TO_DATE(x, '%Y-%m-%dT%H:%i:%S')",
                "starrocks": "STR_TO_DATE(x, '%Y-%m-%dT%H:%i:%S')",
                "hive": "CAST(FROM_UNIXTIME(UNIX_TIMESTAMP(x, 'yyyy-MM-ddTHH:mm:ss')) AS DATE)",
                "presto": "CAST(DATE_PARSE(x, '%Y-%m-%dT%H:%i:%S') AS DATE)",
                "spark": "TO_DATE(x, 'yyyy-MM-ddTHH:mm:ss')",
            },
        )
        self.validate_all(
            "STR_TO_DATE(x, '%Y-%m-%d')",
            write={
                "drill": "CAST(x AS DATE)",
                "mysql": "STR_TO_DATE(x, '%Y-%m-%d')",
                "starrocks": "STR_TO_DATE(x, '%Y-%m-%d')",
                "hive": "CAST(x AS DATE)",
                "presto": "CAST(DATE_PARSE(x, '%Y-%m-%d') AS DATE)",
                "spark": "TO_DATE(x)",
            },
        )
        self.validate_all(
            "DATE_STR_TO_DATE(x)",
            write={
                "drill": "CAST(x AS DATE)",
                "duckdb": "CAST(x AS DATE)",
                "hive": "TO_DATE(x)",
                "presto": "CAST(DATE_PARSE(x, '%Y-%m-%d') AS DATE)",
                "spark": "TO_DATE(x)",
            },
        )
        self.validate_all(
            "TS_OR_DS_ADD('2021-02-01', 1, 'DAY')",
            write={
                "drill": "DATE_ADD(CAST('2021-02-01' AS DATE), INTERVAL '1' DAY)",
                "duckdb": "CAST('2021-02-01' AS DATE) + INTERVAL 1 DAY",
                "hive": "DATE_ADD('2021-02-01', 1)",
                "presto": "DATE_ADD('DAY', 1, DATE_PARSE(SUBSTR('2021-02-01', 1, 10), '%Y-%m-%d'))",
                "spark": "DATE_ADD('2021-02-01', 1)",
            },
        )
        self.validate_all(
            "DATE_ADD(CAST('2020-01-01' AS DATE), 1)",
            write={
                "drill": "DATE_ADD(CAST('2020-01-01' AS DATE), INTERVAL '1' DAY)",
                "duckdb": "CAST('2020-01-01' AS DATE) + INTERVAL 1 DAY",
                "hive": "DATE_ADD(CAST('2020-01-01' AS DATE), 1)",
                "presto": "DATE_ADD('day', 1, CAST('2020-01-01' AS DATE))",
                "spark": "DATE_ADD(CAST('2020-01-01' AS DATE), 1)",
            },
        )
        self.validate_all(
            "TIMESTAMP '2022-01-01'",
            write={
                "drill": "CAST('2022-01-01' AS TIMESTAMP)",
                "mysql": "CAST('2022-01-01' AS TIMESTAMP)",
                "starrocks": "CAST('2022-01-01' AS DATETIME)",
                "hive": "CAST('2022-01-01' AS TIMESTAMP)",
            },
        )
        self.validate_all(
            "TIMESTAMP('2022-01-01')",
            write={
                "mysql": "TIMESTAMP('2022-01-01')",
                "starrocks": "TIMESTAMP('2022-01-01')",
                "hive": "TIMESTAMP('2022-01-01')",
            },
        )

        for unit in ("DAY", "MONTH", "YEAR"):
            self.validate_all(
                f"{unit}(x)",
                read={
                    dialect: f"{unit}(x)"
                    for dialect in (
                        "bigquery",
                        "drill",
                        "duckdb",
                        "mysql",
                        "presto",
                        "starrocks",
                    )
                },
                write={
                    dialect: f"{unit}(x)"
                    for dialect in (
                        "bigquery",
                        "drill",
                        "duckdb",
                        "mysql",
                        "presto",
                        "hive",
                        "spark",
                        "starrocks",
                    )
                },
            )
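        # The comprehensions above build identical {dialect: "DAY(x)"}-style
        # mappings, i.e. DAY/MONTH/YEAR are expected to round-trip unchanged
        # across the listed dialects.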

    def test_array(self):
        self.validate_all(
            "ARRAY(0, 1, 2)",
            write={
                "bigquery": "[0, 1, 2]",
                "duckdb": "LIST_VALUE(0, 1, 2)",
                "presto": "ARRAY[0, 1, 2]",
                "spark": "ARRAY(0, 1, 2)",
            },
        )
        self.validate_all(
            "ARRAY_SIZE(x)",
            write={
                "bigquery": "ARRAY_LENGTH(x)",
                "duckdb": "ARRAY_LENGTH(x)",
                "drill": "REPEATED_COUNT(x)",
                "presto": "CARDINALITY(x)",
                "spark": "SIZE(x)",
            },
        )
        self.validate_all(
            "ARRAY_SUM(ARRAY(1, 2))",
            write={
                "trino": "REDUCE(ARRAY[1, 2], 0, (acc, x) -> acc + x, acc -> acc)",
                "duckdb": "LIST_SUM(LIST_VALUE(1, 2))",
                "hive": "ARRAY_SUM(ARRAY(1, 2))",
                "presto": "ARRAY_SUM(ARRAY[1, 2])",
                "spark": "AGGREGATE(ARRAY(1, 2), 0, (acc, x) -> acc + x, acc -> acc)",
            },
        )
        self.validate_all(
            "REDUCE(x, 0, (acc, x) -> acc + x, acc -> acc)",
            write={
                "trino": "REDUCE(x, 0, (acc, x) -> acc + x, acc -> acc)",
                "duckdb": "REDUCE(x, 0, (acc, x) -> acc + x, acc -> acc)",
                "hive": "REDUCE(x, 0, (acc, x) -> acc + x, acc -> acc)",
                "presto": "REDUCE(x, 0, (acc, x) -> acc + x, acc -> acc)",
                "spark": "AGGREGATE(x, 0, (acc, x) -> acc + x, acc -> acc)",
            },
        )

    def test_order_by(self):
        self.validate_all(
            "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname ASC NULLS LAST, lname",
            write={
                "bigquery": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname NULLS LAST, lname",
                "duckdb": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname NULLS LAST, lname",
                "oracle": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname NULLS LAST, lname",
                "presto": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname, lname NULLS FIRST",
                "hive": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname NULLS LAST, lname",
                "spark": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname NULLS LAST, lname",
            },
        )

    def test_json(self):
        self.validate_all(
            "JSON_EXTRACT(x, 'y')",
            read={
                "postgres": "x->'y'",
                "presto": "JSON_EXTRACT(x, 'y')",
                "starrocks": "x->'y'",
            },
            write={
                "oracle": "JSON_EXTRACT(x, 'y')",
                "postgres": "x->'y'",
                "presto": "JSON_EXTRACT(x, 'y')",
                "starrocks": "x->'y'",
            },
        )
        self.validate_all(
            "JSON_EXTRACT_SCALAR(x, 'y')",
            read={
                "postgres": "x->>'y'",
                "presto": "JSON_EXTRACT_SCALAR(x, 'y')",
            },
            write={
                "postgres": "x->>'y'",
                "presto": "JSON_EXTRACT_SCALAR(x, 'y')",
            },
        )
        self.validate_all(
            "JSONB_EXTRACT(x, 'y')",
            read={
                "postgres": "x#>'y'",
            },
            write={
                "postgres": "x#>'y'",
            },
        )
        self.validate_all(
            "JSONB_EXTRACT_SCALAR(x, 'y')",
            read={
                "postgres": "x#>>'y'",
            },
            write={
                "postgres": "x#>>'y'",
            },
        )

    def test_cross_join(self):
        self.validate_all(
            "SELECT a FROM x CROSS JOIN UNNEST(y) AS t (a)",
            write={
                "drill": "SELECT a FROM x CROSS JOIN UNNEST(y) AS t(a)",
                "presto": "SELECT a FROM x CROSS JOIN UNNEST(y) AS t(a)",
                "spark": "SELECT a FROM x LATERAL VIEW EXPLODE(y) t AS a",
            },
        )
        self.validate_all(
            "SELECT a, b FROM x CROSS JOIN UNNEST(y, z) AS t (a, b)",
            write={
                "drill": "SELECT a, b FROM x CROSS JOIN UNNEST(y, z) AS t(a, b)",
                "presto": "SELECT a, b FROM x CROSS JOIN UNNEST(y, z) AS t(a, b)",
                "spark": "SELECT a, b FROM x LATERAL VIEW EXPLODE(y) t AS a LATERAL VIEW EXPLODE(z) t AS b",
            },
        )
        self.validate_all(
            "SELECT a FROM x CROSS JOIN UNNEST(y) WITH ORDINALITY AS t (a)",
            write={
                "presto": "SELECT a FROM x CROSS JOIN UNNEST(y) WITH ORDINALITY AS t(a)",
                "spark": "SELECT a FROM x LATERAL VIEW POSEXPLODE(y) t AS a",
            },
        )

    def test_lateral_subquery(self):
        self.validate_identity(
            "SELECT art FROM tbl1 INNER JOIN LATERAL (SELECT art FROM tbl2) AS tbl2 ON tbl1.art = tbl2.art"
        )
        self.validate_identity(
            "SELECT * FROM tbl AS t LEFT JOIN LATERAL (SELECT * FROM b WHERE b.t_id = t.t_id) AS t ON TRUE"
        )

    def test_set_operators(self):
        self.validate_all(
            "SELECT * FROM a UNION SELECT * FROM b",
            read={
                "bigquery": "SELECT * FROM a UNION DISTINCT SELECT * FROM b",
                "clickhouse": "SELECT * FROM a UNION DISTINCT SELECT * FROM b",
                "duckdb": "SELECT * FROM a UNION SELECT * FROM b",
                "presto": "SELECT * FROM a UNION SELECT * FROM b",
                "spark": "SELECT * FROM a UNION SELECT * FROM b",
            },
            write={
                "bigquery": "SELECT * FROM a UNION DISTINCT SELECT * FROM b",
                "drill": "SELECT * FROM a UNION SELECT * FROM b",
                "duckdb": "SELECT * FROM a UNION SELECT * FROM b",
                "presto": "SELECT * FROM a UNION SELECT * FROM b",
                "spark": "SELECT * FROM a UNION SELECT * FROM b",
            },
        )
        self.validate_all(
            "SELECT * FROM a UNION ALL SELECT * FROM b",
            read={
                "bigquery": "SELECT * FROM a UNION ALL SELECT * FROM b",
                "clickhouse": "SELECT * FROM a UNION ALL SELECT * FROM b",
                "duckdb": "SELECT * FROM a UNION ALL SELECT * FROM b",
                "presto": "SELECT * FROM a UNION ALL SELECT * FROM b",
                "spark": "SELECT * FROM a UNION ALL SELECT * FROM b",
            },
            write={
                "bigquery": "SELECT * FROM a UNION ALL SELECT * FROM b",
                "duckdb": "SELECT * FROM a UNION ALL SELECT * FROM b",
                "presto": "SELECT * FROM a UNION ALL SELECT * FROM b",
                "spark": "SELECT * FROM a UNION ALL SELECT * FROM b",
            },
        )
        self.validate_all(
            "SELECT * FROM a INTERSECT SELECT * FROM b",
            read={
                "bigquery": "SELECT * FROM a INTERSECT DISTINCT SELECT * FROM b",
                "duckdb": "SELECT * FROM a INTERSECT SELECT * FROM b",
                "presto": "SELECT * FROM a INTERSECT SELECT * FROM b",
                "spark": "SELECT * FROM a INTERSECT SELECT * FROM b",
            },
            write={
                "bigquery": "SELECT * FROM a INTERSECT DISTINCT SELECT * FROM b",
                "duckdb": "SELECT * FROM a INTERSECT SELECT * FROM b",
                "presto": "SELECT * FROM a INTERSECT SELECT * FROM b",
                "spark": "SELECT * FROM a INTERSECT SELECT * FROM b",
            },
        )
        self.validate_all(
            "SELECT * FROM a EXCEPT SELECT * FROM b",
            read={
                "bigquery": "SELECT * FROM a EXCEPT DISTINCT SELECT * FROM b",
                "duckdb": "SELECT * FROM a EXCEPT SELECT * FROM b",
                "presto": "SELECT * FROM a EXCEPT SELECT * FROM b",
                "spark": "SELECT * FROM a EXCEPT SELECT * FROM b",
            },
            write={
                "bigquery": "SELECT * FROM a EXCEPT DISTINCT SELECT * FROM b",
                "duckdb": "SELECT * FROM a EXCEPT SELECT * FROM b",
                "presto": "SELECT * FROM a EXCEPT SELECT * FROM b",
                "spark": "SELECT * FROM a EXCEPT SELECT * FROM b",
            },
        )
        self.validate_all(
            "SELECT * FROM a UNION DISTINCT SELECT * FROM b",
            write={
                "bigquery": "SELECT * FROM a UNION DISTINCT SELECT * FROM b",
                "duckdb": "SELECT * FROM a UNION SELECT * FROM b",
                "presto": "SELECT * FROM a UNION SELECT * FROM b",
                "spark": "SELECT * FROM a UNION SELECT * FROM b",
            },
        )
        self.validate_all(
            "SELECT * FROM a INTERSECT DISTINCT SELECT * FROM b",
            write={
                "bigquery": "SELECT * FROM a INTERSECT DISTINCT SELECT * FROM b",
                "duckdb": "SELECT * FROM a INTERSECT SELECT * FROM b",
                "presto": "SELECT * FROM a INTERSECT SELECT * FROM b",
                "spark": "SELECT * FROM a INTERSECT SELECT * FROM b",
            },
        )
        self.validate_all(
            "SELECT * FROM a INTERSECT ALL SELECT * FROM b",
            write={
                "bigquery": "SELECT * FROM a INTERSECT ALL SELECT * FROM b",
                "duckdb": "SELECT * FROM a INTERSECT ALL SELECT * FROM b",
                "presto": "SELECT * FROM a INTERSECT ALL SELECT * FROM b",
                "spark": "SELECT * FROM a INTERSECT ALL SELECT * FROM b",
            },
        )
        self.validate_all(
            "SELECT * FROM a EXCEPT DISTINCT SELECT * FROM b",
            write={
                "bigquery": "SELECT * FROM a EXCEPT DISTINCT SELECT * FROM b",
                "duckdb": "SELECT * FROM a EXCEPT SELECT * FROM b",
                "presto": "SELECT * FROM a EXCEPT SELECT * FROM b",
                "spark": "SELECT * FROM a EXCEPT SELECT * FROM b",
            },
        )
        self.validate_all(
            "SELECT * FROM a EXCEPT ALL SELECT * FROM b",
            read={
                "bigquery": "SELECT * FROM a EXCEPT ALL SELECT * FROM b",
                "duckdb": "SELECT * FROM a EXCEPT ALL SELECT * FROM b",
                "presto": "SELECT * FROM a EXCEPT ALL SELECT * FROM b",
                "spark": "SELECT * FROM a EXCEPT ALL SELECT * FROM b",
            },
        )

    def test_operators(self):
        self.validate_identity("some.column LIKE 'foo' || another.column || 'bar' || LOWER(x)")
        self.validate_identity("some.column LIKE 'foo' + another.column + 'bar'")
        self.validate_all(
            "x ILIKE '%y'",
            read={
                "clickhouse": "x ILIKE '%y'",
                "duckdb": "x ILIKE '%y'",
                "postgres": "x ILIKE '%y'",
                "snowflake": "x ILIKE '%y'",
            },
            write={
                "bigquery": "LOWER(x) LIKE '%y'",
                "clickhouse": "x ILIKE '%y'",
                "drill": "x `ILIKE` '%y'",
                "duckdb": "x ILIKE '%y'",
                "hive": "LOWER(x) LIKE '%y'",
                "mysql": "LOWER(x) LIKE '%y'",
                "oracle": "LOWER(x) LIKE '%y'",
                "postgres": "x ILIKE '%y'",
                "presto": "LOWER(x) LIKE '%y'",
                "snowflake": "x ILIKE '%y'",
                "spark": "x ILIKE '%y'",
                "sqlite": "LOWER(x) LIKE '%y'",
                "starrocks": "LOWER(x) LIKE '%y'",
                "trino": "LOWER(x) LIKE '%y'",
            },
        )
        self.validate_all(
            "SELECT * FROM a ORDER BY col_a NULLS LAST",
            write={
                "mysql": UnsupportedError,
                "starrocks": UnsupportedError,
            },
        )
        self.validate_all(
            "POSITION(' ' in x)",
            write={
                "drill": "STRPOS(x, ' ')",
                "duckdb": "STRPOS(x, ' ')",
                "postgres": "STRPOS(x, ' ')",
                "presto": "STRPOS(x, ' ')",
                "spark": "LOCATE(' ', x)",
                "clickhouse": "position(x, ' ')",
                "snowflake": "POSITION(' ', x)",
                "mysql": "LOCATE(' ', x)",
            },
        )
        self.validate_all(
            "STR_POSITION('a', x)",
            write={
                "drill": "STRPOS(x, 'a')",
                "duckdb": "STRPOS(x, 'a')",
                "postgres": "STRPOS(x, 'a')",
                "presto": "STRPOS(x, 'a')",
                "spark": "LOCATE('a', x)",
                "clickhouse": "position(x, 'a')",
                "snowflake": "POSITION('a', x)",
                "mysql": "LOCATE('a', x)",
            },
        )
        self.validate_all(
            "POSITION('a', x, 3)",
            write={
                "drill": "STRPOS(SUBSTR(x, 3), 'a') + 3 - 1",
                "presto": "STRPOS(SUBSTR(x, 3), 'a') + 3 - 1",
                "spark": "LOCATE('a', x, 3)",
                "clickhouse": "position(x, 'a', 3)",
                "snowflake": "POSITION('a', x, 3)",
                "mysql": "LOCATE('a', x, 3)",
            },
        )
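        # Note on the Presto/Drill rewrite above: STRPOS has no start-position
        # argument, so POSITION('a', x, 3) is emulated by searching the substring
        # that begins at position 3 and adding 3 - 1 to translate the match back
        # into a position in the full (1-based) string.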
        self.validate_all(
            "CONCAT_WS('-', 'a', 'b')",
            write={
                "duckdb": "CONCAT_WS('-', 'a', 'b')",
                "presto": "ARRAY_JOIN(ARRAY['a', 'b'], '-')",
                "hive": "CONCAT_WS('-', 'a', 'b')",
                "spark": "CONCAT_WS('-', 'a', 'b')",
            },
        )
        self.validate_all(
            "CONCAT_WS('-', x)",
            write={
                "duckdb": "CONCAT_WS('-', x)",
                "presto": "ARRAY_JOIN(x, '-')",
                "hive": "CONCAT_WS('-', x)",
                "spark": "CONCAT_WS('-', x)",
            },
        )
        self.validate_all(
            "CONCAT(a)",
            write={
                "mysql": "a",
                "tsql": "a",
            },
        )
        self.validate_all(
            "IF(x > 1, 1, 0)",
            write={
                "drill": "`IF`(x > 1, 1, 0)",
                "duckdb": "CASE WHEN x > 1 THEN 1 ELSE 0 END",
                "presto": "IF(x > 1, 1, 0)",
                "hive": "IF(x > 1, 1, 0)",
                "spark": "IF(x > 1, 1, 0)",
                "tableau": "IF x > 1 THEN 1 ELSE 0 END",
            },
        )
        self.validate_all(
            "CASE WHEN 1 THEN x ELSE 0 END",
            write={
                "drill": "CASE WHEN 1 THEN x ELSE 0 END",
                "duckdb": "CASE WHEN 1 THEN x ELSE 0 END",
                "presto": "CASE WHEN 1 THEN x ELSE 0 END",
                "hive": "CASE WHEN 1 THEN x ELSE 0 END",
                "spark": "CASE WHEN 1 THEN x ELSE 0 END",
                "tableau": "CASE WHEN 1 THEN x ELSE 0 END",
            },
        )
        self.validate_all(
            "x[y]",
            write={
                "drill": "x[y]",
                "duckdb": "x[y]",
                "presto": "x[y]",
                "hive": "x[y]",
                "spark": "x[y]",
            },
        )
        self.validate_all(
            """'["x"]'""",
            write={
                "duckdb": """'["x"]'""",
                "presto": """'["x"]'""",
                "hive": """'["x"]'""",
                "spark": """'["x"]'""",
            },
        )
        self.validate_all(
            'true or null as "foo"',
            write={
                "bigquery": "TRUE OR NULL AS `foo`",
                "drill": "TRUE OR NULL AS `foo`",
                "duckdb": 'TRUE OR NULL AS "foo"',
                "presto": 'TRUE OR NULL AS "foo"',
                "hive": "TRUE OR NULL AS `foo`",
                "spark": "TRUE OR NULL AS `foo`",
            },
        )
        self.validate_all(
            "SELECT IF(COALESCE(bar, 0) = 1, TRUE, FALSE) as foo FROM baz",
            write={
                "bigquery": "SELECT CASE WHEN COALESCE(bar, 0) = 1 THEN TRUE ELSE FALSE END AS foo FROM baz",
                "duckdb": "SELECT CASE WHEN COALESCE(bar, 0) = 1 THEN TRUE ELSE FALSE END AS foo FROM baz",
                "presto": "SELECT IF(COALESCE(bar, 0) = 1, TRUE, FALSE) AS foo FROM baz",
                "hive": "SELECT IF(COALESCE(bar, 0) = 1, TRUE, FALSE) AS foo FROM baz",
                "spark": "SELECT IF(COALESCE(bar, 0) = 1, TRUE, FALSE) AS foo FROM baz",
            },
        )
        self.validate_all(
            "LEVENSHTEIN(col1, col2)",
            write={
                "duckdb": "LEVENSHTEIN(col1, col2)",
                "drill": "LEVENSHTEIN_DISTANCE(col1, col2)",
                "presto": "LEVENSHTEIN_DISTANCE(col1, col2)",
                "hive": "LEVENSHTEIN(col1, col2)",
                "spark": "LEVENSHTEIN(col1, col2)",
            },
        )
        self.validate_all(
            "LEVENSHTEIN(coalesce(col1, col2), coalesce(col2, col1))",
            write={
                "duckdb": "LEVENSHTEIN(COALESCE(col1, col2), COALESCE(col2, col1))",
                "drill": "LEVENSHTEIN_DISTANCE(COALESCE(col1, col2), COALESCE(col2, col1))",
                "presto": "LEVENSHTEIN_DISTANCE(COALESCE(col1, col2), COALESCE(col2, col1))",
                "hive": "LEVENSHTEIN(COALESCE(col1, col2), COALESCE(col2, col1))",
                "spark": "LEVENSHTEIN(COALESCE(col1, col2), COALESCE(col2, col1))",
            },
        )
        self.validate_all(
            "ARRAY_FILTER(the_array, x -> x > 0)",
            write={
                "presto": "FILTER(the_array, x -> x > 0)",
                "hive": "FILTER(the_array, x -> x > 0)",
                "spark": "FILTER(the_array, x -> x > 0)",
            },
        )

    def test_limit(self):
        self.validate_all(
            "SELECT * FROM data LIMIT 10, 20",
            write={"sqlite": "SELECT * FROM data LIMIT 10 OFFSET 20"},
        )
        self.validate_all(
            "SELECT x FROM y LIMIT 10",
            write={
                "sqlite": "SELECT x FROM y LIMIT 10",
                "oracle": "SELECT x FROM y FETCH FIRST 10 ROWS ONLY",
            },
        )
        self.validate_all(
            "SELECT x FROM y LIMIT 10 OFFSET 5",
            write={
                "sqlite": "SELECT x FROM y LIMIT 10 OFFSET 5",
                "oracle": "SELECT x FROM y OFFSET 5 ROWS FETCH FIRST 10 ROWS ONLY",
            },
        )
        self.validate_all(
            "SELECT x FROM y OFFSET 10 FETCH FIRST 3 ROWS ONLY",
            write={
                "oracle": "SELECT x FROM y OFFSET 10 ROWS FETCH FIRST 3 ROWS ONLY",
            },
        )
        self.validate_all(
            "SELECT x FROM y OFFSET 10 ROWS FETCH FIRST 3 ROWS ONLY",
            write={
                "oracle": "SELECT x FROM y OFFSET 10 ROWS FETCH FIRST 3 ROWS ONLY",
            },
        )
        self.validate_all(
            '"x" + "y"',
            read={
                "clickhouse": '`x` + "y"',
                "sqlite": '`x` + "y"',
                "redshift": '"x" + "y"',
            },
        )
        self.validate_all(
            "[1, 2]",
            write={
                "bigquery": "[1, 2]",
                "clickhouse": "[1, 2]",
            },
        )
        self.validate_all(
            "SELECT * FROM VALUES ('x'), ('y') AS t(z)",
            write={
                "spark": "SELECT * FROM VALUES ('x'), ('y') AS t(z)",
            },
        )
        self.validate_all(
            "CREATE TABLE t (c CHAR, nc NCHAR, v1 VARCHAR, v2 VARCHAR2, nv NVARCHAR, nv2 NVARCHAR2)",
            write={
                "duckdb": "CREATE TABLE t (c CHAR, nc CHAR, v1 TEXT, v2 TEXT, nv TEXT, nv2 TEXT)",
                "hive": "CREATE TABLE t (c CHAR, nc CHAR, v1 STRING, v2 STRING, nv STRING, nv2 STRING)",
                "oracle": "CREATE TABLE t (c CHAR, nc CHAR, v1 VARCHAR2, v2 VARCHAR2, nv NVARCHAR2, nv2 NVARCHAR2)",
                "postgres": "CREATE TABLE t (c CHAR, nc CHAR, v1 VARCHAR, v2 VARCHAR, nv VARCHAR, nv2 VARCHAR)",
                "sqlite": "CREATE TABLE t (c TEXT, nc TEXT, v1 TEXT, v2 TEXT, nv TEXT, nv2 TEXT)",
            },
        )
        self.validate_all(
            "POWER(1.2, 3.4)",
            read={
                "hive": "pow(1.2, 3.4)",
                "postgres": "power(1.2, 3.4)",
            },
        )
        self.validate_all(
            "CREATE INDEX my_idx ON tbl (a, b)",
            read={
                "hive": "CREATE INDEX my_idx ON TABLE tbl (a, b)",
                "sqlite": "CREATE INDEX my_idx ON tbl (a, b)",
            },
            write={
                "hive": "CREATE INDEX my_idx ON TABLE tbl (a, b)",
                "postgres": "CREATE INDEX my_idx ON tbl (a, b)",
                "sqlite": "CREATE INDEX my_idx ON tbl (a, b)",
            },
        )
        self.validate_all(
            "CREATE UNIQUE INDEX my_idx ON tbl (a, b)",
            read={
                "hive": "CREATE UNIQUE INDEX my_idx ON TABLE tbl (a, b)",
                "sqlite": "CREATE UNIQUE INDEX my_idx ON tbl (a, b)",
            },
            write={
                "hive": "CREATE UNIQUE INDEX my_idx ON TABLE tbl (a, b)",
                "postgres": "CREATE UNIQUE INDEX my_idx ON tbl (a, b)",
                "sqlite": "CREATE UNIQUE INDEX my_idx ON tbl (a, b)",
            },
        )
        self.validate_all(
            "CREATE TABLE t (b1 BINARY, b2 BINARY(1024), c1 TEXT, c2 TEXT(1024))",
            write={
                "duckdb": "CREATE TABLE t (b1 BINARY, b2 BINARY(1024), c1 TEXT, c2 TEXT(1024))",
                "hive": "CREATE TABLE t (b1 BINARY, b2 BINARY(1024), c1 STRING, c2 STRING(1024))",
                "oracle": "CREATE TABLE t (b1 BLOB, b2 BLOB(1024), c1 CLOB, c2 CLOB(1024))",
                "postgres": "CREATE TABLE t (b1 BYTEA, b2 BYTEA(1024), c1 TEXT, c2 TEXT(1024))",
                "sqlite": "CREATE TABLE t (b1 BLOB, b2 BLOB(1024), c1 TEXT, c2 TEXT(1024))",
                "redshift": "CREATE TABLE t (b1 VARBYTE, b2 VARBYTE(1024), c1 TEXT, c2 TEXT(1024))",
            },
        )

    def test_alias(self):
        self.validate_all(
            "SELECT a AS b FROM x GROUP BY b",
            write={
                "drill": "SELECT a AS b FROM x GROUP BY b",
                "duckdb": "SELECT a AS b FROM x GROUP BY b",
                "presto": "SELECT a AS b FROM x GROUP BY 1",
                "hive": "SELECT a AS b FROM x GROUP BY 1",
                "oracle": "SELECT a AS b FROM x GROUP BY 1",
                "spark": "SELECT a AS b FROM x GROUP BY 1",
            },
        )
        self.validate_all(
            "SELECT y x FROM my_table t",
            write={
                "drill": "SELECT y AS x FROM my_table AS t",
                "hive": "SELECT y AS x FROM my_table AS t",
                "oracle": "SELECT y AS x FROM my_table t",
                "postgres": "SELECT y AS x FROM my_table AS t",
                "sqlite": "SELECT y AS x FROM my_table AS t",
            },
        )
        self.validate_all(
            "WITH cte1 AS (SELECT a, b FROM table1), cte2 AS (SELECT c, e AS d FROM table2) SELECT b, d AS dd FROM cte1 AS t JOIN cte2 WHERE cte1.a = cte2.c",
            write={
                "hive": "WITH cte1 AS (SELECT a, b FROM table1), cte2 AS (SELECT c, e AS d FROM table2) SELECT b, d AS dd FROM cte1 AS t JOIN cte2 WHERE cte1.a = cte2.c",
                "oracle": "WITH cte1 AS (SELECT a, b FROM table1), cte2 AS (SELECT c, e AS d FROM table2) SELECT b, d AS dd FROM cte1 t JOIN cte2 WHERE cte1.a = cte2.c",
                "postgres": "WITH cte1 AS (SELECT a, b FROM table1), cte2 AS (SELECT c, e AS d FROM table2) SELECT b, d AS dd FROM cte1 AS t JOIN cte2 WHERE cte1.a = cte2.c",
                "sqlite": "WITH cte1 AS (SELECT a, b FROM table1), cte2 AS (SELECT c, e AS d FROM table2) SELECT b, d AS dd FROM cte1 AS t JOIN cte2 WHERE cte1.a = cte2.c",
            },
        )

    def test_nullsafe_eq(self):
        self.validate_all(
            "SELECT a IS NOT DISTINCT FROM b",
            read={
                "mysql": "SELECT a <=> b",
                "postgres": "SELECT a IS NOT DISTINCT FROM b",
            },
            write={
                "mysql": "SELECT a <=> b",
                "postgres": "SELECT a IS NOT DISTINCT FROM b",
            },
        )

    def test_nullsafe_neq(self):
        self.validate_all(
            "SELECT a IS DISTINCT FROM b",
            read={
                "postgres": "SELECT a IS DISTINCT FROM b",
            },
            write={
                "mysql": "SELECT NOT a <=> b",
                "postgres": "SELECT a IS DISTINCT FROM b",
            },
        )
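    # MySQL's <=> is its null-safe equality operator, hence the mapping to and
    # from IS NOT DISTINCT FROM above (and NOT a <=> b for IS DISTINCT FROM).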

    def test_hash_comments(self):
        self.validate_all(
            "SELECT 1 /* arbitrary content,,, until end-of-line */",
            read={
                "mysql": "SELECT 1 # arbitrary content,,, until end-of-line",
                "bigquery": "SELECT 1 # arbitrary content,,, until end-of-line",
                "clickhouse": "SELECT 1 #! arbitrary content,,, until end-of-line",
            },
        )
        self.validate_all(
            """/* comment1 */
SELECT
  x, /* comment2 */
  y /* comment3 */""",
            read={
                "mysql": """SELECT # comment1
  x, # comment2
  y # comment3""",
                "bigquery": """SELECT # comment1
  x, # comment2
  y # comment3""",
                "clickhouse": """SELECT # comment1
  x, # comment2
  y # comment3""",
            },
            pretty=True,
        )

    def test_transactions(self):
        self.validate_all(
            "BEGIN TRANSACTION",
            write={
                "bigquery": "BEGIN TRANSACTION",
                "mysql": "BEGIN",
                "postgres": "BEGIN",
                "presto": "START TRANSACTION",
                "trino": "START TRANSACTION",
                "redshift": "BEGIN",
                "snowflake": "BEGIN",
                "sqlite": "BEGIN TRANSACTION",
            },
        )
        self.validate_all(
            "BEGIN",
            read={
                "presto": "START TRANSACTION READ WRITE, ISOLATION LEVEL SERIALIZABLE",
                "trino": "START TRANSACTION READ WRITE, ISOLATION LEVEL SERIALIZABLE",
            },
        )
        self.validate_all(
            "BEGIN",
            read={
                "presto": "START TRANSACTION ISOLATION LEVEL REPEATABLE READ",
                "trino": "START TRANSACTION ISOLATION LEVEL REPEATABLE READ",
            },
        )
        self.validate_all(
            "BEGIN IMMEDIATE TRANSACTION",
            write={"sqlite": "BEGIN IMMEDIATE TRANSACTION"},
        )