from sqlglot import ErrorLevel, ParseError, UnsupportedError, transpile
from tests.dialects.test_dialect import Validator


class TestBigQuery(Validator):
    dialect = "bigquery"

    def test_bigquery(self):
self . validate_identity ( " DATE(2016, 12, 25) " )
self . validate_identity ( " DATE(CAST( ' 2016-12-25 23:59:59 ' AS DATETIME)) " )
self . validate_identity ( " SELECT foo IN UNNEST(bar) AS bla " )
self . validate_identity ( " SELECT * FROM x-0.a " )
self . validate_identity ( " SELECT * FROM pivot CROSS JOIN foo " )
self . validate_identity ( " SAFE_CAST(x AS STRING) " )
self . validate_identity ( " SELECT * FROM a-b-c.mydataset.mytable " )
self . validate_identity ( " SELECT * FROM abc-def-ghi " )
self . validate_identity ( " SELECT * FROM a-b-c " )
self . validate_identity ( " SELECT * FROM my-table " )
self . validate_identity ( " SELECT * FROM my-project.mydataset.mytable " )
self . validate_identity ( " SELECT * FROM pro-ject_id.c.d CROSS JOIN foo-bar " )
self . validate_identity ( " x <> ' ' " )
self . validate_identity ( " DATE_TRUNC(col, WEEK(MONDAY)) " )
self . validate_identity ( " SELECT b ' abc ' " )
self . validate_identity ( """ SELECT * FROM UNNEST(ARRAY<STRUCT<x INT64>>[1, 2]) """ )
self . validate_identity ( " SELECT AS STRUCT 1 AS a, 2 AS b " )
        self.validate_all(
            "SELECT AS STRUCT ARRAY(SELECT AS STRUCT b FROM x) AS y FROM z",
            write={
                "": "SELECT AS STRUCT ARRAY(SELECT AS STRUCT b FROM x) AS y FROM z",
                "bigquery": "SELECT AS STRUCT ARRAY(SELECT AS STRUCT b FROM x) AS y FROM z",
                "duckdb": "SELECT {'y': ARRAY(SELECT {'b': b} FROM x)} FROM z",
            },
        )
self . validate_identity ( " SELECT DISTINCT AS STRUCT 1 AS a, 2 AS b " )
self . validate_identity ( " SELECT AS VALUE STRUCT(1 AS a, 2 AS b) " )
self . validate_identity ( " SELECT STRUCT<ARRAY<STRING>>([ ' 2023-01-17 ' ]) " )
self . validate_identity ( " SELECT STRUCT<STRING>((SELECT a FROM b.c LIMIT 1)).* " )
self . validate_identity ( " SELECT * FROM q UNPIVOT(values FOR quarter IN (b, c)) " )
self . validate_identity ( """ CREATE TABLE x (a STRUCT<values ARRAY<INT64>>) """ )
self . validate_identity ( """ CREATE TABLE x (a STRUCT<b STRING OPTIONS (description= ' b ' )>) """ )
        self.validate_identity(
            "DATE(CAST('2016-12-25 05:30:00+07' AS DATETIME), 'America/Los_Angeles')"
        )
        self.validate_identity(
            """CREATE TABLE x (a STRING OPTIONS (description='x')) OPTIONS (table_expiration_days=1)"""
        )
        self.validate_identity(
            "SELECT * FROM (SELECT * FROM `t`) AS a UNPIVOT((c) FOR c_name IN (v1, v2))"
        )
        self.validate_identity(
            "CREATE TABLE IF NOT EXISTS foo AS SELECT * FROM bla EXCEPT DISTINCT (SELECT * FROM bar) LIMIT 0"
        )
self . validate_all ( " SELECT SPLIT(foo) " , write = { " bigquery " : " SELECT SPLIT(foo, ' , ' ) " } )
self . validate_all (
" cast(x as date format ' MM/DD/YYYY ' ) " ,
write = {
" bigquery " : " PARSE_DATE( ' % m/ %d / % Y ' , x) " ,
} ,
)
self . validate_all (
" cast(x as time format ' YYYY.MM.DD HH:MI:SSTZH ' ) " ,
write = {
" bigquery " : " PARSE_TIMESTAMP( ' % Y. % m. %d % I: % M: % S % z ' , x) " ,
} ,
)
self . validate_all ( " SELECT 1 AS hash " , write = { " bigquery " : " SELECT 1 AS `hash` " } )
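        # Double- and triple-quoted strings normalize to single-quoted literals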
self . validate_all ( ' x <> " " ' , write = { " bigquery " : " x <> ' ' " } )
self . validate_all ( ' x <> " " " " " " ' , write = { " bigquery " : " x <> ' ' " } )
self . validate_all ( " x <> ' ' ' ' ' ' " , write = { " bigquery " : " x <> ' ' " } )
        self.validate_all(
            "CREATE TEMP TABLE foo AS SELECT 1",
            write={"bigquery": "CREATE TEMPORARY TABLE foo AS SELECT 1"},
        )
        self.validate_all(
            "SELECT * FROM `SOME_PROJECT_ID.SOME_DATASET_ID.INFORMATION_SCHEMA.SOME_VIEW`",
            write={
                "bigquery": "SELECT * FROM SOME_PROJECT_ID.SOME_DATASET_ID.INFORMATION_SCHEMA.SOME_VIEW",
            },
        )
        self.validate_all(
            "SELECT * FROM `my-project.my-dataset.my-table`",
            write={"bigquery": "SELECT * FROM `my-project`.`my-dataset`.`my-table`"},
        )
self . validate_all ( " CAST(x AS DATETIME) " , read = { " " : " x::timestamp " } )
self . validate_identity ( " CAST(x AS TIMESTAMP) " )
self . validate_all ( " LEAST(x, y) " , read = { " sqlite " : " MIN(x, y) " } )
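        # CHAR/NCHAR/NVARCHAR have no direct BigQuery type and map to STRING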
self . validate_all ( " CAST(x AS CHAR) " , write = { " bigquery " : " CAST(x AS STRING) " } )
self . validate_all ( " CAST(x AS NCHAR) " , write = { " bigquery " : " CAST(x AS STRING) " } )
self . validate_all ( " CAST(x AS NVARCHAR) " , write = { " bigquery " : " CAST(x AS STRING) " } )
self . validate_all ( " CAST(x AS TIMESTAMPTZ) " , write = { " bigquery " : " CAST(x AS TIMESTAMP) " } )
self . validate_all ( " CAST(x AS RECORD) " , write = { " bigquery " : " CAST(x AS STRUCT) " } )
        self.validate_all(
            "SELECT ARRAY(SELECT AS STRUCT 1 a, 2 b)",
            write={
                "bigquery": "SELECT ARRAY(SELECT AS STRUCT 1 AS a, 2 AS b)",
            },
        )
        self.validate_all(
            "REGEXP_CONTAINS('foo', '.*')",
            read={"bigquery": "REGEXP_CONTAINS('foo', '.*')"},
            write={"mysql": "REGEXP_LIKE('foo', '.*')"},
        )
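        # String literal escaping: triple-quoted and raw strings across dialects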
        self.validate_all(
            '"""x"""',
            write={
                "bigquery": "'x'",
                "duckdb": "'x'",
                "presto": "'x'",
                "hive": "'x'",
                "spark": "'x'",
            },
        )
        self.validate_all(
            '"""x\'"""',
            write={
                "bigquery": "'x\\''",
                "duckdb": "'x'''",
                "presto": "'x'''",
                "hive": "'x\\''",
                "spark": "'x\\''",
            },
        )
        with self.assertRaises(ValueError):
            transpile("'\\'", read="bigquery")
        self.validate_all(
            "r'x\\''",
            write={
                "bigquery": "r'x\\''",
                "hive": "'x\\''",
            },
        )
        self.validate_all(
            "r'x\\y'",
            write={
                "bigquery": "r'x\\y'",
                "hive": "'x\\\\y'",
            },
        )
        self.validate_all(
            "'\\\\'",
            write={
                "bigquery": r"'\\'",
                "duckdb": r"'\\'",
                "presto": r"'\\'",
                "hive": r"'\\'",
            },
        )
        self.validate_all(
            r'r"""/\*.*\*/"""',
            write={
                "bigquery": r"r'/\*.*\*/'",
                "duckdb": r"'/\\*.*\\*/'",
                "presto": r"'/\\*.*\\*/'",
                "hive": r"'/\\*.*\\*/'",
                "spark": r"'/\\*.*\\*/'",
            },
        )
        self.validate_all(
            r'R"""/\*.*\*/"""',
            write={
                "bigquery": r"r'/\*.*\*/'",
                "duckdb": r"'/\\*.*\\*/'",
                "presto": r"'/\\*.*\\*/'",
                "hive": r"'/\\*.*\\*/'",
                "spark": r"'/\\*.*\\*/'",
            },
        )
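        # BigQuery type names and their closest equivalents in other dialects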
        self.validate_all(
            "CAST(a AS INT64)",
            write={
                "bigquery": "CAST(a AS INT64)",
                "duckdb": "CAST(a AS BIGINT)",
                "presto": "CAST(a AS BIGINT)",
                "hive": "CAST(a AS BIGINT)",
                "spark": "CAST(a AS LONG)",
            },
        )
        self.validate_all(
            "CAST(a AS BYTES)",
            write={
                "bigquery": "CAST(a AS BYTES)",
                "duckdb": "CAST(a AS BLOB)",
                "presto": "CAST(a AS VARBINARY)",
                "hive": "CAST(a AS BINARY)",
                "spark": "CAST(a AS BINARY)",
            },
        )
        self.validate_all(
            "CAST(a AS NUMERIC)",
            write={
                "bigquery": "CAST(a AS NUMERIC)",
                "duckdb": "CAST(a AS DECIMAL)",
                "presto": "CAST(a AS DECIMAL)",
                "hive": "CAST(a AS DECIMAL)",
                "spark": "CAST(a AS DECIMAL)",
            },
        )
        self.validate_all(
            "[1, 2, 3]",
            read={
                "duckdb": "LIST_VALUE(1, 2, 3)",
                "presto": "ARRAY[1, 2, 3]",
                "hive": "ARRAY(1, 2, 3)",
                "spark": "ARRAY(1, 2, 3)",
            },
            write={
                "bigquery": "[1, 2, 3]",
                "duckdb": "LIST_VALUE(1, 2, 3)",
                "presto": "ARRAY[1, 2, 3]",
                "hive": "ARRAY(1, 2, 3)",
                "spark": "ARRAY(1, 2, 3)",
            },
        )
        self.validate_all(
            "SELECT * FROM UNNEST(['7', '14']) AS x",
            read={
                "spark": "SELECT * FROM UNNEST(ARRAY('7', '14')) AS (x)",
            },
            write={
                "bigquery": "SELECT * FROM UNNEST(['7', '14']) AS x",
                "presto": "SELECT * FROM UNNEST(ARRAY['7', '14']) AS (x)",
                "hive": "SELECT * FROM UNNEST(ARRAY('7', '14')) AS (x)",
                "spark": "SELECT * FROM UNNEST(ARRAY('7', '14')) AS (x)",
            },
        )
        self.validate_all(
            "SELECT ARRAY(SELECT x FROM UNNEST([0, 1]) AS x)",
            write={"bigquery": "SELECT ARRAY(SELECT x FROM UNNEST([0, 1]) AS x)"},
        )
        self.validate_all(
            "SELECT ARRAY(SELECT DISTINCT x FROM UNNEST(some_numbers) AS x) AS unique_numbers",
            write={
                "bigquery": "SELECT ARRAY(SELECT DISTINCT x FROM UNNEST(some_numbers) AS x) AS unique_numbers"
            },
        )
        self.validate_all(
            "SELECT ARRAY(SELECT * FROM foo JOIN bla ON x = y)",
            write={"bigquery": "SELECT ARRAY(SELECT * FROM foo JOIN bla ON x = y)"},
        )
        self.validate_all(
            "x IS unknown",
            write={
                "bigquery": "x IS NULL",
                "duckdb": "x IS NULL",
                "presto": "x IS NULL",
                "hive": "x IS NULL",
                "spark": "x IS NULL",
            },
        )
        self.validate_all(
            "CURRENT_TIMESTAMP()",
            read={
                "tsql": "GETDATE()",
            },
            write={
                "tsql": "GETDATE()",
            },
        )
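        # Bare CURRENT_* keywords; dialects differ on whether parentheses are required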
        self.validate_all(
            "current_datetime",
            write={
                "bigquery": "CURRENT_DATETIME()",
                "presto": "CURRENT_DATETIME()",
                "hive": "CURRENT_DATETIME()",
                "spark": "CURRENT_DATETIME()",
            },
        )
        self.validate_all(
            "current_time",
            write={
                "bigquery": "CURRENT_TIME()",
                "duckdb": "CURRENT_TIME",
                "presto": "CURRENT_TIME()",
                "hive": "CURRENT_TIME()",
                "spark": "CURRENT_TIME()",
            },
        )
        self.validate_all(
            "current_timestamp",
            write={
                "bigquery": "CURRENT_TIMESTAMP()",
                "duckdb": "CURRENT_TIMESTAMP",
                "postgres": "CURRENT_TIMESTAMP",
                "presto": "CURRENT_TIMESTAMP",
                "hive": "CURRENT_TIMESTAMP()",
                "spark": "CURRENT_TIMESTAMP()",
            },
        )
        self.validate_all(
            "current_timestamp()",
            write={
                "bigquery": "CURRENT_TIMESTAMP()",
                "duckdb": "CURRENT_TIMESTAMP",
                "postgres": "CURRENT_TIMESTAMP",
                "presto": "CURRENT_TIMESTAMP",
                "hive": "CURRENT_TIMESTAMP()",
                "spark": "CURRENT_TIMESTAMP()",
            },
        )
        self.validate_all(
            "DIV(x, y)",
            write={
                "bigquery": "DIV(x, y)",
                "duckdb": "x // y",
            },
        )
        self.validate_identity(
            "SELECT ROW() OVER (y ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING) FROM x WINDOW y AS (PARTITION BY CATEGORY)"
        )
        self.validate_identity(
            "SELECT item, purchases, LAST_VALUE(item) OVER (item_window ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING) AS most_popular FROM Produce WINDOW item_window AS (ORDER BY purchases)"
        )
        self.validate_identity(
            "SELECT LAST_VALUE(a IGNORE NULLS) OVER y FROM x WINDOW y AS (PARTITION BY CATEGORY)"
        )
        self.validate_all(
            "CREATE TABLE db.example_table (col_a struct<struct_col_a:int, struct_col_b:string>)",
            write={
                "bigquery": "CREATE TABLE db.example_table (col_a STRUCT<struct_col_a INT64, struct_col_b STRING>)",
                "duckdb": "CREATE TABLE db.example_table (col_a STRUCT(struct_col_a INT, struct_col_b TEXT))",
                "presto": "CREATE TABLE db.example_table (col_a ROW(struct_col_a INTEGER, struct_col_b VARCHAR))",
                "hive": "CREATE TABLE db.example_table (col_a STRUCT<struct_col_a INT, struct_col_b STRING>)",
                "spark": "CREATE TABLE db.example_table (col_a STRUCT<struct_col_a: INT, struct_col_b: STRING>)",
            },
        )
        self.validate_all(
            "CREATE TABLE db.example_table (col_a STRUCT<struct_col_a INT64, struct_col_b STRUCT<nested_col_a STRING, nested_col_b STRING>>)",
            write={
                "bigquery": "CREATE TABLE db.example_table (col_a STRUCT<struct_col_a INT64, struct_col_b STRUCT<nested_col_a STRING, nested_col_b STRING>>)",
                "duckdb": "CREATE TABLE db.example_table (col_a STRUCT(struct_col_a BIGINT, struct_col_b STRUCT(nested_col_a TEXT, nested_col_b TEXT)))",
                "presto": "CREATE TABLE db.example_table (col_a ROW(struct_col_a BIGINT, struct_col_b ROW(nested_col_a VARCHAR, nested_col_b VARCHAR)))",
                "hive": "CREATE TABLE db.example_table (col_a STRUCT<struct_col_a BIGINT, struct_col_b STRUCT<nested_col_a STRING, nested_col_b STRING>>)",
                "spark": "CREATE TABLE db.example_table (col_a STRUCT<struct_col_a: LONG, struct_col_b: STRUCT<nested_col_a: STRING, nested_col_b: STRING>>)",
            },
        )
        self.validate_all(
            "CREATE TABLE db.example_table (x int) PARTITION BY x cluster by x",
            write={
                "bigquery": "CREATE TABLE db.example_table (x INT64) PARTITION BY x CLUSTER BY x",
            },
        )
        self.validate_all(
            "SELECT * FROM a WHERE b IN UNNEST([1, 2, 3])",
            write={
                "bigquery": "SELECT * FROM a WHERE b IN UNNEST([1, 2, 3])",
                "mysql": "SELECT * FROM a WHERE b IN (SELECT UNNEST(ARRAY(1, 2, 3)))",
                "presto": "SELECT * FROM a WHERE b IN (SELECT UNNEST(ARRAY[1, 2, 3]))",
                "hive": "SELECT * FROM a WHERE b IN (SELECT UNNEST(ARRAY(1, 2, 3)))",
                "spark": "SELECT * FROM a WHERE b IN (SELECT UNNEST(ARRAY(1, 2, 3)))",
            },
        )
        # Reference: https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#set_operators
        with self.assertRaises(UnsupportedError):
            transpile(
                "SELECT * FROM a INTERSECT ALL SELECT * FROM b",
                write="bigquery",
                unsupported_level=ErrorLevel.RAISE,
            )
        with self.assertRaises(UnsupportedError):
            transpile(
                "SELECT * FROM a EXCEPT ALL SELECT * FROM b",
                write="bigquery",
                unsupported_level=ErrorLevel.RAISE,
            )
        with self.assertRaises(ParseError):
            transpile("SELECT * FROM UNNEST(x) AS x(y)", read="bigquery")
        self.validate_all(
            "DATE_SUB(CURRENT_DATE(), INTERVAL 1 DAY)",
            write={
                "postgres": "CURRENT_DATE - INTERVAL '1 DAY'",
                "bigquery": "DATE_SUB(CURRENT_DATE, INTERVAL 1 DAY)",
            },
        )
        self.validate_all(
            "DATE_ADD(CURRENT_DATE(), INTERVAL 1 DAY)",
            write={
                "bigquery": "DATE_ADD(CURRENT_DATE, INTERVAL 1 DAY)",
                "duckdb": "CURRENT_DATE + INTERVAL 1 DAY",
                "mysql": "DATE_ADD(CURRENT_DATE, INTERVAL 1 DAY)",
                "postgres": "CURRENT_DATE + INTERVAL '1 DAY'",
                "presto": "DATE_ADD('DAY', 1, CURRENT_DATE)",
                "hive": "DATE_ADD(CURRENT_DATE, 1)",
                "spark": "DATE_ADD(CURRENT_DATE, 1)",
            },
        )
        self.validate_all(
            "DATE_DIFF(DATE '2010-07-07', DATE '2008-12-25', DAY)",
            write={
                "bigquery": "DATE_DIFF(CAST('2010-07-07' AS DATE), CAST('2008-12-25' AS DATE), DAY)",
                "mysql": "DATEDIFF(CAST('2010-07-07' AS DATE), CAST('2008-12-25' AS DATE))",
            },
        )
        self.validate_all(
            "DATE_DIFF(DATE '2010-07-07', DATE '2008-12-25', MINUTE)",
            write={
                "bigquery": "DATE_DIFF(CAST('2010-07-07' AS DATE), CAST('2008-12-25' AS DATE), MINUTE)",
                "mysql": "DATEDIFF(CAST('2010-07-07' AS DATE), CAST('2008-12-25' AS DATE))",
            },
        )
        self.validate_all(
            "CURRENT_DATE('UTC')",
            write={
                "mysql": "CURRENT_DATE AT TIME ZONE 'UTC'",
                "postgres": "CURRENT_DATE AT TIME ZONE 'UTC'",
            },
        )
        self.validate_all(
            "SELECT a FROM test WHERE a = 1 GROUP BY a HAVING a = 2 QUALIFY z ORDER BY a LIMIT 10",
            write={
                "bigquery": "SELECT a FROM test WHERE a = 1 GROUP BY a HAVING a = 2 QUALIFY z ORDER BY a LIMIT 10",
                "snowflake": "SELECT a FROM test WHERE a = 1 GROUP BY a HAVING a = 2 QUALIFY z ORDER BY a NULLS FIRST LIMIT 10",
            },
        )
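        # BigQuery has no VALUES derived tables; they are rewritten as UNNEST of STRUCTs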
        self.validate_all(
            "SELECT cola, colb FROM (VALUES (1, 'test')) AS tab(cola, colb)",
            write={
                "spark": "SELECT cola, colb FROM VALUES (1, 'test') AS tab(cola, colb)",
                "bigquery": "SELECT cola, colb FROM UNNEST([STRUCT(1 AS cola, 'test' AS colb)])",
                "snowflake": "SELECT cola, colb FROM (VALUES (1, 'test')) AS tab(cola, colb)",
            },
        )
        self.validate_all(
            "SELECT cola, colb FROM (VALUES (1, 'test')) AS tab",
            write={
                "bigquery": "SELECT cola, colb FROM UNNEST([STRUCT(1 AS _c0, 'test' AS _c1)])",
            },
        )
        self.validate_all(
            "SELECT cola, colb FROM (VALUES (1, 'test'))",
            write={
                "bigquery": "SELECT cola, colb FROM UNNEST([STRUCT(1 AS _c0, 'test' AS _c1)])",
            },
        )
        self.validate_all(
            "SELECT cola, colb, colc FROM (VALUES (1, 'test', NULL)) AS tab(cola, colb, colc)",
            write={
                "spark": "SELECT cola, colb, colc FROM VALUES (1, 'test', NULL) AS tab(cola, colb, colc)",
                "bigquery": "SELECT cola, colb, colc FROM UNNEST([STRUCT(1 AS cola, 'test' AS colb, NULL AS colc)])",
                "snowflake": "SELECT cola, colb, colc FROM (VALUES (1, 'test', NULL)) AS tab(cola, colb, colc)",
            },
        )
        self.validate_all(
            "SELECT * FROM (SELECT a, b, c FROM test) PIVOT(SUM(b) d, COUNT(*) e FOR c IN ('x', 'y'))",
            write={
                "bigquery": "SELECT * FROM (SELECT a, b, c FROM test) PIVOT(SUM(b) AS d, COUNT(*) AS e FOR c IN ('x', 'y'))",
            },
        )
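        # DuckDB's REGEXP_EXTRACT needs the group index spelled out explicitly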
        self.validate_all(
            "SELECT REGEXP_EXTRACT(abc, 'pattern(group)') FROM table",
            write={
                "bigquery": "SELECT REGEXP_EXTRACT(abc, 'pattern(group)') FROM table",
                "duckdb": "SELECT REGEXP_EXTRACT(abc, 'pattern(group)', 1) FROM table",
            },
        )
self . validate_identity ( " REGEXP_EXTRACT(`foo`, ' bar: (.+?) ' , 1, 1) " )
self . validate_identity ( " BEGIN A B C D E F " )
self . validate_identity ( " BEGIN TRANSACTION " )
self . validate_identity ( " COMMIT TRANSACTION " )
self . validate_identity ( " ROLLBACK TRANSACTION " )
self . validate_identity ( " CAST(x AS BIGNUMERIC) " )
self . validate_identity ( " SELECT * FROM UNNEST([1]) WITH ORDINALITY " )
self . validate_all (
" SELECT * FROM UNNEST([1]) WITH OFFSET " ,
write = { " bigquery " : " SELECT * FROM UNNEST([1]) WITH OFFSET AS offset " } ,
)
self . validate_all (
" SELECT * FROM UNNEST([1]) WITH OFFSET y " ,
write = { " bigquery " : " SELECT * FROM UNNEST([1]) WITH OFFSET AS y " } ,
)
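        # Array generation and JSON serialization equivalents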
        self.validate_all(
            "GENERATE_ARRAY(1, 4)",
            read={"bigquery": "GENERATE_ARRAY(1, 4)"},
            write={"duckdb": "GENERATE_SERIES(1, 4)"},
        )
        self.validate_all(
            "TO_JSON_STRING(x)",
            read={"bigquery": "TO_JSON_STRING(x)"},
            write={
                "bigquery": "TO_JSON_STRING(x)",
                "duckdb": "CAST(TO_JSON(x) AS TEXT)",
                "presto": "JSON_FORMAT(x)",
                "spark": "TO_JSON(x)",
            },
        )

    def test_user_defined_functions(self):
        self.validate_identity(
            "CREATE TEMPORARY FUNCTION a(x FLOAT64, y FLOAT64) RETURNS FLOAT64 NOT DETERMINISTIC LANGUAGE js AS 'return x*y;'"
        )
self . validate_identity ( " CREATE TEMPORARY FUNCTION udf(x ANY TYPE) AS (x) " )
self . validate_identity ( " CREATE TEMPORARY FUNCTION a(x FLOAT64, y FLOAT64) AS ((x + 4) / y) " )
        self.validate_identity(
            "CREATE TABLE FUNCTION a(x INT64) RETURNS TABLE <q STRING, r INT64> AS SELECT s, t"
        )

    def test_group_concat(self):
        self.validate_all(
            "SELECT a, GROUP_CONCAT(b) FROM table GROUP BY a",
            write={"bigquery": "SELECT a, STRING_AGG(b) FROM table GROUP BY a"},
        )

    def test_remove_precision_parameterized_types(self):
        self.validate_all(
            "SELECT CAST(1 AS NUMERIC(10, 2))",
            write={
                "bigquery": "SELECT CAST(1 AS NUMERIC)",
            },
        )
        self.validate_all(
            "CREATE TABLE test (a NUMERIC(10, 2))",
            write={
                "bigquery": "CREATE TABLE test (a NUMERIC(10, 2))",
            },
        )
        self.validate_all(
            "SELECT CAST('1' AS STRING(10)) UNION ALL SELECT CAST('2' AS STRING(10))",
            write={
                "bigquery": "SELECT CAST('1' AS STRING) UNION ALL SELECT CAST('2' AS STRING)",
            },
        )
        self.validate_all(
            "SELECT cola FROM (SELECT CAST('1' AS STRING(10)) AS cola UNION ALL SELECT CAST('2' AS STRING(10)) AS cola)",
            write={
                "bigquery": "SELECT cola FROM (SELECT CAST('1' AS STRING) AS cola UNION ALL SELECT CAST('2' AS STRING) AS cola)",
            },
        )
        self.validate_all(
            "INSERT INTO test (cola, colb) VALUES (CAST(7 AS STRING(10)), CAST(14 AS STRING(10)))",
            write={
                "bigquery": "INSERT INTO test (cola, colb) VALUES (CAST(7 AS STRING), CAST(14 AS STRING))",
            },
        )

    def test_merge(self):
        self.validate_all(
            """
            MERGE dataset.Inventory T
            USING dataset.NewArrivals S ON FALSE
            WHEN NOT MATCHED BY TARGET AND product LIKE '%a%'
            THEN DELETE
            WHEN NOT MATCHED BY SOURCE AND product LIKE '%b%'
            THEN DELETE""",
            write={
                "bigquery": "MERGE INTO dataset.Inventory AS T USING dataset.NewArrivals AS S ON FALSE WHEN NOT MATCHED AND product LIKE '%a%' THEN DELETE WHEN NOT MATCHED BY SOURCE AND product LIKE '%b%' THEN DELETE",
                "snowflake": "MERGE INTO dataset.Inventory AS T USING dataset.NewArrivals AS S ON FALSE WHEN NOT MATCHED AND product LIKE '%a%' THEN DELETE WHEN NOT MATCHED AND product LIKE '%b%' THEN DELETE",
            },
        )

    def test_rename_table(self):
        self.validate_all(
            "ALTER TABLE db.t1 RENAME TO db.t2",
            write={
                "snowflake": "ALTER TABLE db.t1 RENAME TO db.t2",
                "bigquery": "ALTER TABLE db.t1 RENAME TO t2",
            },
        )