1
0
Fork 0

Merging upstream version 26.10.1.

Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
Daniel Baumann 2025-03-17 07:38:58 +01:00
parent f2e3d707cd
commit 273cfbc19c
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
66 changed files with 79349 additions and 76628 deletions

View file

@@ -8,6 +8,10 @@ class TestDuckDB(Validator):
dialect = "duckdb"
def test_duckdb(self):
self.validate_identity("x::timestamp", "CAST(x AS TIMESTAMP)")
self.validate_identity("x::timestamp without time zone", "CAST(x AS TIMESTAMP)")
self.validate_identity("x::timestamp with time zone", "CAST(x AS TIMESTAMPTZ)")
with self.assertRaises(ParseError):
parse_one("1 //", read="duckdb")
@@ -1482,3 +1486,86 @@ class TestDuckDB(Validator):
def test_analyze(self):
self.validate_identity("ANALYZE")
def test_prefix_aliases(self):
# https://duckdb.org/2025/02/25/prefix-aliases-in-sql.html
self.validate_identity(
"SELECT foo: 1",
"SELECT 1 AS foo",
)
self.validate_identity(
"SELECT foo: bar",
"SELECT bar AS foo",
)
self.validate_identity(
"SELECT foo: t.col FROM t",
"SELECT t.col AS foo FROM t",
)
self.validate_identity(
'SELECT "foo" /* bla */: 1',
'SELECT 1 AS "foo" /* bla */',
)
self.validate_identity(
'SELECT "foo": 1 /* bla */',
'SELECT 1 AS "foo" /* bla */',
)
self.validate_identity(
'SELECT "foo": /* bla */ 1',
'SELECT 1 AS "foo" /* bla */',
)
self.validate_identity(
'SELECT "foo": /* bla */ 1 /* foo */',
'SELECT 1 AS "foo" /* bla */ /* foo */',
)
self.validate_identity(
'SELECT "foo": 1',
'SELECT 1 AS "foo"',
)
self.validate_identity(
"SELECT foo: 1, bar: 2, baz: 3",
"SELECT 1 AS foo, 2 AS bar, 3 AS baz",
)
self.validate_identity(
"SELECT e: 1 + 2, f: len('asdf'), s: (SELECT 42)",
"SELECT 1 + 2 AS e, LENGTH('asdf') AS f, (SELECT 42) AS s",
)
self.validate_identity(
"SELECT * FROM foo: bar",
"SELECT * FROM bar AS foo",
)
self.validate_identity(
"SELECT * FROM foo: c.db.tbl",
"SELECT * FROM c.db.tbl AS foo",
)
self.validate_identity(
"SELECT * FROM foo /* bla */: bar",
"SELECT * FROM bar AS foo /* bla */",
)
self.validate_identity(
"SELECT * FROM foo /* bla */: bar /* baz */",
"SELECT * FROM bar AS foo /* bla */ /* baz */",
)
self.validate_identity(
"SELECT * FROM foo /* bla */: /* baz */ bar /* boo */",
"SELECT * FROM bar AS foo /* bla */ /* baz */ /* boo */",
)
self.validate_identity(
"SELECT * FROM r: range(10), v: (VALUES (42)), s: (FROM range(10))",
"SELECT * FROM RANGE(0, 10) AS r, (VALUES (42)) AS v, (SELECT * FROM RANGE(0, 10)) AS s",
)
self.validate_identity(
"""
SELECT
l_returnflag,
l_linestatus,
sum_qty: sum(l_quantity),
sum_base_price: sum(l_extendedprice),
sum_disc_price: sum(l_extendedprice * (1-l_discount)),
sum_charge: sum(l_extendedprice * (1-l_discount) * (1+l_tax)),
avg_qty: avg(l_quantity),
avg_price: avg(l_extendedprice),
avg_disc: avg(l_discount),
count_order: count(*)
""",
"SELECT l_returnflag, l_linestatus, SUM(l_quantity) AS sum_qty, SUM(l_extendedprice) AS sum_base_price, SUM(l_extendedprice * (1 - l_discount)) AS sum_disc_price, SUM(l_extendedprice * (1 - l_discount) * (1 + l_tax)) AS sum_charge, AVG(l_quantity) AS avg_qty, AVG(l_extendedprice) AS avg_price, AVG(l_discount) AS avg_disc, COUNT(*) AS count_order",
)

View file

@@ -810,6 +810,21 @@ class TestHive(Validator):
self.validate_identity("SELECT 1_2")
self.validate_all(
"SELECT MAP(*), STRUCT(*) FROM t",
read={
"hive": "SELECT MAP(*), STRUCT(*) FROM t",
"spark2": "SELECT MAP(*), STRUCT(*) FROM t",
"spark": "SELECT MAP(*), STRUCT(*) FROM t",
"databricks": "SELECT MAP(*), STRUCT(*) FROM t",
},
write={
"spark2": "SELECT MAP(*), STRUCT(*) FROM t",
"spark": "SELECT MAP(*), STRUCT(*) FROM t",
"databricks": "SELECT MAP(*), STRUCT(*) FROM t",
},
)
def test_escapes(self) -> None:
self.validate_identity("'\n'", "'\\n'")
self.validate_identity("'\\n'")

View file

@@ -143,13 +143,13 @@ class TestOracle(Validator):
},
)
self.validate_all(
"CURRENT_TIMESTAMP BETWEEN TO_DATE(f.C_SDATE, 'yyyy/mm/dd') AND TO_DATE(f.C_EDATE, 'yyyy/mm/dd')",
"CURRENT_TIMESTAMP BETWEEN TO_DATE(f.C_SDATE, 'YYYY/MM/DD') AND TO_DATE(f.C_EDATE, 'YYYY/MM/DD')",
read={
"postgres": "CURRENT_TIMESTAMP BETWEEN TO_DATE(f.C_SDATE, 'yyyy/mm/dd') AND TO_DATE(f.C_EDATE, 'yyyy/mm/dd')",
},
write={
"oracle": "CURRENT_TIMESTAMP BETWEEN TO_DATE(f.C_SDATE, 'yyyy/mm/dd') AND TO_DATE(f.C_EDATE, 'yyyy/mm/dd')",
"postgres": "CURRENT_TIMESTAMP BETWEEN TO_DATE(f.C_SDATE, 'yyyy/mm/dd') AND TO_DATE(f.C_EDATE, 'yyyy/mm/dd')",
"oracle": "CURRENT_TIMESTAMP BETWEEN TO_DATE(f.C_SDATE, 'YYYY/MM/DD') AND TO_DATE(f.C_EDATE, 'YYYY/MM/DD')",
"postgres": "CURRENT_TIMESTAMP BETWEEN TO_DATE(f.C_SDATE, 'YYYY/MM/DD') AND TO_DATE(f.C_EDATE, 'YYYY/MM/DD')",
},
)
self.validate_all(

View file

@@ -588,6 +588,16 @@ class TestSnowflake(Validator):
"teradata": "TO_CHAR(x, y)",
},
)
self.validate_identity(
"TO_CHAR(foo::DATE, 'yyyy')", "TO_CHAR(CAST(CAST(foo AS DATE) AS TIMESTAMP), 'yyyy')"
)
self.validate_all(
"TO_CHAR(foo::TIMESTAMP, 'YYYY-MM')",
write={
"snowflake": "TO_CHAR(CAST(foo AS TIMESTAMP), 'yyyy-mm')",
"duckdb": "STRFTIME(CAST(foo AS TIMESTAMP), '%Y-%m')",
},
)
self.validate_all(
"SQUARE(x)",
write={
@@ -998,6 +1008,15 @@ class TestSnowflake(Validator):
self.validate_identity("CREATE TABLE t (id INT PRIMARY KEY AUTOINCREMENT)")
self.validate_all(
"SELECT HEX_DECODE_BINARY('65')",
write={
"bigquery": "SELECT FROM_HEX('65')",
"duckdb": "SELECT UNHEX('65')",
"snowflake": "SELECT HEX_DECODE_BINARY('65')",
},
)
def test_null_treatment(self):
self.validate_all(
r"SELECT FIRST_VALUE(TABLE1.COLUMN1) OVER (PARTITION BY RANDOM_COLUMN1, RANDOM_COLUMN2 ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS MY_ALIAS FROM TABLE1",
@@ -1608,44 +1627,21 @@ class TestSnowflake(Validator):
"CREATE PROCEDURE a.b.c(x INT, y VARIANT) RETURNS OBJECT EXECUTE AS CALLER AS 'BEGIN SELECT 1; END;'"
)
def test_table_literal(self):
# All examples from https://docs.snowflake.com/en/sql-reference/literals-table.html
self.validate_all(
r"""SELECT * FROM TABLE('MYTABLE')""",
write={"snowflake": r"""SELECT * FROM TABLE('MYTABLE')"""},
)
self.validate_all(
r"""SELECT * FROM TABLE('MYDB."MYSCHEMA"."MYTABLE"')""",
write={"snowflake": r"""SELECT * FROM TABLE('MYDB."MYSCHEMA"."MYTABLE"')"""},
)
# Per Snowflake documentation at https://docs.snowflake.com/en/sql-reference/literals-table.html
# one can use either a " ' " or " $$ " to enclose the object identifier.
# Capturing the single tokens seems like lot of work. Hence adjusting tests to use these interchangeably,
self.validate_all(
r"""SELECT * FROM TABLE($$MYDB. "MYSCHEMA"."MYTABLE"$$)""",
write={"snowflake": r"""SELECT * FROM TABLE('MYDB. "MYSCHEMA"."MYTABLE"')"""},
)
self.validate_all(
r"""SELECT * FROM TABLE($MYVAR)""",
write={"snowflake": r"""SELECT * FROM TABLE($MYVAR)"""},
)
self.validate_all(
r"""SELECT * FROM TABLE(?)""",
write={"snowflake": r"""SELECT * FROM TABLE(?)"""},
)
self.validate_all(
r"""SELECT * FROM TABLE(:BINDING)""",
write={"snowflake": r"""SELECT * FROM TABLE(:BINDING)"""},
)
self.validate_all(
r"""SELECT * FROM TABLE($MYVAR) WHERE COL1 = 10""",
write={"snowflake": r"""SELECT * FROM TABLE($MYVAR) WHERE COL1 = 10"""},
def test_table_function(self):
self.validate_identity("SELECT * FROM TABLE('MYTABLE')")
self.validate_identity("SELECT * FROM TABLE($MYVAR)")
self.validate_identity("SELECT * FROM TABLE(?)")
self.validate_identity("SELECT * FROM TABLE(:BINDING)")
self.validate_identity("SELECT * FROM TABLE($MYVAR) WHERE COL1 = 10")
self.validate_identity("SELECT * FROM TABLE('t1') AS f")
self.validate_identity("SELECT * FROM (TABLE('t1') CROSS JOIN TABLE('t2'))")
self.validate_identity("SELECT * FROM TABLE('t1'), LATERAL (SELECT * FROM t2)")
self.validate_identity("SELECT * FROM TABLE('t1') UNION ALL SELECT * FROM TABLE('t2')")
self.validate_identity("SELECT * FROM TABLE('t1') TABLESAMPLE BERNOULLI (20.3)")
self.validate_identity("""SELECT * FROM TABLE('MYDB."MYSCHEMA"."MYTABLE"')""")
self.validate_identity(
'SELECT * FROM TABLE($$MYDB. "MYSCHEMA"."MYTABLE"$$)',
"""SELECT * FROM TABLE('MYDB. "MYSCHEMA"."MYTABLE"')""",
)
def test_flatten(self):

View file

@@ -445,7 +445,7 @@ TBLPROPERTIES (
self.validate_all(
"SELECT DATEDIFF(MONTH, CAST('1996-10-30' AS TIMESTAMP), CAST('1997-02-28 10:30:00' AS TIMESTAMP))",
read={
"duckdb": "SELECT DATEDIFF('month', CAST('1996-10-30' AS TIMESTAMP), CAST('1997-02-28 10:30:00' AS TIMESTAMP))",
"duckdb": "SELECT DATEDIFF('month', CAST('1996-10-30' AS TIMESTAMPTZ), CAST('1997-02-28 10:30:00' AS TIMESTAMPTZ))",
},
write={
"spark": "SELECT DATEDIFF(MONTH, TO_DATE(CAST('1996-10-30' AS TIMESTAMP)), TO_DATE(CAST('1997-02-28 10:30:00' AS TIMESTAMP)))",
@@ -488,6 +488,13 @@ TBLPROPERTIES (
"spark": "SELECT CAST('2016-12-31 00:12:00' AS TIMESTAMP)",
},
)
self.validate_all(
"SELECT TO_TIMESTAMP(x, 'zZ')",
write={
"": "SELECT STR_TO_TIME(x, '%Z%z')",
"duckdb": "SELECT STRPTIME(x, '%Z%z')",
},
)
self.validate_all(
"SELECT TO_TIMESTAMP('2016-12-31', 'yyyy-MM-dd')",
read={

View file

@@ -843,6 +843,10 @@ x;
COALESCE(x, 1) = 2;
NOT x IS NULL AND x = 2;
# dialect: redshift
COALESCE(x, 1) = 2;
COALESCE(x, 1) = 2;
2 = COALESCE(x, 1);
NOT x IS NULL AND x = 2;

View file

@@ -1,6 +1,6 @@
import sys
import datetime
import math
import sys
import unittest
from sqlglot import ParseError, alias, exp, parse_one
@@ -277,6 +277,16 @@ class TestExpressions(unittest.TestCase):
"SELECT * FROM (SELECT 1) AS a /* source: a-b.c */",
)
def test_expand_with_lazy_source_provider(self):
self.assertEqual(
exp.expand(
parse_one('select * from "a-b"."C" AS a'),
{"`a-b`.c": lambda: parse_one("select 1", dialect="spark")},
dialect="spark",
).sql(),
"SELECT * FROM (SELECT 1) AS a /* source: a-b.c */",
)
def test_replace_placeholders(self):
self.assertEqual(
exp.replace_placeholders(
@@ -838,6 +848,7 @@ class TestExpressions(unittest.TestCase):
def test_convert(self):
from collections import namedtuple
import pytz
PointTuple = namedtuple("Point", ["x", "y"])

View file

@@ -576,3 +576,48 @@ class TestLineage(unittest.TestCase):
self.assertEqual(node.downstream[0].name, "t.empid")
self.assertEqual(node.downstream[0].reference_node_name, "t")
self.assertEqual(node.downstream[0].downstream[0].name, "quarterly_sales.empid")
def test_table_udtf_snowflake(self) -> None:
lateral_flatten = """
SELECT f.value:external_id::string AS external_id
FROM database_name.schema_name.table_name AS raw,
LATERAL FLATTEN(events) AS f
"""
table_flatten = """
SELECT f.value:external_id::string AS external_id
FROM database_name.schema_name.table_name AS raw
JOIN TABLE(FLATTEN(events)) AS f
"""
lateral_node = lineage("external_id", lateral_flatten, dialect="snowflake")
table_node = lineage("external_id", table_flatten, dialect="snowflake")
self.assertEqual(lateral_node.name, "EXTERNAL_ID")
self.assertEqual(table_node.name, "EXTERNAL_ID")
lateral_node = lateral_node.downstream[0]
table_node = table_node.downstream[0]
self.assertEqual(lateral_node.name, "F.VALUE")
self.assertEqual(
lateral_node.source.sql("snowflake"),
"LATERAL FLATTEN(RAW.EVENTS) AS F(SEQ, KEY, PATH, INDEX, VALUE, THIS)",
)
self.assertEqual(table_node.name, "F.VALUE")
self.assertEqual(table_node.source.sql("snowflake"), "TABLE(FLATTEN(RAW.EVENTS)) AS F")
lateral_node = lateral_node.downstream[0]
table_node = table_node.downstream[0]
self.assertEqual(lateral_node.name, "RAW.EVENTS")
self.assertEqual(
lateral_node.source.sql("snowflake"),
"DATABASE_NAME.SCHEMA_NAME.TABLE_NAME AS RAW",
)
self.assertEqual(table_node.name, "RAW.EVENTS")
self.assertEqual(
table_node.source.sql("snowflake"),
"DATABASE_NAME.SCHEMA_NAME.TABLE_NAME AS RAW",
)