
Adding upstream version 25.18.0.

Signed-off-by: Daniel Baumann <daniel@debian.org>
Commit b35dbeb6b6 (parent 1341bc6477)
Daniel Baumann, 2025-02-13 21:52:44 +01:00
Signed by: daniel (GPG key ID: FBB4F0E80A80222F)
81 changed files with 34133 additions and 33517 deletions


@@ -370,7 +370,7 @@ LANGUAGE js AS
},
write={
"bigquery": "SELECT SUM(x IGNORE NULLS) AS x",
"duckdb": "SELECT SUM(x IGNORE NULLS) AS x",
"duckdb": "SELECT SUM(x) AS x",
"postgres": "SELECT SUM(x) IGNORE NULLS AS x",
"spark": "SELECT SUM(x) IGNORE NULLS AS x",
"snowflake": "SELECT SUM(x) IGNORE NULLS AS x",
@@ -405,7 +405,7 @@ LANGUAGE js AS
"SELECT ARRAY_AGG(DISTINCT x IGNORE NULLS ORDER BY a, b DESC LIMIT 10) AS x",
write={
"bigquery": "SELECT ARRAY_AGG(DISTINCT x IGNORE NULLS ORDER BY a, b DESC LIMIT 10) AS x",
"duckdb": "SELECT ARRAY_AGG(DISTINCT x IGNORE NULLS ORDER BY a NULLS FIRST, b DESC LIMIT 10) AS x",
"duckdb": "SELECT ARRAY_AGG(DISTINCT x ORDER BY a NULLS FIRST, b DESC LIMIT 10) AS x",
"spark": "SELECT COLLECT_LIST(DISTINCT x ORDER BY a, b DESC LIMIT 10) IGNORE NULLS AS x",
},
)
@@ -413,7 +413,7 @@ LANGUAGE js AS
"SELECT ARRAY_AGG(DISTINCT x IGNORE NULLS ORDER BY a, b DESC LIMIT 1, 10) AS x",
write={
"bigquery": "SELECT ARRAY_AGG(DISTINCT x IGNORE NULLS ORDER BY a, b DESC LIMIT 1, 10) AS x",
"duckdb": "SELECT ARRAY_AGG(DISTINCT x IGNORE NULLS ORDER BY a NULLS FIRST, b DESC LIMIT 1, 10) AS x",
"duckdb": "SELECT ARRAY_AGG(DISTINCT x ORDER BY a NULLS FIRST, b DESC LIMIT 1, 10) AS x",
"spark": "SELECT COLLECT_LIST(DISTINCT x ORDER BY a, b DESC LIMIT 1, 10) IGNORE NULLS AS x",
},
)
@@ -814,6 +814,7 @@ LANGUAGE js AS
"presto": "SHA256(x)",
"redshift": "SHA2(x, 256)",
"trino": "SHA256(x)",
"duckdb": "SHA256(x)",
},
)
self.validate_all(

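For context, a minimal sketch of the BigQuery-to-DuckDB behavior the updated expectations above pin down, using sqlglot's public transpile API (the output string is the one asserted above):

import sqlglot

# DuckDB aggregate functions skip NULLs by default, so the IGNORE NULLS
# qualifier is dropped rather than emitted verbatim.
print(sqlglot.transpile("SELECT SUM(x IGNORE NULLS) AS x", read="bigquery", write="duckdb")[0])
# SELECT SUM(x) AS x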

@@ -492,6 +492,8 @@ class TestClickhouse(Validator):
"postgres": "INSERT INTO t (col1, col2) VALUES ('abcd', 1234)",
},
)
self.validate_identity("SELECT TRIM(TRAILING ')' FROM '( Hello, world! )')")
self.validate_identity("SELECT TRIM(LEADING '(' FROM '( Hello, world! )')")
def test_clickhouse_values(self):
values = exp.select("*").from_(
@@ -620,6 +622,14 @@ class TestClickhouse(Validator):
)
self.assertEqual(create_with_cluster.sql("clickhouse"), "CREATE DATABASE foo ON CLUSTER c")
# Transpiled CREATE SCHEMA may have OnCluster property set
create_with_cluster = exp.Create(
this=db_table_expr,
kind="SCHEMA",
properties=exp.Properties(expressions=[exp.OnCluster(this=exp.to_identifier("c"))]),
)
self.assertEqual(create_with_cluster.sql("clickhouse"), "CREATE DATABASE foo ON CLUSTER c")
ctas_with_comment = exp.Create(
this=exp.table_("foo"),
kind="TABLE",


@@ -655,6 +655,22 @@ class TestDialect(Validator):
"doris": "CAST('2020-01-01' AS DATETIME)",
},
)
self.validate_all(
"TIME_STR_TO_TIME('2020-01-01 12:13:14.123456+00:00')",
write={
"mysql": "CAST('2020-01-01 12:13:14.123456+00:00' AS DATETIME(6))",
"trino": "CAST('2020-01-01 12:13:14.123456+00:00' AS TIMESTAMP(6))",
"presto": "CAST('2020-01-01 12:13:14.123456+00:00' AS TIMESTAMP)",
},
)
self.validate_all(
"TIME_STR_TO_TIME('2020-01-01 12:13:14.123-08:00', 'America/Los_Angeles')",
write={
"mysql": "TIMESTAMP('2020-01-01 12:13:14.123-08:00')",
"trino": "CAST('2020-01-01 12:13:14.123-08:00' AS TIMESTAMP(3) WITH TIME ZONE)",
"presto": "CAST('2020-01-01 12:13:14.123-08:00' AS TIMESTAMP WITH TIME ZONE)",
},
)
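These expectations encode a precision-inference rule: the target type's fractional-seconds precision is derived from the literal itself. A minimal sketch, assuming the default sqlglot dialect on the read side (as validate_all uses here):

import sqlglot

# Six fractional digits in the literal -> DATETIME(6) in MySQL, per the expectation above.
print(sqlglot.transpile("TIME_STR_TO_TIME('2020-01-01 12:13:14.123456+00:00')", write="mysql")[0])
# CAST('2020-01-01 12:13:14.123456+00:00' AS DATETIME(6))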
self.validate_all(
"TIME_STR_TO_TIME('2020-01-01 12:13:14-08:00', 'America/Los_Angeles')",
write={
@@ -662,7 +678,7 @@ class TestDialect(Validator):
"databricks": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMP)",
"duckdb": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMPTZ)",
"tsql": "CAST('2020-01-01 12:13:14-08:00' AS DATETIMEOFFSET) AT TIME ZONE 'UTC'",
"mysql": "CAST('2020-01-01 12:13:14-08:00' AS DATETIME)",
"mysql": "TIMESTAMP('2020-01-01 12:13:14-08:00')",
"postgres": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMPTZ)",
"redshift": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMP WITH TIME ZONE)",
"snowflake": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMPTZ)",
@@ -683,7 +699,7 @@ class TestDialect(Validator):
"databricks": "CAST(col AS TIMESTAMP)",
"duckdb": "CAST(col AS TIMESTAMPTZ)",
"tsql": "CAST(col AS DATETIMEOFFSET) AT TIME ZONE 'UTC'",
"mysql": "CAST(col AS DATETIME)",
"mysql": "TIMESTAMP(col)",
"postgres": "CAST(col AS TIMESTAMPTZ)",
"redshift": "CAST(col AS TIMESTAMP WITH TIME ZONE)",
"snowflake": "CAST(col AS TIMESTAMPTZ)",
@@ -722,6 +738,13 @@ class TestDialect(Validator):
"doris": "DATE_FORMAT(x, '%Y-%m-%d')",
},
)
self.validate_all(
"TIME_TO_STR(a, '%Y-%m-%d %H:%M:%S.%f')",
write={
"redshift": "TO_CHAR(a, 'YYYY-MM-DD HH24:MI:SS.US')",
"tsql": "FORMAT(a, 'yyyy-MM-dd HH:mm:ss.ffffff')",
},
)
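The %f token here maps to each target's microsecond format token ('US' in Redshift's TO_CHAR, 'ffffff' in T-SQL's FORMAT). A sketch of the same mapping via transpile, output copied from the expectation above:

import sqlglot

print(sqlglot.transpile("TIME_TO_STR(a, '%Y-%m-%d %H:%M:%S.%f')", write="redshift")[0])
# TO_CHAR(a, 'YYYY-MM-DD HH24:MI:SS.US')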
self.validate_all(
"TIME_TO_TIME_STR(x)",
write={


@@ -231,10 +231,6 @@ class TestDuckDB(Validator):
},
)
self.validate_identity("INSERT INTO x BY NAME SELECT 1 AS y")
self.validate_identity("SELECT 1 AS x UNION ALL BY NAME SELECT 2 AS x")
self.validate_identity("SELECT SUM(x) FILTER (x = 1)", "SELECT SUM(x) FILTER(WHERE x = 1)")
# https://github.com/duckdb/duckdb/releases/tag/v0.8.0
self.assertEqual(
parse_one("a / b", read="duckdb").assert_is(exp.Div).sql(dialect="duckdb"), "a / b"
@@ -243,6 +239,10 @@ class TestDuckDB(Validator):
parse_one("a // b", read="duckdb").assert_is(exp.IntDiv).sql(dialect="duckdb"), "a // b"
)
self.validate_identity("INSERT INTO x BY NAME SELECT 1 AS y")
self.validate_identity("SELECT 1 AS x UNION ALL BY NAME SELECT 2 AS x")
self.validate_identity("SELECT SUM(x) FILTER (x = 1)", "SELECT SUM(x) FILTER(WHERE x = 1)")
self.validate_identity("SELECT * FROM GLOB(x)")
self.validate_identity("SELECT MAP(['key1', 'key2', 'key3'], [10, 20, 30])")
self.validate_identity("SELECT MAP {'x': 1}")
self.validate_identity("SELECT (MAP {'x': 1})['x']")
@@ -277,6 +277,15 @@ class TestDuckDB(Validator):
self.validate_identity("SUMMARIZE tbl").assert_is(exp.Summarize)
self.validate_identity("SUMMARIZE SELECT * FROM tbl").assert_is(exp.Summarize)
self.validate_identity("CREATE TABLE tbl_summary AS SELECT * FROM (SUMMARIZE tbl)")
self.validate_identity(
"SELECT species, island, COUNT(*) FROM t GROUP BY GROUPING SETS (species), GROUPING SETS (island)"
)
self.validate_identity(
"SELECT species, island, COUNT(*) FROM t GROUP BY CUBE (species), CUBE (island)"
)
self.validate_identity(
"SELECT species, island, COUNT(*) FROM t GROUP BY ROLLUP (species), ROLLUP (island)"
)
self.validate_identity(
"SUMMARIZE TABLE 'https://blobs.duckdb.org/data/Star_Trek-Season_1.csv'"
).assert_is(exp.Summarize)
@@ -999,6 +1008,13 @@ class TestDuckDB(Validator):
"duckdb": "SELECT * FROM example TABLESAMPLE RESERVOIR (3 ROWS) REPEATABLE (82)",
},
)
self.validate_all(
"SELECT * FROM (SELECT * FROM t) AS t1 TABLESAMPLE (1 ROWS), (SELECT * FROM t) AS t2 TABLESAMPLE (2 ROWS)",
write={
"duckdb": "SELECT * FROM (SELECT * FROM t) AS t1 TABLESAMPLE RESERVOIR (1 ROWS), (SELECT * FROM t) AS t2 TABLESAMPLE RESERVOIR (2 ROWS)",
"spark": "SELECT * FROM (SELECT * FROM t) TABLESAMPLE (1 ROWS) AS t1, (SELECT * FROM t) TABLESAMPLE (2 ROWS) AS t2",
},
)
def test_array(self):
self.validate_identity("ARRAY(SELECT id FROM t)")
@@ -1256,3 +1272,20 @@ class TestDuckDB(Validator):
read={"bigquery": "SELECT @foo"},
write={"bigquery": "SELECT @foo", "duckdb": "SELECT $foo"},
)
def test_ignore_nulls(self):
# Note that DuckDB differentiates window functions (e.g. LEAD, LAG) from aggregate functions (e.g. SUM)
from sqlglot.dialects.duckdb import WINDOW_FUNCS_WITH_IGNORE_NULLS
agg_funcs = (exp.Sum, exp.Max, exp.Min)
for func_type in WINDOW_FUNCS_WITH_IGNORE_NULLS + agg_funcs:
func = func_type(this=exp.to_identifier("col"))
ignore_null = exp.IgnoreNulls(this=func)
windowed_ignore_null = exp.Window(this=ignore_null)
if func_type in WINDOW_FUNCS_WITH_IGNORE_NULLS:
self.assertIn("IGNORE NULLS", windowed_ignore_null.sql("duckdb"))
else:
self.assertEqual(ignore_null.sql("duckdb"), func.sql("duckdb"))
self.assertNotIn("IGNORE NULLS", windowed_ignore_null.sql("duckdb"))
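Unrolled for a single case, this is the distinction the loop checks; using exp.Lead as the window-function example is an assumption based on the note above:

from sqlglot import exp

# Window function: IGNORE NULLS is preserved in the rendered SQL
# (assumes exp.Lead is in WINDOW_FUNCS_WITH_IGNORE_NULLS, per the note above).
lead = exp.Window(this=exp.IgnoreNulls(this=exp.Lead(this=exp.to_identifier("col"))))
print(lead.sql("duckdb"))  # contains "IGNORE NULLS"
# Plain aggregate: the qualifier is dropped, since DuckDB aggregates already skip NULLs.
agg = exp.IgnoreNulls(this=exp.Sum(this=exp.to_identifier("col")))
print(agg.sql("duckdb"))  # SUM(col)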


@@ -1,3 +1,6 @@
import unittest
import sys
from sqlglot import expressions as exp
from sqlglot.dialects.mysql import MySQL
from tests.dialects.test_dialect import Validator
@@ -637,6 +640,53 @@ class TestMySQL(Validator):
},
)
# No timezone, make sure DATETIME captures the correct precision
self.validate_identity(
"SELECT TIME_STR_TO_TIME('2023-01-01 13:14:15.123456+00:00')",
write_sql="SELECT CAST('2023-01-01 13:14:15.123456+00:00' AS DATETIME(6))",
)
self.validate_identity(
"SELECT TIME_STR_TO_TIME('2023-01-01 13:14:15.123+00:00')",
write_sql="SELECT CAST('2023-01-01 13:14:15.123+00:00' AS DATETIME(3))",
)
self.validate_identity(
"SELECT TIME_STR_TO_TIME('2023-01-01 13:14:15+00:00')",
write_sql="SELECT CAST('2023-01-01 13:14:15+00:00' AS DATETIME)",
)
# With timezone, make sure the TIMESTAMP constructor is used
# Also, TIMESTAMP doesn't have the subsecond-precision truncation issue that DATETIME does, so we don't need TIMESTAMP(6)
self.validate_identity(
"SELECT TIME_STR_TO_TIME('2023-01-01 13:14:15-08:00', 'America/Los_Angeles')",
write_sql="SELECT TIMESTAMP('2023-01-01 13:14:15-08:00')",
)
self.validate_identity(
"SELECT TIME_STR_TO_TIME('2023-01-01 13:14:15-08:00', 'America/Los_Angeles')",
write_sql="SELECT TIMESTAMP('2023-01-01 13:14:15-08:00')",
)
@unittest.skipUnless(
sys.version_info >= (3, 11),
"Python 3.11 relaxed datetime.fromisoformat() parsing with regards to microseconds",
)
def test_mysql_time_python311(self):
self.validate_identity(
"SELECT TIME_STR_TO_TIME('2023-01-01 13:14:15.12345+00:00')",
write_sql="SELECT CAST('2023-01-01 13:14:15.12345+00:00' AS DATETIME(6))",
)
self.validate_identity(
"SELECT TIME_STR_TO_TIME('2023-01-01 13:14:15.1234+00:00')",
write_sql="SELECT CAST('2023-01-01 13:14:15.1234+00:00' AS DATETIME(6))",
)
self.validate_identity(
"SELECT TIME_STR_TO_TIME('2023-01-01 13:14:15.12+00:00')",
write_sql="SELECT CAST('2023-01-01 13:14:15.12+00:00' AS DATETIME(3))",
)
self.validate_identity(
"SELECT TIME_STR_TO_TIME('2023-01-01 13:14:15.1+00:00')",
write_sql="SELECT CAST('2023-01-01 13:14:15.1+00:00' AS DATETIME(3))",
)
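The DATETIME(6)/DATETIME(3)/DATETIME buckets come from the subsecond precision of the literal; a sketch using the sqlglot.time.subsecond_precision helper introduced in this version (see the test_time.py changes below for its behavior):

from sqlglot.time import subsecond_precision

print(subsecond_precision("2023-01-01 13:14:15.123456+00:00"))  # 6
print(subsecond_precision("2023-01-01 13:14:15.123+00:00"))  # 3
print(subsecond_precision("2023-01-01 13:14:15+00:00"))  # 0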
def test_mysql(self):
self.validate_all(
"SELECT CONCAT('11', '22')",
@@ -1192,3 +1242,24 @@ COMMENT='客户账户表'"""
"mysql": f"DATE_ADD('0000-01-01 00:00:00', INTERVAL (TIMESTAMPDIFF({unit}, '0000-01-01 00:00:00', CAST('2001-02-16 20:38:40' AS DATETIME))) {unit})",
},
)
def test_at_time_zone(self):
with self.assertLogs() as cm:
# Check that AT TIME ZONE doesn't discard the column name and also raises a warning
self.validate_identity(
"SELECT foo AT TIME ZONE 'UTC'",
write_sql="SELECT foo",
)
assert "AT TIME ZONE is not supported" in cm.output[0]
def test_json_value(self):
json_doc = """'{"item": "shoes", "price": "49.95"}'"""
self.validate_identity(f"""SELECT JSON_VALUE({json_doc}, '$.price')""")
self.validate_identity(
f"""SELECT JSON_VALUE({json_doc}, '$.price' RETURNING DECIMAL(4, 2))"""
)
for on_option in ("NULL", "ERROR", "DEFAULT 1"):
self.validate_identity(
f"""SELECT JSON_VALUE({json_doc}, '$.price' RETURNING DECIMAL(4, 2) {on_option} ON EMPTY {on_option} ON ERROR) AS price"""
)


@@ -251,7 +251,6 @@ class TestOracle(Validator):
"""SELECT * FROM t ORDER BY a ASC NULLS LAST, b ASC NULLS FIRST, c DESC NULLS LAST, d DESC NULLS FIRST""",
"""SELECT * FROM t ORDER BY a ASC, b ASC NULLS FIRST, c DESC NULLS LAST, d DESC""",
)
self.validate_all(
"NVL(NULL, 1)",
write={
@@ -260,6 +259,29 @@ class TestOracle(Validator):
"clickhouse": "COALESCE(NULL, 1)",
},
)
self.validate_all(
"LTRIM('Hello World', 'H')",
write={
"": "LTRIM('Hello World', 'H')",
"oracle": "LTRIM('Hello World', 'H')",
"clickhouse": "TRIM(LEADING 'H' FROM 'Hello World')",
},
)
self.validate_all(
"RTRIM('Hello World', 'd')",
write={
"": "RTRIM('Hello World', 'd')",
"oracle": "RTRIM('Hello World', 'd')",
"clickhouse": "TRIM(TRAILING 'd' FROM 'Hello World')",
},
)
self.validate_all(
"TRIM(BOTH 'h' FROM 'Hello World')",
write={
"oracle": "TRIM(BOTH 'h' FROM 'Hello World')",
"clickhouse": "TRIM(BOTH 'h' FROM 'Hello World')",
},
)
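A sketch of the new two-argument LTRIM/RTRIM handling, outputs copied from the expectations above; ClickHouse expresses the trim character via TRIM(LEADING/TRAILING ... FROM ...):

import sqlglot

print(sqlglot.transpile("LTRIM('Hello World', 'H')", read="oracle", write="clickhouse")[0])
# TRIM(LEADING 'H' FROM 'Hello World')
print(sqlglot.transpile("RTRIM('Hello World', 'd')", read="oracle", write="clickhouse")[0])
# TRIM(TRAILING 'd' FROM 'Hello World')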
def test_join_marker(self):
self.validate_identity("SELECT e1.x, e2.x FROM e e1, e e2 WHERE e1.y (+) = e2.y")
@@ -347,7 +369,7 @@ FROM warehouses, XMLTABLE(
FROM XMLTABLE(
'ROWSET/ROW'
PASSING
- dbms_xmlgen.GETXMLTYPE('SELECT table_name, column_name, data_default FROM user_tab_columns')
+ dbms_xmlgen.getxmltype('SELECT table_name, column_name, data_default FROM user_tab_columns')
COLUMNS
table_name VARCHAR2(128) PATH '*[1]',
column_name VARCHAR2(128) PATH '*[2]',


@@ -675,13 +675,6 @@ class TestPostgres(Validator):
"presto": "RTRIM(' XXX ')",
},
)
- self.validate_all(
- """'{"a":1,"b":2}'::json->'b'""",
- write={
- "postgres": """CAST('{"a":1,"b":2}' AS JSON) -> 'b'""",
- "redshift": """JSON_EXTRACT_PATH_TEXT('{"a":1,"b":2}', 'b')""",
- },
- )
self.validate_all(
"TRIM(BOTH 'as' FROM 'as string as')",
write={
@@ -689,6 +682,21 @@ class TestPostgres(Validator):
"spark": "TRIM(BOTH 'as' FROM 'as string as')",
},
)
+ self.validate_identity(
+ """SELECT TRIM(LEADING ' XXX ' COLLATE "de_DE")""",
+ """SELECT LTRIM(' XXX ' COLLATE "de_DE")""",
+ )
+ self.validate_identity(
+ """SELECT TRIM(TRAILING ' XXX ' COLLATE "de_DE")""",
+ """SELECT RTRIM(' XXX ' COLLATE "de_DE")""",
+ )
+ self.validate_all(
+ """'{"a":1,"b":2}'::json->'b'""",
+ write={
+ "postgres": """CAST('{"a":1,"b":2}' AS JSON) -> 'b'""",
+ "redshift": """JSON_EXTRACT_PATH_TEXT('{"a":1,"b":2}', 'b')""",
+ },
+ )
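A sketch of the TRIM canonicalization asserted in the identities above: directional TRIM without an explicit trim string collapses to LTRIM/RTRIM, and the COLLATE clause is carried along:

import sqlglot

sql = """SELECT TRIM(LEADING ' XXX ' COLLATE "de_DE")"""
print(sqlglot.transpile(sql, read="postgres", write="postgres")[0])
# SELECT LTRIM(' XXX ' COLLATE "de_DE")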
self.validate_all(
"""merge into x as x using (select id) as y on a = b WHEN matched then update set X."A" = y.b""",
write={
@@ -780,6 +788,13 @@ class TestPostgres(Validator):
},
)
self.validate_identity(
'SELECT js, js IS JSON AS "json?", js IS JSON VALUE AS "scalar?", js IS JSON SCALAR AS "scalar?", js IS JSON OBJECT AS "object?", js IS JSON ARRAY AS "array?" FROM t'
)
self.validate_identity(
'SELECT js, js IS JSON ARRAY WITH UNIQUE KEYS AS "array w. UK?", js IS JSON ARRAY WITHOUT UNIQUE KEYS AS "array w/o UK?", js IS JSON ARRAY UNIQUE KEYS AS "array w UK 2?" FROM t'
)
def test_ddl(self):
# Checks that user-defined types are parsed into DataType instead of Identifier
self.parse_one("CREATE TABLE t (a udt)").this.expressions[0].args["kind"].assert_is(


@@ -634,6 +634,7 @@ class TestPresto(Validator):
},
)
self.validate_identity("SELECT a FROM t GROUP BY a, ROLLUP (b), ROLLUP (c), ROLLUP (d)")
self.validate_identity("SELECT a FROM test TABLESAMPLE BERNOULLI (50)")
self.validate_identity("SELECT a FROM test TABLESAMPLE SYSTEM (75)")
self.validate_identity("string_agg(x, ',')", "ARRAY_JOIN(ARRAY_AGG(x), ',')")
@@ -715,9 +716,6 @@ class TestPresto(Validator):
)
self.validate_all(
"SELECT ROW(1, 2)",
- read={
- "spark": "SELECT STRUCT(1, 2)",
- },
write={
"presto": "SELECT ROW(1, 2)",
"spark": "SELECT STRUCT(1, 2)",
@@ -835,12 +833,6 @@ class TestPresto(Validator):
"trino": "ARRAY_AGG(x ORDER BY y DESC)",
},
)
- self.validate_all(
- "SELECT a FROM t GROUP BY a, ROLLUP(b), ROLLUP(c), ROLLUP(d)",
- write={
- "presto": "SELECT a FROM t GROUP BY a, ROLLUP (b, c, d)",
- },
- )
self.validate_all(
'SELECT a."b" FROM "foo"',
write={


@@ -598,12 +598,12 @@ WHERE
self.validate_all(
"DIV0(foo, bar)",
write={
"snowflake": "IFF(bar = 0, 0, foo / bar)",
"sqlite": "IIF(bar = 0, 0, CAST(foo AS REAL) / bar)",
"presto": "IF(bar = 0, 0, CAST(foo AS DOUBLE) / bar)",
"spark": "IF(bar = 0, 0, foo / bar)",
"hive": "IF(bar = 0, 0, foo / bar)",
"duckdb": "CASE WHEN bar = 0 THEN 0 ELSE foo / bar END",
"snowflake": "IFF(bar = 0 AND NOT foo IS NULL, 0, foo / bar)",
"sqlite": "IIF(bar = 0 AND NOT foo IS NULL, 0, CAST(foo AS REAL) / bar)",
"presto": "IF(bar = 0 AND NOT foo IS NULL, 0, CAST(foo AS DOUBLE) / bar)",
"spark": "IF(bar = 0 AND NOT foo IS NULL, 0, foo / bar)",
"hive": "IF(bar = 0 AND NOT foo IS NULL, 0, foo / bar)",
"duckdb": "CASE WHEN bar = 0 AND NOT foo IS NULL THEN 0 ELSE foo / bar END",
},
)
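The behavioral change here: DIV0 now returns 0 only when the divisor is 0 and the dividend is not NULL, so DIV0(NULL, 0) stays NULL instead of becoming 0. A sketch, output copied from the DuckDB expectation above:

import sqlglot

print(sqlglot.transpile("DIV0(foo, bar)", read="snowflake", write="duckdb")[0])
# CASE WHEN bar = 0 AND NOT foo IS NULL THEN 0 ELSE foo / bar END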
self.validate_all(


@@ -10,6 +10,7 @@ class TestSpark(Validator):
dialect = "spark"
def test_ddl(self):
self.validate_identity("INSERT OVERWRITE TABLE db1.tb1 TABLE db2.tb2")
self.validate_identity("CREATE TABLE foo AS WITH t AS (SELECT 1 AS col) SELECT col FROM t")
self.validate_identity("CREATE TEMPORARY VIEW test AS SELECT 1")
self.validate_identity("CREATE TABLE foo (col VARCHAR(50))")
@@ -484,7 +485,7 @@ TBLPROPERTIES (
)
self.validate_all(
"SELECT CAST(STRUCT('fooo') AS STRUCT<a: VARCHAR(2)>)",
write={"spark": "SELECT CAST(STRUCT('fooo') AS STRUCT<a: STRING>)"},
write={"spark": "SELECT CAST(STRUCT('fooo' AS col1) AS STRUCT<a: STRING>)"},
)
self.validate_all(
"SELECT CAST(123456 AS VARCHAR(3))",
@@ -710,6 +711,29 @@ TBLPROPERTIES (
)
self.validate_identity("DESCRIBE schema.test PARTITION(ds = '2024-01-01')")
self.validate_all(
"SELECT ANY_VALUE(col, true), FIRST(col, true), FIRST_VALUE(col, true) OVER ()",
write={
"duckdb": "SELECT ANY_VALUE(col), FIRST(col), FIRST_VALUE(col IGNORE NULLS) OVER ()"
},
)
self.validate_all(
"SELECT STRUCT(1, 2)",
write={
"spark": "SELECT STRUCT(1 AS col1, 2 AS col2)",
"presto": "SELECT CAST(ROW(1, 2) AS ROW(col1 INTEGER, col2 INTEGER))",
"duckdb": "SELECT {'col1': 1, 'col2': 2}",
},
)
self.validate_all(
"SELECT STRUCT(x, 1, y AS col3, STRUCT(5)) FROM t",
write={
"spark": "SELECT STRUCT(x AS x, 1 AS col2, y AS col3, STRUCT(5 AS col1) AS col4) FROM t",
"duckdb": "SELECT {'x': x, 'col2': 1, 'col3': y, 'col4': {'col1': 5}} FROM t",
},
)
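A sketch of the new positional-STRUCT aliasing, outputs copied from the expectations above: unnamed fields get generated col1, col2, ... names so typed targets such as Presto ROW and DuckDB struct literals line up:

import sqlglot

print(sqlglot.transpile("SELECT STRUCT(1, 2)", read="spark", write="duckdb")[0])
# SELECT {'col1': 1, 'col2': 2}
print(sqlglot.transpile("SELECT STRUCT(1, 2)", read="spark", write="presto")[0])
# SELECT CAST(ROW(1, 2) AS ROW(col1 INTEGER, col2 INTEGER))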
def test_bool_or(self):
self.validate_all(
"SELECT a, LOGICAL_OR(b) FROM table GROUP BY a",


@@ -91,6 +91,10 @@ class TestSQLite(Validator):
read={"snowflake": "LEAST(x, y, z)"},
write={"snowflake": "LEAST(x, y, z)"},
)
self.validate_identity(
"SELECT * FROM station WHERE city IS NOT ''",
"SELECT * FROM station WHERE NOT city IS ''",
)
def test_strftime(self):
self.validate_identity("SELECT STRFTIME('%Y/%m/%d', 'now')")


@@ -7,6 +7,7 @@ class TestStarrocks(Validator):
def test_identity(self):
self.validate_identity("SELECT CAST(`a`.`b` AS INT) FROM foo")
self.validate_identity("SELECT APPROX_COUNT_DISTINCT(a) FROM x")
self.validate_identity("SELECT [1, 2, 3]")
def test_time(self):
self.validate_identity("TIMESTAMP('2022-01-01')")
@@ -28,3 +29,24 @@ class TestStarrocks(Validator):
"mysql": "SELECT REGEXP_LIKE(abc, '%foo%')",
},
)
def test_unnest(self):
self.validate_identity(
"SELECT student, score, t.unnest FROM tests CROSS JOIN LATERAL UNNEST(scores) AS t",
"SELECT student, score, t.unnest FROM tests CROSS JOIN LATERAL UNNEST(scores) AS t(unnest)",
)
lateral_explode_sqls = [
"SELECT id, t.col FROM tbl, UNNEST(scores) AS t(col)",
"SELECT id, t.col FROM tbl CROSS JOIN LATERAL UNNEST(scores) AS t(col)",
]
for sql in lateral_explode_sqls:
with self.subTest(f"Testing Starrocks roundtrip & transpilation of: {sql}"):
self.validate_all(
sql,
write={
"starrocks": sql,
"spark": "SELECT id, t.col FROM tbl LATERAL VIEW EXPLODE(scores) t AS col",
},
)
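A sketch of the StarRocks lateral-UNNEST transpilation exercised above, output copied from the Spark expectation:

import sqlglot

sql = "SELECT id, t.col FROM tbl CROSS JOIN LATERAL UNNEST(scores) AS t(col)"
print(sqlglot.transpile(sql, read="starrocks", write="spark")[0])
# SELECT id, t.col FROM tbl LATERAL VIEW EXPLODE(scores) t AS col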


@@ -31,6 +31,9 @@ class TestTSQL(Validator):
self.validate_identity("1 AND true", "1 <> 0 AND (1 = 1)")
self.validate_identity("CAST(x AS int) OR y", "CAST(x AS INTEGER) <> 0 OR y <> 0")
self.validate_identity("TRUNCATE TABLE t1 WITH (PARTITIONS(1, 2 TO 5, 10 TO 20, 84))")
self.validate_identity(
"SELECT TOP 10 s.RECORDID, n.c.value('(/*:FORM_ROOT/*:SOME_TAG)[1]', 'float') AS SOME_TAG_VALUE FROM source_table.dbo.source_data AS s(nolock) CROSS APPLY FormContent.nodes('/*:FORM_ROOT') AS N(C)"
)
self.validate_identity(
"CREATE CLUSTERED INDEX [IX_OfficeTagDetail_TagDetailID] ON [dbo].[OfficeTagDetail]([TagDetailID] ASC)"
)
@@ -412,6 +415,12 @@ class TestTSQL(Validator):
},
)
# Check that TRUE and FALSE don't get expanded to (1=1) or (1=0) when used in a VALUES expression
self.validate_identity(
"SELECT val FROM (VALUES ((TRUE), (FALSE), (NULL))) AS t(val)",
write_sql="SELECT val FROM (VALUES ((1), (0), (NULL))) AS t(val)",
)
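A sketch of the T-SQL round trip asserted above: inside VALUES, the boolean literals become plain 1/0 rather than the (1 = 1)-style predicates used in boolean contexts:

import sqlglot

sql = "SELECT val FROM (VALUES ((TRUE), (FALSE), (NULL))) AS t(val)"
print(sqlglot.transpile(sql, read="tsql", write="tsql")[0])
# SELECT val FROM (VALUES ((1), (0), (NULL))) AS t(val)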
def test_option(self):
possible_options = [
"HASH GROUP",
@@ -1577,8 +1586,8 @@ WHERE
self.validate_all(
"SELECT t.x, y.z FROM x OUTER APPLY a.b.tvfTest(t.x)y(z)",
write={
"spark": "SELECT t.x, y.z FROM x LEFT JOIN LATERAL a.b.TVFTEST(t.x) AS y(z)",
"tsql": "SELECT t.x, y.z FROM x OUTER APPLY a.b.TVFTEST(t.x) AS y(z)",
"spark": "SELECT t.x, y.z FROM x LEFT JOIN LATERAL a.b.tvfTest(t.x) AS y(z)",
"tsql": "SELECT t.x, y.z FROM x OUTER APPLY a.b.tvfTest(t.x) AS y(z)",
},
)


@@ -632,6 +632,7 @@ SELECT x.a + 1 AS i, x.a + 1 + 1 AS j, x.a + 1 + 1 + 1 AS k FROM x AS x;
# title: noop - reference comes before alias
# execute: false
# validate_qualify_columns: false
SELECT i + 1 AS j, x.a + 1 AS i FROM x;
SELECT i + 1 AS j, x.a + 1 AS i FROM x AS x;
@@ -704,3 +705,6 @@ SELECT _q_0.a AS a, _q_0.b AS b, _q_1.b AS b, _q_1.c AS c FROM ((SELECT x.a AS a
SELECT b FROM ((SELECT a FROM x) INNER JOIN y ON a = b);
SELECT y.b AS b FROM ((SELECT x.a AS a FROM x AS x) AS _q_0 INNER JOIN y AS y ON _q_0.a = y.b);
SELECT a, c FROM x TABLESAMPLE SYSTEM (10 ROWS) CROSS JOIN y TABLESAMPLE SYSTEM (10 ROWS);
SELECT x.a AS a, y.c AS c FROM x AS x TABLESAMPLE SYSTEM (10 ROWS) CROSS JOIN y AS y TABLESAMPLE SYSTEM (10 ROWS);


@@ -180,3 +180,7 @@ SELECT a.id, a_2.id FROM cat.db1.a AS a JOIN c.db2.a AS a_2 ON a.id = a_2.id;
COPY INTO (SELECT * FROM x) TO 'data' WITH (FORMAT 'CSV');
COPY INTO (SELECT * FROM c.db.x AS x) TO 'data' WITH (FORMAT 'CSV');
# title: tablesample
SELECT 1 FROM x TABLESAMPLE SYSTEM (10 PERCENT) CROSS JOIN y TABLESAMPLE SYSTEM (10 PERCENT);
SELECT 1 FROM c.db.x AS x TABLESAMPLE SYSTEM (10 PERCENT) CROSS JOIN c.db.y AS y TABLESAMPLE SYSTEM (10 PERCENT);


@@ -85,6 +85,27 @@ NULL;
NULL = NULL;
NULL;
1 AND 0;
FALSE;
0 AND 1;
FALSE;
0 OR 1;
TRUE;
0 OR NULL;
NULL;
NULL OR 0;
NULL;
0 AND NULL;
FALSE;
NULL AND 0;
FALSE;
-- Can't optimize this because different engines do different things
-- mysql converts to 0 and 1 but tsql uses true and false
NULL <=> NULL;


@@ -852,10 +852,10 @@ SELECT
SUM("x"."profit") AS "profit"
FROM "x" AS "x"
GROUP BY
ROLLUP (
"x"."channel",
"x"."id"
)
ORDER BY
"channel",
"id"
@@ -2294,12 +2294,12 @@ SELECT
SUM("y"."number_sales") AS "_col_5"
FROM "y" AS "y"
GROUP BY
ROLLUP (
"y"."channel",
"y"."i_brand_id",
"y"."i_class_id",
"y"."i_category_id"
)
ORDER BY
"channel",
"i_brand_id",
@@ -2608,12 +2608,12 @@ JOIN "customer_address" AS "customer_address"
ON "customer"."c_current_addr_sk" = "customer_address"."ca_address_sk"
AND "customer_address"."ca_state" IN ('KS', 'IA', 'AL', 'UT', 'VA', 'NC', 'TX')
GROUP BY
ROLLUP (
"item"."i_item_id",
"customer_address"."ca_country",
"customer_address"."ca_state",
"customer_address"."ca_county"
)
ORDER BY
"ca_country",
"ca_state",
@@ -2876,12 +2876,12 @@ JOIN "item" AS "item"
JOIN "warehouse" AS "warehouse"
ON "inventory"."inv_warehouse_sk" = "warehouse"."w_warehouse_sk"
GROUP BY
ROLLUP (
"item"."i_product_name",
"item"."i_brand",
"item"."i_class",
"item"."i_category"
)
ORDER BY
"qoh",
"i_product_name",
@@ -3347,10 +3347,10 @@ JOIN "store" AS "store"
ON "store"."s_state" IN ('TN', 'TN', 'TN', 'TN', 'TN', 'TN')
AND "store"."s_store_sk" = "store_sales"."ss_store_sk"
GROUP BY
ROLLUP (
"item"."i_item_id",
"store"."s_state"
)
ORDER BY
"i_item_id",
"s_state"
@@ -4463,10 +4463,10 @@ JOIN "store" AS "store"
ON "store"."s_state" IN ('TN', 'TN', 'TN', 'TN', 'TN', 'TN', 'TN', 'TN')
AND "store"."s_store_sk" = "store_sales"."ss_store_sk"
GROUP BY
ROLLUP (
"item"."i_category",
"item"."i_class"
)
ORDER BY
"lochierarchy" DESC,
CASE WHEN "lochierarchy" = 0 THEN "i_category" END,
@@ -9017,16 +9017,16 @@ WITH "dw1" AS (
JOIN "store" AS "store"
ON "store"."s_store_sk" = "store_sales"."ss_store_sk"
GROUP BY
ROLLUP (
"item"."i_category",
"item"."i_class",
"item"."i_brand",
"item"."i_product_name",
"date_dim"."d_year",
"date_dim"."d_qoy",
"date_dim"."d_moy",
"store"."s_store_id"
)
), "dw2" AS (
SELECT
"dw1"."i_category" AS "i_category",
@@ -9396,10 +9396,10 @@ LEFT JOIN "_u_0" AS "_u_0"
WHERE
NOT "_u_0"."s_state" IS NULL
GROUP BY
ROLLUP (
"store"."s_state",
"store"."s_county"
)
ORDER BY
"lochierarchy" DESC,
CASE WHEN "lochierarchy" = 0 THEN "s_state" END,
@@ -10433,10 +10433,10 @@ SELECT
SUM("x"."profit") AS "profit"
FROM "x" AS "x"
GROUP BY
ROLLUP (
"x"."channel",
"x"."id"
)
ORDER BY
"channel",
"id"
@@ -10937,10 +10937,10 @@ SELECT
SUM("x"."profit") AS "profit"
FROM "x" AS "x"
GROUP BY
ROLLUP (
"x"."channel",
"x"."id"
)
ORDER BY
"channel",
"id"
@@ -11539,10 +11539,10 @@ JOIN "date_dim" AS "d1"
JOIN "item" AS "item"
ON "item"."i_item_sk" = "web_sales"."ws_item_sk"
GROUP BY
ROLLUP (
"item"."i_category",
"item"."i_class"
)
ORDER BY
"lochierarchy" DESC,
CASE WHEN "lochierarchy" = 0 THEN "i_category" END,


@@ -99,14 +99,14 @@ WITH cte1 AS (
FOO(CASE WHEN a AND b THEN c AND d ELSE 3 END)
GROUP BY
x,
GROUPING SETS (
a,
(b, c)
),
CUBE (
y,
z
)
) AS x
)
SELECT


@@ -1,4 +1,3 @@
- import time
import unittest
from sqlglot import exp, parse_one
@@ -46,8 +45,5 @@ class TestGenerator(unittest.TestCase):
assert parse_one("X as 1").sql(identify="safe") == 'X AS "1"'
def test_generate_nested_binary(self):
sql = "SELECT 'foo'" + (" || 'foo'" * 500)
- now = time.time()
self.assertEqual(parse_one(sql).sql(), sql)
- self.assertLessEqual(time.time() - now, 0.1)
sql = "SELECT 'foo'" + (" || 'foo'" * 1000)
self.assertEqual(parse_one(sql).sql(copy=False), sql)


@@ -27,11 +27,11 @@ def parse_and_optimize(func, sql, read_dialect, **kwargs):
return func(parse_one(sql, read=read_dialect), **kwargs)
- def qualify_columns(expression, **kwargs):
+ def qualify_columns(expression, validate_qualify_columns=True, **kwargs):
expression = optimizer.qualify.qualify(
expression,
infer_schema=True,
- validate_qualify_columns=False,
+ validate_qualify_columns=validate_qualify_columns,
identify=False,
**kwargs,
)
@@ -135,11 +135,17 @@ class TestOptimizer(unittest.TestCase):
continue
dialect = meta.get("dialect")
leave_tables_isolated = meta.get("leave_tables_isolated")
validate_qualify_columns = meta.get("validate_qualify_columns")
func_kwargs = {**kwargs}
if leave_tables_isolated is not None:
func_kwargs["leave_tables_isolated"] = string_to_bool(leave_tables_isolated)
if validate_qualify_columns is not None:
func_kwargs["validate_qualify_columns"] = string_to_bool(
validate_qualify_columns
)
if set_dialect and dialect:
func_kwargs["dialect"] = dialect


@@ -1,6 +1,7 @@
import unittest
+ import sys
- from sqlglot.time import format_time
+ from sqlglot.time import format_time, subsecond_precision
class TestTime(unittest.TestCase):
@@ -12,3 +13,22 @@ class TestTime(unittest.TestCase):
self.assertEqual(format_time("aa", mapping), "c")
self.assertEqual(format_time("aaada", mapping), "cbdb")
self.assertEqual(format_time("da", mapping), "db")
def test_subsecond_precision(self):
self.assertEqual(6, subsecond_precision("2023-01-01 12:13:14.123456+00:00"))
self.assertEqual(3, subsecond_precision("2023-01-01 12:13:14.123+00:00"))
self.assertEqual(0, subsecond_precision("2023-01-01 12:13:14+00:00"))
self.assertEqual(0, subsecond_precision("2023-01-01 12:13:14"))
self.assertEqual(0, subsecond_precision("garbage"))
@unittest.skipUnless(
sys.version_info >= (3, 11),
"Python 3.11 relaxed datetime.fromisoformat() parsing with regards to microseconds",
)
def test_subsecond_precision_python311(self):
# ref: https://docs.python.org/3/whatsnew/3.11.html#datetime
self.assertEqual(6, subsecond_precision("2023-01-01 12:13:14.123456789+00:00"))
self.assertEqual(6, subsecond_precision("2023-01-01 12:13:14.12345+00:00"))
self.assertEqual(6, subsecond_precision("2023-01-01 12:13:14.1234+00:00"))
self.assertEqual(3, subsecond_precision("2023-01-01 12:13:14.12+00:00"))
self.assertEqual(3, subsecond_precision("2023-01-01 12:13:14.1+00:00"))