1
0
Fork 0
sqlglot/benchmarks/bench.py
Daniel Baumann 4c76f76a29
Merging upstream version 25.31.4.
Signed-off-by: Daniel Baumann <daniel@debian.org>
2025-02-13 21:57:20 +01:00

230 lines
6.7 KiB
Python

import collections.abc
from helpers import ascii_table
# moz_sql_parser 3.10 compatibility
collections.Iterable = collections.abc.Iterable
import timeit
import numpy as np
# import sqlfluff
# import moz_sql_parser
# import sqloxide
# import sqlparse
# import sqltree
import sqlglot
long = """
SELECT
"e"."employee_id" AS "Employee #",
"e"."first_name" || ' ' || "e"."last_name" AS "Name",
"e"."email" AS "Email",
"e"."phone_number" AS "Phone",
TO_CHAR("e"."hire_date", 'MM/DD/YYYY') AS "Hire Date",
TO_CHAR("e"."salary", 'L99G999D99', 'NLS_NUMERIC_CHARACTERS = ''.,'' NLS_CURRENCY = ''$''') AS "Salary",
"e"."commission_pct" AS "Commission %",
'works as ' || "j"."job_title" || ' in ' || "d"."department_name" || ' department (manager: ' || "dm"."first_name" || ' ' || "dm"."last_name" || ') and immediate supervisor: ' || "m"."first_name" || ' ' || "m"."last_name" AS "Current Job",
TO_CHAR("j"."min_salary", 'L99G999D99', 'NLS_NUMERIC_CHARACTERS = ''.,'' NLS_CURRENCY = ''$''') || ' - ' || TO_CHAR("j"."max_salary", 'L99G999D99', 'NLS_NUMERIC_CHARACTERS = ''.,'' NLS_CURRENCY = ''$''') AS "Current Salary",
"l"."street_address" || ', ' || "l"."postal_code" || ', ' || "l"."city" || ', ' || "l"."state_province" || ', ' || "c"."country_name" || ' (' || "r"."region_name" || ')' AS "Location",
"jh"."job_id" AS "History Job ID",
'worked from ' || TO_CHAR("jh"."start_date", 'MM/DD/YYYY') || ' to ' || TO_CHAR("jh"."end_date", 'MM/DD/YYYY') || ' as ' || "jj"."job_title" || ' in ' || "dd"."department_name" || ' department' AS "History Job Title",
case when 1 then 1 when 2 then 2 when 3 then 3 when 4 then 4 when 5 then 5 else a(b(c + 1 * 3 % 4)) end
FROM "employees" AS e
JOIN "jobs" AS j
ON "e"."job_id" = "j"."job_id"
LEFT JOIN "employees" AS m
ON "e"."manager_id" = "m"."employee_id"
LEFT JOIN "departments" AS d
ON "d"."department_id" = "e"."department_id"
LEFT JOIN "employees" AS dm
ON "d"."manager_id" = "dm"."employee_id"
LEFT JOIN "locations" AS l
ON "d"."location_id" = "l"."location_id"
LEFT JOIN "countries" AS c
ON "l"."country_id" = "c"."country_id"
LEFT JOIN "regions" AS r
ON "c"."region_id" = "r"."region_id"
LEFT JOIN "job_history" AS jh
ON "e"."employee_id" = "jh"."employee_id"
LEFT JOIN "jobs" AS jj
ON "jj"."job_id" = "jh"."job_id"
LEFT JOIN "departments" AS dd
ON "dd"."department_id" = "jh"."department_id"
ORDER BY
"e"."employee_id"
"""
short = "select 1 as a, case when 1 then 1 when 2 then 2 else 3 end as b, c from x"
crazy = "SELECT 1+"
crazy += "+".join(str(i) for i in range(500))
crazy += " AS a, 2*"
crazy += "*".join(str(i) for i in range(500))
crazy += " AS b FROM x"
tpch = """
WITH "_e_0" AS (
SELECT
"partsupp"."ps_partkey" AS "ps_partkey",
"partsupp"."ps_suppkey" AS "ps_suppkey",
"partsupp"."ps_supplycost" AS "ps_supplycost"
FROM "partsupp" AS "partsupp"
), "_e_1" AS (
SELECT
"region"."r_regionkey" AS "r_regionkey",
"region"."r_name" AS "r_name"
FROM "region" AS "region"
WHERE
"region"."r_name" = 'EUROPE'
)
SELECT
"supplier"."s_acctbal" AS "s_acctbal",
"supplier"."s_name" AS "s_name",
"nation"."n_name" AS "n_name",
"part"."p_partkey" AS "p_partkey",
"part"."p_mfgr" AS "p_mfgr",
"supplier"."s_address" AS "s_address",
"supplier"."s_phone" AS "s_phone",
"supplier"."s_comment" AS "s_comment"
FROM (
SELECT
"part"."p_partkey" AS "p_partkey",
"part"."p_mfgr" AS "p_mfgr",
"part"."p_type" AS "p_type",
"part"."p_size" AS "p_size"
FROM "part" AS "part"
WHERE
"part"."p_size" = 15
AND "part"."p_type" LIKE '%BRASS'
) AS "part"
LEFT JOIN (
SELECT
MIN("partsupp"."ps_supplycost") AS "_col_0",
"partsupp"."ps_partkey" AS "_u_1"
FROM "_e_0" AS "partsupp"
CROSS JOIN "_e_1" AS "region"
JOIN (
SELECT
"nation"."n_nationkey" AS "n_nationkey",
"nation"."n_regionkey" AS "n_regionkey"
FROM "nation" AS "nation"
) AS "nation"
ON "nation"."n_regionkey" = "region"."r_regionkey"
JOIN (
SELECT
"supplier"."s_suppkey" AS "s_suppkey",
"supplier"."s_nationkey" AS "s_nationkey"
FROM "supplier" AS "supplier"
) AS "supplier"
ON "supplier"."s_nationkey" = "nation"."n_nationkey"
AND "supplier"."s_suppkey" = "partsupp"."ps_suppkey"
GROUP BY
"partsupp"."ps_partkey"
) AS "_u_0"
ON "part"."p_partkey" = "_u_0"."_u_1"
CROSS JOIN "_e_1" AS "region"
JOIN (
SELECT
"nation"."n_nationkey" AS "n_nationkey",
"nation"."n_name" AS "n_name",
"nation"."n_regionkey" AS "n_regionkey"
FROM "nation" AS "nation"
) AS "nation"
ON "nation"."n_regionkey" = "region"."r_regionkey"
JOIN "_e_0" AS "partsupp"
ON "part"."p_partkey" = "partsupp"."ps_partkey"
JOIN (
SELECT
"supplier"."s_suppkey" AS "s_suppkey",
"supplier"."s_name" AS "s_name",
"supplier"."s_address" AS "s_address",
"supplier"."s_nationkey" AS "s_nationkey",
"supplier"."s_phone" AS "s_phone",
"supplier"."s_acctbal" AS "s_acctbal",
"supplier"."s_comment" AS "s_comment"
FROM "supplier" AS "supplier"
) AS "supplier"
ON "supplier"."s_nationkey" = "nation"."n_nationkey"
AND "supplier"."s_suppkey" = "partsupp"."ps_suppkey"
WHERE
"partsupp"."ps_supplycost" = "_u_0"."_col_0"
AND NOT "_u_0"."_u_1" IS NULL
ORDER BY
"supplier"."s_acctbal" DESC,
"nation"."n_name",
"supplier"."s_name",
"part"."p_partkey"
LIMIT 100
"""
def sqlglot_parse(sql):
sqlglot.tokens.USE_RS_TOKENIZER = False
sqlglot.parse_one(sql, error_level=sqlglot.ErrorLevel.IGNORE)
def sqlglotrs_parse(sql):
sqlglot.tokens.USE_RS_TOKENIZER = True
sqlglot.parse_one(sql, error_level=sqlglot.ErrorLevel.IGNORE)
def sqltree_parse(sql):
sqltree.api.sqltree(sql.replace('"', "`").replace("''", '"'))
def sqlparse_parse(sql):
sqlparse.parse(sql)
def moz_sql_parser_parse(sql):
moz_sql_parser.parse(sql)
def sqloxide_parse(sql):
sqloxide.parse_sql(sql, dialect="ansi")
def sqlfluff_parse(sql):
sqlfluff.parse(sql)
def diff(row, column):
if column == "Query":
return ""
column = row[column]
if isinstance(column, str):
return " (N/A)"
return f" ({str(column / row['sqlglot'])[0:5]})"
libs = [
"sqlglot",
"sqlglotrs",
# "sqlfluff",
# "sqltree",
# "sqlparse",
# "moz_sql_parser",
# "sqloxide",
]
table = []
for name, sql in {"tpch": tpch, "short": short, "long": long, "crazy": crazy}.items():
row = {"Query": name}
table.append(row)
for lib in libs:
try:
row[lib] = np.mean(timeit.repeat(lambda: globals()[lib + "_parse"](sql), number=3))
except Exception as e:
print(e)
row[lib] = "error"
print(
ascii_table(
[
{k: v if v == "Query" else str(row[k])[0:7] + diff(row, k) for k, v in row.items()}
for row in table
]
)
)