1
0
Fork 0
sqlglot/benchmarks/bench.py
Daniel Baumann e0f3bbb5f3
Merging upstream version 10.5.2.
Signed-off-by: Daniel Baumann <daniel@debian.org>
2025-02-13 15:03:38 +01:00

233 lines
6.8 KiB
Python

import collections.abc
# moz_sql_parser 3.10 compatibility
collections.Iterable = collections.abc.Iterable
import gc
import timeit
import numpy as np
import sqlfluff
import moz_sql_parser
import sqloxide
import sqlparse
import sqltree
import sqlglot
long = """
SELECT
"e"."employee_id" AS "Employee #",
"e"."first_name" || ' ' || "e"."last_name" AS "Name",
"e"."email" AS "Email",
"e"."phone_number" AS "Phone",
TO_CHAR("e"."hire_date", 'MM/DD/YYYY') AS "Hire Date",
TO_CHAR("e"."salary", 'L99G999D99', 'NLS_NUMERIC_CHARACTERS = ''.,'' NLS_CURRENCY = ''$''') AS "Salary",
"e"."commission_pct" AS "Commission %",
'works as ' || "j"."job_title" || ' in ' || "d"."department_name" || ' department (manager: ' || "dm"."first_name" || ' ' || "dm"."last_name" || ') and immediate supervisor: ' || "m"."first_name" || ' ' || "m"."last_name" AS "Current Job",
TO_CHAR("j"."min_salary", 'L99G999D99', 'NLS_NUMERIC_CHARACTERS = ''.,'' NLS_CURRENCY = ''$''') || ' - ' || TO_CHAR("j"."max_salary", 'L99G999D99', 'NLS_NUMERIC_CHARACTERS = ''.,'' NLS_CURRENCY = ''$''') AS "Current Salary",
"l"."street_address" || ', ' || "l"."postal_code" || ', ' || "l"."city" || ', ' || "l"."state_province" || ', ' || "c"."country_name" || ' (' || "r"."region_name" || ')' AS "Location",
"jh"."job_id" AS "History Job ID",
'worked from ' || TO_CHAR("jh"."start_date", 'MM/DD/YYYY') || ' to ' || TO_CHAR("jh"."end_date", 'MM/DD/YYYY') || ' as ' || "jj"."job_title" || ' in ' || "dd"."department_name" || ' department' AS "History Job Title",
case when 1 then 1 when 2 then 2 when 3 then 3 when 4 then 4 when 5 then 5 else a(b(c + 1 * 3 % 4)) end
FROM "employees" AS e
JOIN "jobs" AS j
ON "e"."job_id" = "j"."job_id"
LEFT JOIN "employees" AS m
ON "e"."manager_id" = "m"."employee_id"
LEFT JOIN "departments" AS d
ON "d"."department_id" = "e"."department_id"
LEFT JOIN "employees" AS dm
ON "d"."manager_id" = "dm"."employee_id"
LEFT JOIN "locations" AS l
ON "d"."location_id" = "l"."location_id"
LEFT JOIN "countries" AS c
ON "l"."country_id" = "c"."country_id"
LEFT JOIN "regions" AS r
ON "c"."region_id" = "r"."region_id"
LEFT JOIN "job_history" AS jh
ON "e"."employee_id" = "jh"."employee_id"
LEFT JOIN "jobs" AS jj
ON "jj"."job_id" = "jh"."job_id"
LEFT JOIN "departments" AS dd
ON "dd"."department_id" = "jh"."department_id"
ORDER BY
"e"."employee_id"
"""
short = "select 1 as a, case when 1 then 1 when 2 then 2 else 3 end as b, c from x"
crazy = "SELECT 1+"
crazy += "+".join(str(i) for i in range(500))
crazy += " AS a, 2*"
crazy += "*".join(str(i) for i in range(500))
crazy += " AS b FROM x"
tpch = """
WITH "_e_0" AS (
SELECT
"partsupp"."ps_partkey" AS "ps_partkey",
"partsupp"."ps_suppkey" AS "ps_suppkey",
"partsupp"."ps_supplycost" AS "ps_supplycost"
FROM "partsupp" AS "partsupp"
), "_e_1" AS (
SELECT
"region"."r_regionkey" AS "r_regionkey",
"region"."r_name" AS "r_name"
FROM "region" AS "region"
WHERE
"region"."r_name" = 'EUROPE'
)
SELECT
"supplier"."s_acctbal" AS "s_acctbal",
"supplier"."s_name" AS "s_name",
"nation"."n_name" AS "n_name",
"part"."p_partkey" AS "p_partkey",
"part"."p_mfgr" AS "p_mfgr",
"supplier"."s_address" AS "s_address",
"supplier"."s_phone" AS "s_phone",
"supplier"."s_comment" AS "s_comment"
FROM (
SELECT
"part"."p_partkey" AS "p_partkey",
"part"."p_mfgr" AS "p_mfgr",
"part"."p_type" AS "p_type",
"part"."p_size" AS "p_size"
FROM "part" AS "part"
WHERE
"part"."p_size" = 15
AND "part"."p_type" LIKE '%BRASS'
) AS "part"
LEFT JOIN (
SELECT
MIN("partsupp"."ps_supplycost") AS "_col_0",
"partsupp"."ps_partkey" AS "_u_1"
FROM "_e_0" AS "partsupp"
CROSS JOIN "_e_1" AS "region"
JOIN (
SELECT
"nation"."n_nationkey" AS "n_nationkey",
"nation"."n_regionkey" AS "n_regionkey"
FROM "nation" AS "nation"
) AS "nation"
ON "nation"."n_regionkey" = "region"."r_regionkey"
JOIN (
SELECT
"supplier"."s_suppkey" AS "s_suppkey",
"supplier"."s_nationkey" AS "s_nationkey"
FROM "supplier" AS "supplier"
) AS "supplier"
ON "supplier"."s_nationkey" = "nation"."n_nationkey"
AND "supplier"."s_suppkey" = "partsupp"."ps_suppkey"
GROUP BY
"partsupp"."ps_partkey"
) AS "_u_0"
ON "part"."p_partkey" = "_u_0"."_u_1"
CROSS JOIN "_e_1" AS "region"
JOIN (
SELECT
"nation"."n_nationkey" AS "n_nationkey",
"nation"."n_name" AS "n_name",
"nation"."n_regionkey" AS "n_regionkey"
FROM "nation" AS "nation"
) AS "nation"
ON "nation"."n_regionkey" = "region"."r_regionkey"
JOIN "_e_0" AS "partsupp"
ON "part"."p_partkey" = "partsupp"."ps_partkey"
JOIN (
SELECT
"supplier"."s_suppkey" AS "s_suppkey",
"supplier"."s_name" AS "s_name",
"supplier"."s_address" AS "s_address",
"supplier"."s_nationkey" AS "s_nationkey",
"supplier"."s_phone" AS "s_phone",
"supplier"."s_acctbal" AS "s_acctbal",
"supplier"."s_comment" AS "s_comment"
FROM "supplier" AS "supplier"
) AS "supplier"
ON "supplier"."s_nationkey" = "nation"."n_nationkey"
AND "supplier"."s_suppkey" = "partsupp"."ps_suppkey"
WHERE
"partsupp"."ps_supplycost" = "_u_0"."_col_0"
AND NOT "_u_0"."_u_1" IS NULL
ORDER BY
"supplier"."s_acctbal" DESC,
"nation"."n_name",
"supplier"."s_name",
"part"."p_partkey"
LIMIT 100
"""
def sqlglot_parse(sql):
sqlglot.parse(sql, error_level=sqlglot.ErrorLevel.IGNORE)
def sqltree_parse(sql):
sqltree.api.sqltree(sql.replace('"', '`').replace("''", '"'))
def sqlparse_parse(sql):
sqlparse.parse(sql)
def moz_sql_parser_parse(sql):
moz_sql_parser.parse(sql)
def sqloxide_parse(sql):
sqloxide.parse_sql(sql, dialect="ansi")
def sqlfluff_parse(sql):
sqlfluff.parse(sql)
def border(columns):
columns = " | ".join(columns)
return f"| {columns} |"
def diff(row, column):
if column == "Query":
return ""
column = row[column]
if isinstance(column, str):
return " (N/A)"
return f" ({str(column / row['sqlglot'])[0:5]})"
libs = [
"sqlglot",
"sqlfluff",
"sqltree",
"sqlparse",
"moz_sql_parser",
"sqloxide",
]
table = []
for name, sql in {"tpch": tpch, "short": short, "long": long, "crazy": crazy}.items():
row = {"Query": name}
table.append(row)
for lib in libs:
try:
row[lib] = np.mean(timeit.repeat(lambda: globals()[lib + "_parse"](sql), number=3))
except Exception as e:
print(e)
row[lib] = "error"
columns = ["Query"] + libs
widths = {column: max(len(column), 15) for column in columns}
lines = [border(column.rjust(width) for column, width in widths.items())]
lines.append(border(str("-" * width) for width in widths.values()))
for i, row in enumerate(table):
lines.append(border(
(str(row[column])[0:7] + diff(row, column)).rjust(width)[0 : width]
for column, width in widths.items()
))
for line in lines:
print(line)