230 lines
6.7 KiB
Python
230 lines
6.7 KiB
Python
import collections.abc
|
|
|
|
from helpers import ascii_table
|
|
|
|
# moz_sql_parser 3.10 compatibility
|
|
collections.Iterable = collections.abc.Iterable
|
|
import timeit
|
|
|
|
import numpy as np
|
|
|
|
# import sqlfluff
|
|
# import moz_sql_parser
|
|
# import sqloxide
|
|
# import sqlparse
|
|
# import sqltree
|
|
|
|
import sqlglot
|
|
|
|
long = """
|
|
SELECT
|
|
"e"."employee_id" AS "Employee #",
|
|
"e"."first_name" || ' ' || "e"."last_name" AS "Name",
|
|
"e"."email" AS "Email",
|
|
"e"."phone_number" AS "Phone",
|
|
TO_CHAR("e"."hire_date", 'MM/DD/YYYY') AS "Hire Date",
|
|
TO_CHAR("e"."salary", 'L99G999D99', 'NLS_NUMERIC_CHARACTERS = ''.,'' NLS_CURRENCY = ''$''') AS "Salary",
|
|
"e"."commission_pct" AS "Commission %",
|
|
'works as ' || "j"."job_title" || ' in ' || "d"."department_name" || ' department (manager: ' || "dm"."first_name" || ' ' || "dm"."last_name" || ') and immediate supervisor: ' || "m"."first_name" || ' ' || "m"."last_name" AS "Current Job",
|
|
TO_CHAR("j"."min_salary", 'L99G999D99', 'NLS_NUMERIC_CHARACTERS = ''.,'' NLS_CURRENCY = ''$''') || ' - ' || TO_CHAR("j"."max_salary", 'L99G999D99', 'NLS_NUMERIC_CHARACTERS = ''.,'' NLS_CURRENCY = ''$''') AS "Current Salary",
|
|
"l"."street_address" || ', ' || "l"."postal_code" || ', ' || "l"."city" || ', ' || "l"."state_province" || ', ' || "c"."country_name" || ' (' || "r"."region_name" || ')' AS "Location",
|
|
"jh"."job_id" AS "History Job ID",
|
|
'worked from ' || TO_CHAR("jh"."start_date", 'MM/DD/YYYY') || ' to ' || TO_CHAR("jh"."end_date", 'MM/DD/YYYY') || ' as ' || "jj"."job_title" || ' in ' || "dd"."department_name" || ' department' AS "History Job Title",
|
|
case when 1 then 1 when 2 then 2 when 3 then 3 when 4 then 4 when 5 then 5 else a(b(c + 1 * 3 % 4)) end
|
|
FROM "employees" AS e
|
|
JOIN "jobs" AS j
|
|
ON "e"."job_id" = "j"."job_id"
|
|
LEFT JOIN "employees" AS m
|
|
ON "e"."manager_id" = "m"."employee_id"
|
|
LEFT JOIN "departments" AS d
|
|
ON "d"."department_id" = "e"."department_id"
|
|
LEFT JOIN "employees" AS dm
|
|
ON "d"."manager_id" = "dm"."employee_id"
|
|
LEFT JOIN "locations" AS l
|
|
ON "d"."location_id" = "l"."location_id"
|
|
LEFT JOIN "countries" AS c
|
|
ON "l"."country_id" = "c"."country_id"
|
|
LEFT JOIN "regions" AS r
|
|
ON "c"."region_id" = "r"."region_id"
|
|
LEFT JOIN "job_history" AS jh
|
|
ON "e"."employee_id" = "jh"."employee_id"
|
|
LEFT JOIN "jobs" AS jj
|
|
ON "jj"."job_id" = "jh"."job_id"
|
|
LEFT JOIN "departments" AS dd
|
|
ON "dd"."department_id" = "jh"."department_id"
|
|
ORDER BY
|
|
"e"."employee_id"
|
|
"""
|
|
|
|
short = "select 1 as a, case when 1 then 1 when 2 then 2 else 3 end as b, c from x"
|
|
|
|
crazy = "SELECT 1+"
|
|
crazy += "+".join(str(i) for i in range(500))
|
|
crazy += " AS a, 2*"
|
|
crazy += "*".join(str(i) for i in range(500))
|
|
crazy += " AS b FROM x"
|
|
|
|
tpch = """
|
|
WITH "_e_0" AS (
|
|
SELECT
|
|
"partsupp"."ps_partkey" AS "ps_partkey",
|
|
"partsupp"."ps_suppkey" AS "ps_suppkey",
|
|
"partsupp"."ps_supplycost" AS "ps_supplycost"
|
|
FROM "partsupp" AS "partsupp"
|
|
), "_e_1" AS (
|
|
SELECT
|
|
"region"."r_regionkey" AS "r_regionkey",
|
|
"region"."r_name" AS "r_name"
|
|
FROM "region" AS "region"
|
|
WHERE
|
|
"region"."r_name" = 'EUROPE'
|
|
)
|
|
SELECT
|
|
"supplier"."s_acctbal" AS "s_acctbal",
|
|
"supplier"."s_name" AS "s_name",
|
|
"nation"."n_name" AS "n_name",
|
|
"part"."p_partkey" AS "p_partkey",
|
|
"part"."p_mfgr" AS "p_mfgr",
|
|
"supplier"."s_address" AS "s_address",
|
|
"supplier"."s_phone" AS "s_phone",
|
|
"supplier"."s_comment" AS "s_comment"
|
|
FROM (
|
|
SELECT
|
|
"part"."p_partkey" AS "p_partkey",
|
|
"part"."p_mfgr" AS "p_mfgr",
|
|
"part"."p_type" AS "p_type",
|
|
"part"."p_size" AS "p_size"
|
|
FROM "part" AS "part"
|
|
WHERE
|
|
"part"."p_size" = 15
|
|
AND "part"."p_type" LIKE '%BRASS'
|
|
) AS "part"
|
|
LEFT JOIN (
|
|
SELECT
|
|
MIN("partsupp"."ps_supplycost") AS "_col_0",
|
|
"partsupp"."ps_partkey" AS "_u_1"
|
|
FROM "_e_0" AS "partsupp"
|
|
CROSS JOIN "_e_1" AS "region"
|
|
JOIN (
|
|
SELECT
|
|
"nation"."n_nationkey" AS "n_nationkey",
|
|
"nation"."n_regionkey" AS "n_regionkey"
|
|
FROM "nation" AS "nation"
|
|
) AS "nation"
|
|
ON "nation"."n_regionkey" = "region"."r_regionkey"
|
|
JOIN (
|
|
SELECT
|
|
"supplier"."s_suppkey" AS "s_suppkey",
|
|
"supplier"."s_nationkey" AS "s_nationkey"
|
|
FROM "supplier" AS "supplier"
|
|
) AS "supplier"
|
|
ON "supplier"."s_nationkey" = "nation"."n_nationkey"
|
|
AND "supplier"."s_suppkey" = "partsupp"."ps_suppkey"
|
|
GROUP BY
|
|
"partsupp"."ps_partkey"
|
|
) AS "_u_0"
|
|
ON "part"."p_partkey" = "_u_0"."_u_1"
|
|
CROSS JOIN "_e_1" AS "region"
|
|
JOIN (
|
|
SELECT
|
|
"nation"."n_nationkey" AS "n_nationkey",
|
|
"nation"."n_name" AS "n_name",
|
|
"nation"."n_regionkey" AS "n_regionkey"
|
|
FROM "nation" AS "nation"
|
|
) AS "nation"
|
|
ON "nation"."n_regionkey" = "region"."r_regionkey"
|
|
JOIN "_e_0" AS "partsupp"
|
|
ON "part"."p_partkey" = "partsupp"."ps_partkey"
|
|
JOIN (
|
|
SELECT
|
|
"supplier"."s_suppkey" AS "s_suppkey",
|
|
"supplier"."s_name" AS "s_name",
|
|
"supplier"."s_address" AS "s_address",
|
|
"supplier"."s_nationkey" AS "s_nationkey",
|
|
"supplier"."s_phone" AS "s_phone",
|
|
"supplier"."s_acctbal" AS "s_acctbal",
|
|
"supplier"."s_comment" AS "s_comment"
|
|
FROM "supplier" AS "supplier"
|
|
) AS "supplier"
|
|
ON "supplier"."s_nationkey" = "nation"."n_nationkey"
|
|
AND "supplier"."s_suppkey" = "partsupp"."ps_suppkey"
|
|
WHERE
|
|
"partsupp"."ps_supplycost" = "_u_0"."_col_0"
|
|
AND NOT "_u_0"."_u_1" IS NULL
|
|
ORDER BY
|
|
"supplier"."s_acctbal" DESC,
|
|
"nation"."n_name",
|
|
"supplier"."s_name",
|
|
"part"."p_partkey"
|
|
LIMIT 100
|
|
"""
|
|
|
|
|
|
def sqlglot_parse(sql):
|
|
sqlglot.tokens.USE_RS_TOKENIZER = False
|
|
sqlglot.parse_one(sql, error_level=sqlglot.ErrorLevel.IGNORE)
|
|
|
|
|
|
def sqlglotrs_parse(sql):
|
|
sqlglot.tokens.USE_RS_TOKENIZER = True
|
|
sqlglot.parse_one(sql, error_level=sqlglot.ErrorLevel.IGNORE)
|
|
|
|
|
|
def sqltree_parse(sql):
|
|
sqltree.api.sqltree(sql.replace('"', "`").replace("''", '"'))
|
|
|
|
|
|
def sqlparse_parse(sql):
|
|
sqlparse.parse(sql)
|
|
|
|
|
|
def moz_sql_parser_parse(sql):
|
|
moz_sql_parser.parse(sql)
|
|
|
|
|
|
def sqloxide_parse(sql):
|
|
sqloxide.parse_sql(sql, dialect="ansi")
|
|
|
|
|
|
def sqlfluff_parse(sql):
|
|
sqlfluff.parse(sql)
|
|
|
|
|
|
def diff(row, column):
|
|
if column == "Query":
|
|
return ""
|
|
column = row[column]
|
|
if isinstance(column, str):
|
|
return " (N/A)"
|
|
return f" ({str(column / row['sqlglot'])[0:5]})"
|
|
|
|
|
|
libs = [
|
|
"sqlglot",
|
|
"sqlglotrs",
|
|
# "sqlfluff",
|
|
# "sqltree",
|
|
# "sqlparse",
|
|
# "moz_sql_parser",
|
|
# "sqloxide",
|
|
]
|
|
table = []
|
|
|
|
for name, sql in {"tpch": tpch, "short": short, "long": long, "crazy": crazy}.items():
|
|
row = {"Query": name}
|
|
table.append(row)
|
|
for lib in libs:
|
|
try:
|
|
row[lib] = np.mean(timeit.repeat(lambda: globals()[lib + "_parse"](sql), number=3))
|
|
except Exception as e:
|
|
print(e)
|
|
row[lib] = "error"
|
|
|
|
print(
|
|
ascii_table(
|
|
[
|
|
{k: v if v == "Query" else str(row[k])[0:7] + diff(row, k) for k, v in row.items()}
|
|
for row in table
|
|
]
|
|
)
|
|
)
|