2025-02-13 06:15:54 +01:00
import collections . abc
2025-02-13 21:57:20 +01:00
from helpers import ascii_table
2025-02-13 21:37:40 +01:00
2025-02-13 06:15:54 +01:00
# moz_sql_parser 3.10 compatibility
collections . Iterable = collections . abc . Iterable
import timeit
import numpy as np
2025-02-13 14:54:32 +01:00
2025-02-13 21:20:36 +01:00
# import sqlfluff
# import moz_sql_parser
# import sqloxide
# import sqlparse
2025-02-13 21:57:20 +01:00
# import sqltree
2025-02-13 06:15:54 +01:00
import sqlglot
long = """
SELECT
" e " . " employee_id " AS " Employee # " ,
" e " . " first_name " | | ' ' | | " e " . " last_name " AS " Name " ,
" e " . " email " AS " Email " ,
" e " . " phone_number " AS " Phone " ,
TO_CHAR ( " e " . " hire_date " , ' MM/DD/YYYY ' ) AS " Hire Date " ,
TO_CHAR ( " e " . " salary " , ' L99G999D99 ' , ' NLS_NUMERIC_CHARACTERS = ' ' ., ' ' NLS_CURRENCY = ' ' $ ' ' ' ) AS " Salary " ,
2025-02-13 15:03:38 +01:00
" e " . " commission_pct " AS " Commission % " ,
2025-02-13 06:15:54 +01:00
' works as ' | | " j " . " job_title " | | ' in ' | | " d " . " department_name " | | ' department (manager: ' | | " dm " . " first_name " | | ' ' | | " dm " . " last_name " | | ' ) and immediate supervisor: ' | | " m " . " first_name " | | ' ' | | " m " . " last_name " AS " Current Job " ,
TO_CHAR ( " j " . " min_salary " , ' L99G999D99 ' , ' NLS_NUMERIC_CHARACTERS = ' ' ., ' ' NLS_CURRENCY = ' ' $ ' ' ' ) | | ' - ' | | TO_CHAR ( " j " . " max_salary " , ' L99G999D99 ' , ' NLS_NUMERIC_CHARACTERS = ' ' ., ' ' NLS_CURRENCY = ' ' $ ' ' ' ) AS " Current Salary " ,
" l " . " street_address " | | ' , ' | | " l " . " postal_code " | | ' , ' | | " l " . " city " | | ' , ' | | " l " . " state_province " | | ' , ' | | " c " . " country_name " | | ' ( ' | | " r " . " region_name " | | ' ) ' AS " Location " ,
" jh " . " job_id " AS " History Job ID " ,
' worked from ' | | TO_CHAR ( " jh " . " start_date " , ' MM/DD/YYYY ' ) | | ' to ' | | TO_CHAR ( " jh " . " end_date " , ' MM/DD/YYYY ' ) | | ' as ' | | " jj " . " job_title " | | ' in ' | | " dd " . " department_name " | | ' department ' AS " History Job Title " ,
case when 1 then 1 when 2 then 2 when 3 then 3 when 4 then 4 when 5 then 5 else a ( b ( c + 1 * 3 % 4 ) ) end
FROM " employees " AS e
JOIN " jobs " AS j
ON " e " . " job_id " = " j " . " job_id "
LEFT JOIN " employees " AS m
ON " e " . " manager_id " = " m " . " employee_id "
LEFT JOIN " departments " AS d
ON " d " . " department_id " = " e " . " department_id "
LEFT JOIN " employees " AS dm
ON " d " . " manager_id " = " dm " . " employee_id "
LEFT JOIN " locations " AS l
ON " d " . " location_id " = " l " . " location_id "
LEFT JOIN " countries " AS c
ON " l " . " country_id " = " c " . " country_id "
LEFT JOIN " regions " AS r
ON " c " . " region_id " = " r " . " region_id "
LEFT JOIN " job_history " AS jh
ON " e " . " employee_id " = " jh " . " employee_id "
LEFT JOIN " jobs " AS jj
ON " jj " . " job_id " = " jh " . " job_id "
LEFT JOIN " departments " AS dd
ON " dd " . " department_id " = " jh " . " department_id "
ORDER BY
" e " . " employee_id "
"""
short = " select 1 as a, case when 1 then 1 when 2 then 2 else 3 end as b, c from x "
crazy = " SELECT 1+ "
crazy + = " + " . join ( str ( i ) for i in range ( 500 ) )
crazy + = " AS a, 2* "
crazy + = " * " . join ( str ( i ) for i in range ( 500 ) )
crazy + = " AS b FROM x "
tpch = """
WITH " _e_0 " AS (
SELECT
" partsupp " . " ps_partkey " AS " ps_partkey " ,
" partsupp " . " ps_suppkey " AS " ps_suppkey " ,
" partsupp " . " ps_supplycost " AS " ps_supplycost "
FROM " partsupp " AS " partsupp "
) , " _e_1 " AS (
SELECT
" region " . " r_regionkey " AS " r_regionkey " ,
" region " . " r_name " AS " r_name "
FROM " region " AS " region "
WHERE
" region " . " r_name " = ' EUROPE '
)
SELECT
" supplier " . " s_acctbal " AS " s_acctbal " ,
" supplier " . " s_name " AS " s_name " ,
" nation " . " n_name " AS " n_name " ,
" part " . " p_partkey " AS " p_partkey " ,
" part " . " p_mfgr " AS " p_mfgr " ,
" supplier " . " s_address " AS " s_address " ,
" supplier " . " s_phone " AS " s_phone " ,
" supplier " . " s_comment " AS " s_comment "
FROM (
SELECT
" part " . " p_partkey " AS " p_partkey " ,
" part " . " p_mfgr " AS " p_mfgr " ,
" part " . " p_type " AS " p_type " ,
" part " . " p_size " AS " p_size "
FROM " part " AS " part "
WHERE
" part " . " p_size " = 15
AND " part " . " p_type " LIKE ' % BRASS '
) AS " part "
LEFT JOIN (
SELECT
MIN ( " partsupp " . " ps_supplycost " ) AS " _col_0 " ,
" partsupp " . " ps_partkey " AS " _u_1 "
FROM " _e_0 " AS " partsupp "
CROSS JOIN " _e_1 " AS " region "
JOIN (
SELECT
" nation " . " n_nationkey " AS " n_nationkey " ,
" nation " . " n_regionkey " AS " n_regionkey "
FROM " nation " AS " nation "
) AS " nation "
ON " nation " . " n_regionkey " = " region " . " r_regionkey "
JOIN (
SELECT
" supplier " . " s_suppkey " AS " s_suppkey " ,
" supplier " . " s_nationkey " AS " s_nationkey "
FROM " supplier " AS " supplier "
) AS " supplier "
ON " supplier " . " s_nationkey " = " nation " . " n_nationkey "
AND " supplier " . " s_suppkey " = " partsupp " . " ps_suppkey "
GROUP BY
" partsupp " . " ps_partkey "
) AS " _u_0 "
ON " part " . " p_partkey " = " _u_0 " . " _u_1 "
CROSS JOIN " _e_1 " AS " region "
JOIN (
SELECT
" nation " . " n_nationkey " AS " n_nationkey " ,
" nation " . " n_name " AS " n_name " ,
" nation " . " n_regionkey " AS " n_regionkey "
FROM " nation " AS " nation "
) AS " nation "
ON " nation " . " n_regionkey " = " region " . " r_regionkey "
JOIN " _e_0 " AS " partsupp "
ON " part " . " p_partkey " = " partsupp " . " ps_partkey "
JOIN (
SELECT
" supplier " . " s_suppkey " AS " s_suppkey " ,
" supplier " . " s_name " AS " s_name " ,
" supplier " . " s_address " AS " s_address " ,
" supplier " . " s_nationkey " AS " s_nationkey " ,
" supplier " . " s_phone " AS " s_phone " ,
" supplier " . " s_acctbal " AS " s_acctbal " ,
" supplier " . " s_comment " AS " s_comment "
FROM " supplier " AS " supplier "
) AS " supplier "
ON " supplier " . " s_nationkey " = " nation " . " n_nationkey "
AND " supplier " . " s_suppkey " = " partsupp " . " ps_suppkey "
WHERE
" partsupp " . " ps_supplycost " = " _u_0 " . " _col_0 "
AND NOT " _u_0 " . " _u_1 " IS NULL
ORDER BY
" supplier " . " s_acctbal " DESC ,
" nation " . " n_name " ,
" supplier " . " s_name " ,
" part " . " p_partkey "
LIMIT 100
"""
def sqlglot_parse ( sql ) :
2025-02-13 21:17:51 +01:00
sqlglot . tokens . USE_RS_TOKENIZER = False
sqlglot . parse_one ( sql , error_level = sqlglot . ErrorLevel . IGNORE )
def sqlglotrs_parse ( sql ) :
sqlglot . tokens . USE_RS_TOKENIZER = True
sqlglot . parse_one ( sql , error_level = sqlglot . ErrorLevel . IGNORE )
2025-02-13 06:15:54 +01:00
def sqltree_parse ( sql ) :
2025-02-13 21:20:36 +01:00
sqltree . api . sqltree ( sql . replace ( ' " ' , " ` " ) . replace ( " ' ' " , ' " ' ) )
2025-02-13 06:15:54 +01:00
def sqlparse_parse ( sql ) :
sqlparse . parse ( sql )
def moz_sql_parser_parse ( sql ) :
moz_sql_parser . parse ( sql )
def sqloxide_parse ( sql ) :
sqloxide . parse_sql ( sql , dialect = " ansi " )
2025-02-13 14:54:32 +01:00
def sqlfluff_parse ( sql ) :
sqlfluff . parse ( sql )
2025-02-13 06:15:54 +01:00
def diff ( row , column ) :
if column == " Query " :
return " "
column = row [ column ]
if isinstance ( column , str ) :
return " (N/A) "
return f " ( { str ( column / row [ ' sqlglot ' ] ) [ 0 : 5 ] } ) "
libs = [
" sqlglot " ,
2025-02-13 21:17:51 +01:00
" sqlglotrs " ,
2025-02-13 21:20:36 +01:00
# "sqlfluff",
2025-02-13 21:57:20 +01:00
# "sqltree",
2025-02-13 21:20:36 +01:00
# "sqlparse",
# "moz_sql_parser",
# "sqloxide",
2025-02-13 06:15:54 +01:00
]
table = [ ]
for name , sql in { " tpch " : tpch , " short " : short , " long " : long , " crazy " : crazy } . items ( ) :
row = { " Query " : name }
table . append ( row )
for lib in libs :
try :
row [ lib ] = np . mean ( timeit . repeat ( lambda : globals ( ) [ lib + " _parse " ] ( sql ) , number = 3 ) )
2025-02-13 14:54:32 +01:00
except Exception as e :
print ( e )
2025-02-13 06:15:54 +01:00
row [ lib ] = " error "
2025-02-13 21:37:40 +01:00
print (
ascii_table (
[
{ k : v if v == " Query " else str ( row [ k ] ) [ 0 : 7 ] + diff ( row , k ) for k , v in row . items ( ) }
for row in table
]
2025-02-13 21:20:36 +01:00
)
2025-02-13 21:37:40 +01:00
)