Adding upstream version 26.25.3.
Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
parent
bc7749846c
commit
d9e621c994
83 changed files with 67317 additions and 67680 deletions
|
@ -1,28 +0,0 @@
|
|||
import typing as t
|
||||
|
||||
|
||||
def border(columns: t.Iterable[str]) -> str:
    """Render one ASCII-table row: cells joined by ' | ' and wrapped in pipes."""
    joined = " | ".join(columns)
    return "| " + joined + " |"
|
||||
|
||||
|
||||
def ascii_table(table: list[dict[str, t.Any]]) -> str:
    """Render *table* (a list of row dicts) as an ASCII table string.

    Columns are the union of all row keys, in first-seen order. Each column
    is right-justified to max(len(name), 15) characters and cell values are
    truncated to the column width. A row missing a column raises KeyError,
    matching the original behavior.
    """
    # dict.fromkeys dedupes while preserving first-seen order in O(1) per key;
    # the previous list-membership scan was O(n^2) in the number of columns.
    columns = list(dict.fromkeys(key for row in table for key in row))

    widths = {column: max(len(column), 15) for column in columns}

    lines = [
        border(column.rjust(width) for column, width in widths.items()),
        # "-" * width is already a str; the old str(...) wrapper was redundant.
        border("-" * width for width in widths.values()),
    ]

    for row in table:
        lines.append(
            border(str(row[column]).rjust(width)[0:width] for column, width in widths.items())
        )

    return "\n".join(lines)
|
|
@ -1,12 +1,13 @@
|
|||
import sys
|
||||
import typing as t
|
||||
from argparse import ArgumentParser
|
||||
import os
|
||||
import pyperf
|
||||
|
||||
# Add the project root to the path so we can import from tests
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from benchmarks.helpers import ascii_table
|
||||
from sqlglot.optimizer import optimize
|
||||
from sqlglot import parse_one
|
||||
from tests.helpers import load_sql_fixture_pairs, TPCH_SCHEMA, TPCDS_SCHEMA
|
||||
from timeit import Timer
|
||||
|
||||
# Deeply nested conditions currently require a lot of recursion
|
||||
sys.setrecursionlimit(10000)
|
||||
|
@ -16,55 +17,56 @@ def gen_condition(n):
|
|||
return parse_one(" OR ".join(f"a = {i} AND b = {i}" for i in range(n)))
|
||||
|
||||
|
||||
BENCHMARKS = {
|
||||
"tpch": lambda: (
|
||||
# Create benchmark functions that return the setup data
|
||||
def get_tpch_setup():
|
||||
return (
|
||||
[parse_one(sql) for _, sql, _ in load_sql_fixture_pairs("optimizer/tpc-h/tpc-h.sql")],
|
||||
TPCH_SCHEMA,
|
||||
3,
|
||||
),
|
||||
"tpcds": lambda: (
|
||||
)
|
||||
|
||||
|
||||
def get_tpcds_setup():
|
||||
return (
|
||||
[parse_one(sql) for _, sql, _ in load_sql_fixture_pairs("optimizer/tpc-ds/tpc-ds.sql")],
|
||||
TPCDS_SCHEMA,
|
||||
3,
|
||||
),
|
||||
"condition_10": lambda: (
|
||||
[gen_condition(10)],
|
||||
{},
|
||||
10,
|
||||
),
|
||||
"condition_100": lambda: (
|
||||
[gen_condition(100)],
|
||||
{},
|
||||
10,
|
||||
),
|
||||
"condition_1000": lambda: (
|
||||
[gen_condition(1000)],
|
||||
{},
|
||||
3,
|
||||
),
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
def bench() -> list[dict[str, t.Any]]:
|
||||
parser = ArgumentParser()
|
||||
parser.add_argument("-b", "--benchmark", choices=BENCHMARKS, action="append")
|
||||
args = parser.parse_args()
|
||||
benchmarks = list(args.benchmark or BENCHMARKS)
|
||||
def get_condition_10_setup():
|
||||
return ([gen_condition(10)], {})
|
||||
|
||||
table = []
|
||||
for benchmark in benchmarks:
|
||||
expressions, schema, n = BENCHMARKS[benchmark]()
|
||||
|
||||
def func():
|
||||
for e in expressions:
|
||||
optimize(e, schema)
|
||||
def get_condition_100_setup():
|
||||
return ([gen_condition(100)], {})
|
||||
|
||||
timer = Timer(func)
|
||||
min_duration = min(timer.repeat(repeat=n, number=1))
|
||||
table.append({"Benchmark": benchmark, "Duration (s)": round(min_duration, 4)})
|
||||
|
||||
return table
|
||||
def get_condition_1000_setup():
|
||||
return ([gen_condition(1000)], {})
|
||||
|
||||
|
||||
# Optimizer functions that will be benchmarked
|
||||
def optimize_queries(expressions, schema):
    """Benchmark body: run the sqlglot optimizer over each parsed expression."""
    for expression in expressions:
        optimize(expression, schema)
|
||||
|
||||
|
||||
def run_benchmarks():
    """Register every optimizer benchmark with a pyperf runner."""
    runner = pyperf.Runner()

    # Map benchmark name -> zero-arg setup returning (expressions, schema).
    # "tpcds" (get_tpcds_setup) is left out because it's too slow in CI.
    setups = {
        "tpch": get_tpch_setup,
        "condition_10": get_condition_10_setup,
        "condition_100": get_condition_100_setup,
        "condition_1000": get_condition_1000_setup,
    }

    for name, setup in setups.items():
        expressions, schema = setup()
        runner.bench_func(f"optimize_{name}", optimize_queries, expressions, schema)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
print(ascii_table(bench()))
|
||||
run_benchmarks()
|
||||
|
|
|
@ -1,12 +1,8 @@
|
|||
import collections.abc
|
||||
|
||||
from benchmarks.helpers import ascii_table
|
||||
import pyperf
|
||||
|
||||
# moz_sql_parser 3.10 compatibility
|
||||
collections.Iterable = collections.abc.Iterable
|
||||
import timeit
|
||||
|
||||
import numpy as np
|
||||
|
||||
# import sqlfluff
|
||||
# import moz_sql_parser
|
||||
|
@ -56,7 +52,7 @@ ORDER BY
|
|||
"e"."employee_id"
|
||||
"""
|
||||
|
||||
short = "select 1 as a, case when 1 then 1 when 2 then 2 else 3 end as b, c from x"
|
||||
short = "SELECT 1 AS a, CASE WHEN 1 THEN 1 WHEN 2 THEN 2 ELSE 3 END AS b, c FROM x"
|
||||
|
||||
crazy = "SELECT 1+"
|
||||
crazy += "+".join(str(i) for i in range(500))
|
||||
|
@ -190,41 +186,20 @@ def sqlfluff_parse(sql):
|
|||
sqlfluff.parse(sql)
|
||||
|
||||
|
||||
def diff(row, column):
    """Format row[column] as a speed ratio relative to the 'sqlglot' baseline.

    Returns "" for the Query column, " (N/A)" when the value is a string
    (an error marker), otherwise " (<ratio>)" with the ratio truncated to
    its first five characters.
    """
    if column == "Query":
        return ""
    value = row[column]
    if isinstance(value, str):
        return " (N/A)"
    ratio = str(value / row["sqlglot"])[0:5]
    return f" ({ratio})"
|
||||
QUERIES = {"tpch": tpch, "short": short, "long": long, "crazy": crazy}
|
||||
|
||||
|
||||
libs = [
|
||||
"sqlglot",
|
||||
"sqlglotrs",
|
||||
# "sqlfluff",
|
||||
# "sqltree",
|
||||
# "sqlparse",
|
||||
# "moz_sql_parser",
|
||||
# "sqloxide",
|
||||
]
|
||||
table = []
|
||||
def run_benchmarks():
|
||||
runner = pyperf.Runner()
|
||||
|
||||
for name, sql in {"tpch": tpch, "short": short, "long": long, "crazy": crazy}.items():
|
||||
row = {"Query": name}
|
||||
table.append(row)
|
||||
libs = ["sqlglot", "sqlglotrs"]
|
||||
for lib in libs:
|
||||
try:
|
||||
row[lib] = np.mean(timeit.repeat(lambda: globals()[lib + "_parse"](sql), number=3))
|
||||
except Exception as e:
|
||||
print(e)
|
||||
row[lib] = "error"
|
||||
for query_name, sql in QUERIES.items():
|
||||
bench_name = f"parse_{lib}_{query_name}"
|
||||
parse_func = globals()[f"{lib}_parse"]
|
||||
|
||||
print(
|
||||
ascii_table(
|
||||
[
|
||||
{k: v if v == "Query" else str(row[k])[0:7] + diff(row, k) for k, v in row.items()}
|
||||
for row in table
|
||||
]
|
||||
)
|
||||
)
|
||||
runner.bench_func(bench_name, parse_func, sql)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
run_benchmarks()
|
Loading…
Add table
Add a link
Reference in a new issue