115 lines
3.1 KiB
Python
115 lines
3.1 KiB
Python
import time
|
|
|
|
from sqlglot.optimizer import optimize
|
|
|
|
INPUT = "/home/toby/dev/tpch/{i}.sql"
|
|
OUTPUT = "/home/toby/dev/sqlglot/tests/fixtures/optimizer/tpc-h/tpc-h.sql"
|
|
NUM = 22
|
|
SCHEMA = {
|
|
"lineitem": {
|
|
"l_orderkey": "bigint",
|
|
"l_partkey": "bigint",
|
|
"l_suppkey": "bigint",
|
|
"l_linenumber": "bigint",
|
|
"l_quantity": "double",
|
|
"l_extendedprice": "double",
|
|
"l_discount": "double",
|
|
"l_tax": "double",
|
|
"l_returnflag": "string",
|
|
"l_linestatus": "string",
|
|
"l_shipdate": "string",
|
|
"l_commitdate": "string",
|
|
"l_receiptdate": "string",
|
|
"l_shipinstruct": "string",
|
|
"l_shipmode": "string",
|
|
"l_comment": "string",
|
|
},
|
|
"orders": {
|
|
"o_orderkey": "bigint",
|
|
"o_custkey": "bigint",
|
|
"o_orderstatus": "string",
|
|
"o_totalprice": "double",
|
|
"o_orderdate": "string",
|
|
"o_orderpriority": "string",
|
|
"o_clerk": "string",
|
|
"o_shippriority": "int",
|
|
"o_comment": "string",
|
|
},
|
|
"customer": {
|
|
"c_custkey": "bigint",
|
|
"c_name": "string",
|
|
"c_address": "string",
|
|
"c_nationkey": "bigint",
|
|
"c_phone": "string",
|
|
"c_acctbal": "double",
|
|
"c_mktsegment": "string",
|
|
"c_comment": "string",
|
|
},
|
|
"part": {
|
|
"p_partkey": "bigint",
|
|
"p_name": "string",
|
|
"p_mfgr": "string",
|
|
"p_brand": "string",
|
|
"p_type": "string",
|
|
"p_size": "int",
|
|
"p_container": "string",
|
|
"p_retailprice": "double",
|
|
"p_comment": "string",
|
|
},
|
|
"supplier": {
|
|
"s_suppkey": "bigint",
|
|
"s_name": "string",
|
|
"s_address": "string",
|
|
"s_nationkey": "bigint",
|
|
"s_phone": "string",
|
|
"s_acctbal": "double",
|
|
"s_comment": "string",
|
|
},
|
|
"partsupp": {
|
|
"ps_partkey": "bigint",
|
|
"ps_suppkey": "bigint",
|
|
"ps_availqty": "int",
|
|
"ps_supplycost": "double",
|
|
"ps_comment": "string",
|
|
},
|
|
"nation": {
|
|
"n_nationkey": "bigint",
|
|
"n_name": "string",
|
|
"n_regionkey": "bigint",
|
|
"n_comment": "string",
|
|
},
|
|
"region": {
|
|
"r_regionkey": "bigint",
|
|
"r_name": "string",
|
|
"r_comment": "string",
|
|
},
|
|
}
|
|
KIND = "H"
|
|
|
|
with open(OUTPUT, "w", encoding="UTF-8") as fixture:
|
|
for i in range(NUM):
|
|
i = i + 1
|
|
with open(INPUT.format(i=i), encoding="UTF-8") as file:
|
|
original = "\n".join(
|
|
line.rstrip()
|
|
for line in file.read().split(";")[0].split("\n")
|
|
if not line.startswith("--")
|
|
)
|
|
original = original.replace("`", '"').strip()
|
|
now = time.time()
|
|
try:
|
|
optimized = optimize(original, schema=SCHEMA)
|
|
except Exception as e:
|
|
print("****", i, e, "****")
|
|
continue
|
|
|
|
fixture.write(
|
|
f"""--------------------------------------
|
|
-- TPC-{KIND} {i}
|
|
--------------------------------------
|
|
{original};
|
|
{optimized.sql(pretty=True)};
|
|
|
|
"""
|
|
)
|
|
print(i, time.time() - now)
|