1
0
Fork 0
sqlglot/tests/tpch.py
Daniel Baumann d61627452f
Adding upstream version 16.2.1.
Signed-off-by: Daniel Baumann <daniel@debian.org>
2025-02-13 16:00:14 +01:00

115 lines
3.1 KiB
Python

import time
from sqlglot.optimizer import optimize
# Template for the per-query input files; "{i}" is filled in with the query number.
INPUT = "/home/toby/dev/tpch/{i}.sql"

# Fixture file that this script overwrites with the original and optimized queries.
OUTPUT = "/home/toby/dev/sqlglot/tests/fixtures/optimizer/tpc-h/tpc-h.sql"

# Number of query files to process.
NUM = 22

# Table name -> {column name -> column type}; handed to the optimizer as its schema.
SCHEMA = {
    "lineitem": {
        "l_orderkey": "bigint",
        "l_partkey": "bigint",
        "l_suppkey": "bigint",
        "l_linenumber": "bigint",
        "l_quantity": "double",
        "l_extendedprice": "double",
        "l_discount": "double",
        "l_tax": "double",
        "l_returnflag": "string",
        "l_linestatus": "string",
        "l_shipdate": "string",
        "l_commitdate": "string",
        "l_receiptdate": "string",
        "l_shipinstruct": "string",
        "l_shipmode": "string",
        "l_comment": "string",
    },
    "orders": {
        "o_orderkey": "bigint",
        "o_custkey": "bigint",
        "o_orderstatus": "string",
        "o_totalprice": "double",
        "o_orderdate": "string",
        "o_orderpriority": "string",
        "o_clerk": "string",
        "o_shippriority": "int",
        "o_comment": "string",
    },
    "customer": {
        "c_custkey": "bigint",
        "c_name": "string",
        "c_address": "string",
        "c_nationkey": "bigint",
        "c_phone": "string",
        "c_acctbal": "double",
        "c_mktsegment": "string",
        "c_comment": "string",
    },
    "part": {
        "p_partkey": "bigint",
        "p_name": "string",
        "p_mfgr": "string",
        "p_brand": "string",
        "p_type": "string",
        "p_size": "int",
        "p_container": "string",
        "p_retailprice": "double",
        "p_comment": "string",
    },
    "supplier": {
        "s_suppkey": "bigint",
        "s_name": "string",
        "s_address": "string",
        "s_nationkey": "bigint",
        "s_phone": "string",
        "s_acctbal": "double",
        "s_comment": "string",
    },
    "partsupp": {
        "ps_partkey": "bigint",
        "ps_suppkey": "bigint",
        "ps_availqty": "int",
        "ps_supplycost": "double",
        "ps_comment": "string",
    },
    "nation": {
        "n_nationkey": "bigint",
        "n_name": "string",
        "n_regionkey": "bigint",
        "n_comment": "string",
    },
    "region": {
        "r_regionkey": "bigint",
        "r_name": "string",
        "r_comment": "string",
    },
}

# Benchmark suffix used in the "-- TPC-{KIND} {i}" banner written above each query.
KIND = "H"
# Regenerate the fixture file: for every query 1..NUM, read the raw SQL, run it
# through the optimizer and append both the original and the optimized text to
# OUTPUT. Queries that fail to optimize are reported on stdout and skipped.
with open(OUTPUT, "w", encoding="UTF-8") as fixture:
    # Query files are numbered from 1, so iterate over 1..NUM directly instead
    # of bumping the loop variable inside the body.
    for i in range(1, NUM + 1):
        # The input handle is only needed for the read; close it before the
        # (potentially slow) optimization step.
        with open(INPUT.format(i=i), encoding="UTF-8") as file:
            # Keep only the text before the first ";", strip trailing
            # whitespace, and drop lines that begin with "--".
            original = "\n".join(
                line.rstrip()
                for line in file.read().split(";")[0].split("\n")
                if not line.startswith("--")
            )

        # Backticks are rewritten to double quotes before optimizing.
        original = original.replace("`", '"').strip()

        # perf_counter() is monotonic, so the reported duration cannot be
        # skewed by system clock adjustments the way time.time() can.
        started = time.perf_counter()
        try:
            optimized = optimize(original, schema=SCHEMA)
        except Exception as e:
            # Deliberate best effort: report the failing query and move on so
            # one bad query does not abort the whole regeneration run.
            print("****", i, e, "****")
            continue

        fixture.write(
            f"""--------------------------------------
-- TPC-{KIND} {i}
--------------------------------------
{original};
{optimized.sql(pretty=True)};
"""
        )
        # Query number and elapsed seconds (optimization plus fixture write).
        print(i, time.perf_counter() - started)