from __future__ import annotations

import typing as t

from sqlglot import exp
from sqlglot.dialects.dialect import Dialect, DialectType
from sqlglot.optimizer.isolate_table_selects import isolate_table_selects
from sqlglot.optimizer.normalize_identifiers import normalize_identifiers
from sqlglot.optimizer.qualify_columns import (
    pushdown_cte_alias_columns as pushdown_cte_alias_columns_func,
    qualify_columns as qualify_columns_func,
    quote_identifiers as quote_identifiers_func,
    validate_qualify_columns as validate_qualify_columns_func,
)
from sqlglot.optimizer.qualify_tables import qualify_tables
from sqlglot.schema import Schema, ensure_schema

def qualify(
    expression: exp.Expression,
    dialect: DialectType = None,
    db: t.Optional[str] = None,
    catalog: t.Optional[str] = None,
    schema: t.Optional[dict | Schema] = None,
    expand_alias_refs: bool = True,
    expand_stars: bool = True,
    infer_schema: t.Optional[bool] = None,
    isolate_tables: bool = False,
    qualify_columns: bool = True,
    allow_partial_qualification: bool = False,
    validate_qualify_columns: bool = True,
    quote_identifiers: bool = True,
    identify: bool = True,
    infer_csv_schemas: bool = False,
) -> exp.Expression:
"""
|
|
Rewrite sqlglot AST to have normalized and qualified tables and columns.
|
|
|
|
This step is necessary for all further SQLGlot optimizations.
|
|
|
|
Example:
|
|
>>> import sqlglot
|
|
>>> schema = {"tbl": {"col": "INT"}}
|
|
>>> expression = sqlglot.parse_one("SELECT col FROM tbl")
|
|
>>> qualify(expression, schema=schema).sql()
|
|
'SELECT "tbl"."col" AS "col" FROM "tbl" AS "tbl"'
|
|
|
|
Args:
|
|
expression: Expression to qualify.
|
|
db: Default database name for tables.
|
|
catalog: Default catalog name for tables.
|
|
schema: Schema to infer column names and types.
|
|
expand_alias_refs: Whether to expand references to aliases.
|
|
expand_stars: Whether to expand star queries. This is a necessary step
|
|
for most of the optimizer's rules to work; do not set to False unless you
|
|
know what you're doing!
|
|
infer_schema: Whether to infer the schema if missing.
|
|
isolate_tables: Whether to isolate table selects.
|
|
qualify_columns: Whether to qualify columns.
|
|
allow_partial_qualification: Whether to allow partial qualification.
|
|
validate_qualify_columns: Whether to validate columns.
|
|
quote_identifiers: Whether to run the quote_identifiers step.
|
|
This step is necessary to ensure correctness for case sensitive queries.
|
|
But this flag is provided in case this step is performed at a later time.
|
|
identify: If True, quote all identifiers, else only necessary ones.
|
|
infer_csv_schemas: Whether to scan READ_CSV calls in order to infer the CSVs' schemas.
|
|
|
|
Returns:
|
|
The qualified expression.
|
|
"""
|
|
    schema = ensure_schema(schema, dialect=dialect)

    # Qualify table references, filling in the default db/catalog and aliasing every table.
    expression = qualify_tables(
        expression,
        db=db,
        catalog=catalog,
        schema=schema,
        dialect=dialect,
        infer_csv_schemas=infer_csv_schemas,
    )

    # Normalize identifier casing according to the dialect's case-sensitivity rules.
    expression = normalize_identifiers(expression, dialect=dialect)

    if isolate_tables:
        expression = isolate_table_selects(expression, schema=schema)

    # Dialects that prefer CTE alias columns need them pushed down into the CTE projections.
    if Dialect.get_or_raise(dialect).PREFER_CTE_ALIAS_COLUMN:
        expression = pushdown_cte_alias_columns_func(expression)

    if qualify_columns:
        # Attach each column to its source table, optionally expanding stars and alias refs.
        expression = qualify_columns_func(
            expression,
            schema,
            expand_alias_refs=expand_alias_refs,
            expand_stars=expand_stars,
            infer_schema=infer_schema,
            allow_partial_qualification=allow_partial_qualification,
        )

    if quote_identifiers:
        # Quote identifiers so that case-sensitive names survive SQL generation.
        expression = quote_identifiers_func(expression, dialect=dialect, identify=identify)

    if validate_qualify_columns:
        # Raise if any column could not be qualified unambiguously.
        validate_qualify_columns_func(expression)

    return expression
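

# A minimal usage sketch, not part of the module itself: it assumes a hypothetical
# two-table schema and the "duckdb" dialect purely for illustration; any dialect
# sqlglot supports would work the same way.
if __name__ == "__main__":
    import sqlglot

    example_schema = {
        "tbl": {"id": "INT", "col": "INT"},
        "tbl2": {"id": "INT", "other": "TEXT"},
    }
    parsed = sqlglot.parse_one(
        "SELECT col, other FROM tbl JOIN tbl2 ON tbl.id = tbl2.id", read="duckdb"
    )
    # Every column comes back qualified, aliased and quoted, e.g. "tbl"."col" AS "col".
    print(qualify(parsed, dialect="duckdb", schema=example_schema).sql(dialect="duckdb"))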