2025-02-13 15:01:55 +01:00
|
|
|
"""
|
|
|
|
.. include:: ../README.md
|
2025-02-13 15:07:05 +01:00
|
|
|
----
|
2025-02-13 15:01:55 +01:00
|
|
|
"""
|
2025-02-13 14:51:47 +01:00
|
|
|
|
2025-02-13 14:53:05 +01:00
|
|
|
from __future__ import annotations
|
|
|
|
|
|
|
|
import typing as t
|
|
|
|
|
2025-02-13 06:15:54 +01:00
|
|
|
from sqlglot import expressions as exp
|
|
|
|
from sqlglot.dialects import Dialect, Dialects
|
|
|
|
from sqlglot.diff import diff
|
|
|
|
from sqlglot.errors import ErrorLevel, ParseError, TokenError, UnsupportedError
|
|
|
|
from sqlglot.expressions import Expression
|
|
|
|
from sqlglot.expressions import alias_ as alias
|
|
|
|
from sqlglot.expressions import (
|
|
|
|
and_,
|
|
|
|
column,
|
|
|
|
condition,
|
2025-02-13 14:45:11 +01:00
|
|
|
except_,
|
2025-02-13 06:15:54 +01:00
|
|
|
from_,
|
2025-02-13 14:45:11 +01:00
|
|
|
intersect,
|
2025-02-13 06:15:54 +01:00
|
|
|
maybe_parse,
|
|
|
|
not_,
|
|
|
|
or_,
|
|
|
|
select,
|
|
|
|
subquery,
|
|
|
|
)
|
|
|
|
from sqlglot.expressions import table_ as table
|
2025-02-13 14:53:05 +01:00
|
|
|
from sqlglot.expressions import to_column, to_table, union
|
2025-02-13 06:15:54 +01:00
|
|
|
from sqlglot.generator import Generator
|
|
|
|
from sqlglot.parser import Parser
|
2025-02-13 15:07:05 +01:00
|
|
|
from sqlglot.schema import MappingSchema, Schema
|
2025-02-13 06:15:54 +01:00
|
|
|
from sqlglot.tokens import Tokenizer, TokenType
|
|
|
|
|
2025-02-13 15:08:15 +01:00
|
|
|
# Version string of this package.
__version__ = "10.6.0"

pretty = False
"""Whether to format generated SQL by default."""

schema = MappingSchema()
"""The default schema used by SQLGlot (e.g. in the optimizer)."""
|
2025-02-13 14:48:46 +01:00
|
|
|
|
2025-02-13 06:15:54 +01:00
|
|
|
|
2025-02-13 14:53:05 +01:00
|
|
|
def parse(
    sql: str,
    read: t.Optional[str | Dialect] = None,
    **opts,
) -> t.List[t.Optional[Expression]]:
    """
    Parse a SQL string into a list of syntax trees, one entry per parsed statement.

    Args:
        sql: the SQL code string to parse.
        read: the SQL dialect to apply during parsing (eg. "spark", "hive", "presto", "mysql").
        **opts: other `sqlglot.parser.Parser` options.

    Returns:
        The collection of syntax trees produced by the dialect's parser.
    """
    # Look up the dialect for `read`, instantiate it, and delegate the parsing to it.
    return Dialect.get_or_raise(read)().parse(sql, **opts)
|
|
|
|
|
|
|
|
|
2025-02-13 14:53:05 +01:00
|
|
|
def parse_one(
    sql: str,
    read: t.Optional[str | Dialect] = None,
    into: t.Optional[exp.IntoType] = None,
    **opts,
) -> Expression:
    """
    Parse a SQL string and return the syntax tree of its first statement.

    Args:
        sql: the SQL code string to parse.
        read: the SQL dialect to apply during parsing (eg. "spark", "hive", "presto", "mysql").
        into: the SQLGlot Expression to parse into.
        **opts: other `sqlglot.parser.Parser` options.

    Returns:
        The syntax tree for the first parsed statement.

    Raises:
        ParseError: if parsing yields no statements, or the first entry is empty.
    """
    dialect = Dialect.get_or_raise(read)()
    parsed = dialect.parse_into(into, sql, **opts) if into else dialect.parse(sql, **opts)

    # Only the first entry is ever inspected; an empty result and a falsy
    # first entry are reported with the same error.
    first = next(iter(parsed), None)
    if not first:
        raise ParseError(f"No expression was parsed from '{sql}'")
    return first
|
2025-02-13 06:15:54 +01:00
|
|
|
|
|
|
|
|
2025-02-13 14:53:05 +01:00
|
|
|
def transpile(
    sql: str,
    read: t.Optional[str | Dialect] = None,
    write: t.Optional[str | Dialect] = None,
    identity: bool = True,
    error_level: t.Optional[ErrorLevel] = None,
    **opts,
) -> t.List[str]:
    """
    Parse SQL written in the source dialect and regenerate it for the target dialect.

    Every string in the returned list corresponds to one transformed SQL statement.

    Args:
        sql: the SQL code string to transpile.
        read: the source dialect used to parse the input string (eg. "spark", "hive", "presto", "mysql").
        write: the target dialect into which the input should be transformed (eg. "spark", "hive", "presto", "mysql").
        identity: if set to `True` and the target dialect is not specified, the source dialect
            is used as both the source and the target dialect.
        error_level: the desired error level of the parser.
        **opts: other `sqlglot.generator.Generator` options.

    Returns:
        The list of transpiled SQL statements.
    """
    # With `identity` on, a missing target dialect falls back to the source dialect.
    if identity and not write:
        write = read

    statements = []
    for expression in parse(sql, read, error_level=error_level):
        # The target dialect is looked up and instantiated once per statement.
        target = Dialect.get_or_raise(write)()
        statements.append(target.generate(expression, **opts))
    return statements
|