2025-02-13 15:01:55 +01:00
|
|
|
"""
|
|
|
|
.. include:: ../README.md
|
2025-02-13 15:23:26 +01:00
|
|
|
|
2025-02-13 15:07:05 +01:00
|
|
|
----
|
2025-02-13 15:01:55 +01:00
|
|
|
"""
|
2025-02-13 14:51:47 +01:00
|
|
|
|
2025-02-13 14:53:05 +01:00
|
|
|
from __future__ import annotations
|
|
|
|
|
2025-02-13 15:57:23 +01:00
|
|
|
import logging
|
2025-02-13 14:53:05 +01:00
|
|
|
import typing as t
|
|
|
|
|
2025-02-13 06:15:54 +01:00
|
|
|
from sqlglot import expressions as exp
|
2025-02-13 15:40:23 +01:00
|
|
|
from sqlglot.dialects.dialect import Dialect as Dialect, Dialects as Dialects
|
|
|
|
from sqlglot.diff import diff as diff
|
|
|
|
from sqlglot.errors import (
|
|
|
|
ErrorLevel as ErrorLevel,
|
|
|
|
ParseError as ParseError,
|
|
|
|
TokenError as TokenError,
|
|
|
|
UnsupportedError as UnsupportedError,
|
|
|
|
)
|
2025-02-13 06:15:54 +01:00
|
|
|
from sqlglot.expressions import (
|
2025-02-13 15:40:23 +01:00
|
|
|
Expression as Expression,
|
|
|
|
alias_ as alias,
|
|
|
|
and_ as and_,
|
2025-02-13 15:52:09 +01:00
|
|
|
cast as cast,
|
2025-02-13 15:40:23 +01:00
|
|
|
column as column,
|
|
|
|
condition as condition,
|
|
|
|
except_ as except_,
|
|
|
|
from_ as from_,
|
2025-02-13 15:52:09 +01:00
|
|
|
func as func,
|
2025-02-13 15:40:23 +01:00
|
|
|
intersect as intersect,
|
|
|
|
maybe_parse as maybe_parse,
|
|
|
|
not_ as not_,
|
|
|
|
or_ as or_,
|
|
|
|
select as select,
|
|
|
|
subquery as subquery,
|
|
|
|
table_ as table,
|
|
|
|
to_column as to_column,
|
2025-02-13 15:52:09 +01:00
|
|
|
to_identifier as to_identifier,
|
2025-02-13 15:40:23 +01:00
|
|
|
to_table as to_table,
|
|
|
|
union as union,
|
2025-02-13 06:15:54 +01:00
|
|
|
)
|
2025-02-13 15:40:23 +01:00
|
|
|
from sqlglot.generator import Generator as Generator
|
|
|
|
from sqlglot.parser import Parser as Parser
|
|
|
|
from sqlglot.schema import MappingSchema as MappingSchema, Schema as Schema
|
|
|
|
from sqlglot.tokens import Tokenizer as Tokenizer, TokenType as TokenType
|
2025-02-13 06:15:54 +01:00
|
|
|
|
2025-02-13 15:09:58 +01:00
|
|
|
if t.TYPE_CHECKING:
|
2025-02-13 15:57:23 +01:00
|
|
|
from sqlglot._typing import E
|
2025-02-13 15:40:23 +01:00
|
|
|
from sqlglot.dialects.dialect import DialectType as DialectType
|
2025-02-13 15:09:58 +01:00
|
|
|
|
2025-02-13 15:57:23 +01:00
|
|
|
# Package-wide logger, registered under the fixed name "sqlglot".
logger = logging.getLogger("sqlglot")

# Expose the package version at the top level. When `sqlglot._version` cannot
# be imported, the failure is only logged: the import of this module still
# succeeds, but `__version__` and `__version_tuple__` are left undefined.
try:
    from sqlglot._version import __version__, __version_tuple__
except ImportError:
    logger.error(
        "Unable to set __version__, run `pip install -e .` or `python setup.py develop` first."
    )

# Module-level default; the bare string below is its attribute docstring.
pretty = False
"""Whether to format generated SQL by default."""

# A single MappingSchema instance created once, at import time.
schema = MappingSchema()
"""The default schema used by SQLGlot (e.g. in the optimizer)."""
|
2025-02-13 14:48:46 +01:00
|
|
|
|
2025-02-13 06:15:54 +01:00
|
|
|
|
2025-02-13 15:09:58 +01:00
|
|
|
def parse(sql: str, read: DialectType = None, **opts) -> t.List[t.Optional[Expression]]:
    """
    Parses the given SQL string into a collection of syntax trees, one per parsed SQL statement.

    Args:
        sql: the SQL code string to parse.
        read: the SQL dialect to apply during parsing (e.g. "spark", "hive", "presto", "mysql").
        **opts: other `sqlglot.parser.Parser` options; forwarded unchanged to the dialect's
            `parse` call.

    Returns:
        The resulting syntax tree collection.
    """
    # Look up the dialect class for `read`, instantiate it, and let it do the parsing.
    return Dialect.get_or_raise(read)().parse(sql, **opts)
|
|
|
|
|
|
|
|
|
2025-02-13 15:09:58 +01:00
|
|
|
@t.overload
def parse_one(sql: str, *, into: t.Type[E], **opts) -> E: ...


@t.overload
def parse_one(sql: str, **opts) -> Expression: ...


def parse_one(
    sql: str, read: DialectType = None, into: t.Optional[exp.IntoType] = None, **opts
) -> Expression:
    """
    Parses the given SQL string and returns a syntax tree for the first parsed SQL statement.

    Args:
        sql: the SQL code string to parse.
        read: the SQL dialect to apply during parsing (e.g. "spark", "hive", "presto", "mysql").
        into: the SQLGlot Expression to parse into. When truthy, parsing goes through the
            dialect's `parse_into`; otherwise through its plain `parse`.
        **opts: other `sqlglot.parser.Parser` options.

    Returns:
        The syntax tree for the first parsed statement.

    Raises:
        ParseError: if parsing yields no statements, or if the first statement is falsy.
    """
    source_dialect = Dialect.get_or_raise(read)()

    trees = (
        source_dialect.parse_into(into, sql, **opts) if into else source_dialect.parse(sql, **opts)
    )

    # Only the first statement matters; an empty result and a falsy first
    # statement are reported with the same error.
    first = next(iter(trees), None)
    if not first:
        raise ParseError(f"No expression was parsed from '{sql}'")
    return first
|
2025-02-13 06:15:54 +01:00
|
|
|
|
|
|
|
|
2025-02-13 14:53:05 +01:00
|
|
|
def transpile(
    sql: str,
    read: DialectType = None,
    write: DialectType = None,
    identity: bool = True,
    error_level: t.Optional[ErrorLevel] = None,
    **opts,
) -> t.List[str]:
    """
    Parses the given SQL string using the source dialect and returns one SQL string per
    statement, each generated for the target dialect.

    Args:
        sql: the SQL code string to transpile.
        read: the source dialect used to parse the input string (e.g. "spark", "hive", "presto", "mysql").
        write: the target dialect into which the input should be transformed (e.g. "spark", "hive", "presto", "mysql").
        identity: if set to `True` and the target dialect is not specified, the source dialect
            is used as both the source and the target dialect.
        error_level: the desired error level of the parser.
        **opts: other `sqlglot.generator.Generator` options.

    Returns:
        The list of transpiled SQL statements.
    """
    # With `identity` on, a missing target dialect falls back to the source one.
    if identity and write is None:
        write = read

    # Parsing completes before any SQL is generated.
    statements = parse(sql, read, error_level=error_level)

    # The target dialect is resolved and instantiated once per statement.
    return [Dialect.get_or_raise(write)().generate(statement, **opts) for statement in statements]
|