1
0
Fork 0
sqlglot/sqlglot/dialects/__init__.py
Daniel Baumann 1e2a8571aa
Merging upstream version 26.16.2.
Signed-off-by: Daniel Baumann <daniel@debian.org>
2025-04-25 07:27:01 +02:00

118 lines
3.4 KiB
Python

# ruff: noqa: F401
"""
## Dialects
While there is a SQL standard, most SQL engines support a variation of that standard. This makes it difficult
to write portable SQL code. SQLGlot bridges all the different variations, called "dialects", with an extensible
SQL transpilation framework.
The base `sqlglot.dialects.dialect.Dialect` class implements a generic dialect that aims to be as universal as possible.
Each SQL variation has its own `Dialect` subclass, extending the corresponding `Tokenizer`, `Parser` and `Generator`
classes as needed.
### Implementing a custom Dialect
Creating a new SQL dialect may seem complicated at first, but it is actually quite simple in SQLGlot:
```python
from sqlglot import exp
from sqlglot.dialects.dialect import Dialect
from sqlglot.generator import Generator
from sqlglot.tokens import Tokenizer, TokenType
class Custom(Dialect):
class Tokenizer(Tokenizer):
QUOTES = ["'", '"'] # Strings can be delimited by either single or double quotes
IDENTIFIERS = ["`"] # Identifiers can be delimited by backticks
# Associates certain meaningful words with tokens that capture their intent
KEYWORDS = {
**Tokenizer.KEYWORDS,
"INT64": TokenType.BIGINT,
"FLOAT64": TokenType.DOUBLE,
}
class Generator(Generator):
# Specifies how AST nodes, i.e. subclasses of exp.Expression, should be converted into SQL
TRANSFORMS = {
exp.Array: lambda self, e: f"[{self.expressions(e)}]",
}
# Specifies how AST nodes representing data types should be converted into SQL
TYPE_MAPPING = {
exp.DataType.Type.TINYINT: "INT64",
exp.DataType.Type.SMALLINT: "INT64",
exp.DataType.Type.INT: "INT64",
exp.DataType.Type.BIGINT: "INT64",
exp.DataType.Type.DECIMAL: "NUMERIC",
exp.DataType.Type.FLOAT: "FLOAT64",
exp.DataType.Type.DOUBLE: "FLOAT64",
exp.DataType.Type.BOOLEAN: "BOOL",
exp.DataType.Type.TEXT: "STRING",
}
```
The above example demonstrates how certain parts of the base `Dialect` class can be overridden to match a different
specification. Even though it is a fairly realistic starting point, we strongly encourage the reader to study existing
dialect implementations in order to understand how their various components can be modified, depending on the use-case.
----
"""
import importlib
import threading
DIALECTS = [
"Athena",
"BigQuery",
"ClickHouse",
"Databricks",
"Doris",
"Drill",
"Druid",
"DuckDB",
"Dune",
"Hive",
"Materialize",
"MySQL",
"Oracle",
"Postgres",
"Presto",
"PRQL",
"Redshift",
"RisingWave",
"Snowflake",
"Spark",
"Spark2",
"SQLite",
"StarRocks",
"Tableau",
"Teradata",
"Trino",
"TSQL",
]
MODULE_BY_DIALECT = {name: name.lower() for name in DIALECTS}
DIALECT_MODULE_NAMES = MODULE_BY_DIALECT.values()
MODULE_BY_ATTRIBUTE = {
**MODULE_BY_DIALECT,
"Dialect": "dialect",
"Dialects": "dialect",
}
__all__ = list(MODULE_BY_ATTRIBUTE)
_import_lock = threading.Lock()
def __getattr__(name):
module_name = MODULE_BY_ATTRIBUTE.get(name)
if module_name:
with _import_lock:
module = importlib.import_module(f"sqlglot.dialects.{module_name}")
return getattr(module, name)
raise AttributeError(f"module {__name__} has no attribute {name}")