1
0
Fork 0

Merging upstream version 10.6.0.

Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
Daniel Baumann 2025-02-13 15:08:15 +01:00
parent fe1b1057f7
commit 2153103f81
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
36 changed files with 1007 additions and 270 deletions

View file

@ -1,17 +1,14 @@
"""
## Dialects
One of the core abstractions in SQLGlot is the concept of a "dialect". The `Dialect` class essentially implements a
"SQLGlot dialect", which aims to be as generic and ANSI-compliant as possible. It relies on the base `Tokenizer`,
`Parser` and `Generator` classes to achieve this goal, so these need to be very lenient when it comes to consuming
SQL code.
While there is a SQL standard, most SQL engines support a variation of that standard. This makes it difficult
to write portable SQL code. SQLGlot bridges all the different variations, called "dialects", with an extensible
SQL transpilation framework.
However, there are cases where the syntax of different SQL dialects varies wildly, even for common tasks. One such
example is the date/time functions, which can be hard to deal with. For this reason, it's sometimes necessary to
override the base dialect in order to specialize its behavior. This can be easily done in SQLGlot: supporting new
dialects is as simple as subclassing from `Dialect` and overriding its various components (e.g. the `Parser` class),
in order to implement the target behavior.
The base `sqlglot.dialects.dialect.Dialect` class implements a generic dialect that aims to be as universal as possible.
Each SQL variation has its own `Dialect` subclass, extending the corresponding `Tokenizer`, `Parser` and `Generator`
classes as needed.
### Implementing a custom Dialect

View file

@ -169,6 +169,13 @@ class BigQuery(Dialect):
TokenType.VALUES,
}
PROPERTY_PARSERS = {
**parser.Parser.PROPERTY_PARSERS, # type: ignore
"NOT DETERMINISTIC": lambda self: self.expression(
exp.VolatilityProperty, this=exp.Literal.string("VOLATILE")
),
}
class Generator(generator.Generator):
TRANSFORMS = {
**generator.Generator.TRANSFORMS, # type: ignore

View file

@ -66,12 +66,11 @@ def _sort_array_reverse(args):
return exp.SortArray(this=seq_get(args, 0), asc=exp.false())
def _struct_pack_sql(self, expression):
def _struct_sql(self, expression):
args = [
self.binary(e, ":=") if isinstance(e, exp.EQ) else self.sql(e)
for e in expression.expressions
f"'{e.name or e.this.name}': {self.sql(e, 'expression')}" for e in expression.expressions
]
return f"STRUCT_PACK({', '.join(args)})"
return f"{{{', '.join(args)}}}"
def _datatype_sql(self, expression):
@ -153,7 +152,7 @@ class DuckDB(Dialect):
exp.StrToDate: lambda self, e: f"CAST({_str_to_time_sql(self, e)} AS DATE)",
exp.StrToTime: _str_to_time_sql,
exp.StrToUnix: lambda self, e: f"EPOCH(STRPTIME({self.sql(e, 'this')}, {self.format_time(e)}))",
exp.Struct: _struct_pack_sql,
exp.Struct: _struct_sql,
exp.TableSample: no_tablesample_sql,
exp.TimeStrToDate: lambda self, e: f"CAST({self.sql(e, 'this')} AS DATE)",
exp.TimeStrToTime: timestrtotime_sql,

View file

@ -251,7 +251,7 @@ class Hive(Dialect):
PROPERTY_PARSERS = {
**parser.Parser.PROPERTY_PARSERS, # type: ignore
TokenType.SERDE_PROPERTIES: lambda self: exp.SerdeProperties(
"WITH SERDEPROPERTIES": lambda self: exp.SerdeProperties(
expressions=self._parse_wrapped_csv(self._parse_property)
),
}

View file

@ -202,7 +202,7 @@ class MySQL(Dialect):
PROPERTY_PARSERS = {
**parser.Parser.PROPERTY_PARSERS, # type: ignore
TokenType.ENGINE: lambda self: self._parse_property_assignment(exp.EngineProperty),
"ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
}
STATEMENT_PARSERS = {

View file

@ -74,13 +74,16 @@ class Oracle(Dialect):
def query_modifiers(self, expression, *sqls):
return csv(
*sqls,
*[self.sql(sql) for sql in expression.args.get("laterals", [])],
*[self.sql(sql) for sql in expression.args.get("joins", [])],
*[self.sql(sql) for sql in expression.args.get("joins") or []],
self.sql(expression, "match"),
*[self.sql(sql) for sql in expression.args.get("laterals") or []],
self.sql(expression, "where"),
self.sql(expression, "group"),
self.sql(expression, "having"),
self.sql(expression, "qualify"),
self.sql(expression, "window"),
self.seg("WINDOW ") + self.expressions(expression, "windows", flat=True)
if expression.args.get("windows")
else "",
self.sql(expression, "distribute"),
self.sql(expression, "sort"),
self.sql(expression, "cluster"),
@ -99,6 +102,7 @@ class Oracle(Dialect):
class Tokenizer(tokens.Tokenizer):
KEYWORDS = {
**tokens.Tokenizer.KEYWORDS,
"MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
"MINUS": TokenType.EXCEPT,
"START": TokenType.BEGIN,
"TOP": TokenType.TOP,

View file

@ -9,6 +9,7 @@ from sqlglot.dialects.dialect import (
no_paren_current_date_sql,
no_tablesample_sql,
no_trycast_sql,
rename_func,
str_position_sql,
trim_sql,
)
@ -260,6 +261,16 @@ class Postgres(Dialect):
"TO_CHAR": format_time_lambda(exp.TimeToStr, "postgres"),
}
BITWISE = {
**parser.Parser.BITWISE, # type: ignore
TokenType.HASH: exp.BitwiseXor,
}
FACTOR = {
**parser.Parser.FACTOR, # type: ignore
TokenType.CARET: exp.Pow,
}
class Generator(generator.Generator):
TYPE_MAPPING = {
**generator.Generator.TYPE_MAPPING, # type: ignore
@ -273,6 +284,7 @@ class Postgres(Dialect):
TRANSFORMS = {
**generator.Generator.TRANSFORMS, # type: ignore
exp.BitwiseXor: lambda self, e: self.binary(e, "#"),
exp.ColumnDef: preprocess(
[
_auto_increment_to_serial,
@ -285,11 +297,13 @@ class Postgres(Dialect):
exp.JSONBExtract: lambda self, e: self.binary(e, "#>"),
exp.JSONBExtractScalar: lambda self, e: self.binary(e, "#>>"),
exp.JSONBContains: lambda self, e: self.binary(e, "?"),
exp.Pow: lambda self, e: self.binary(e, "^"),
exp.CurrentDate: no_paren_current_date_sql,
exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
exp.DateAdd: _date_add_sql("+"),
exp.DateSub: _date_add_sql("-"),
exp.DateDiff: _date_diff_sql,
exp.LogicalOr: rename_func("BOOL_OR"),
exp.RegexpLike: lambda self, e: self.binary(e, "~"),
exp.RegexpILike: lambda self, e: self.binary(e, "~*"),
exp.StrPosition: str_position_sql,

View file

@ -174,6 +174,7 @@ class Presto(Dialect):
"DATE_FORMAT": format_time_lambda(exp.TimeToStr, "presto"),
"DATE_PARSE": format_time_lambda(exp.StrToTime, "presto"),
"FROM_UNIXTIME": _from_unixtime,
"NOW": exp.CurrentTimestamp.from_arg_list,
"STRPOS": lambda args: exp.StrPosition(
this=seq_get(args, 0),
substr=seq_get(args, 1),
@ -194,7 +195,6 @@ class Presto(Dialect):
FUNCTION_PARSERS.pop("TRIM")
class Generator(generator.Generator):
STRUCT_DELIMITER = ("(", ")")
ROOT_PROPERTIES = {exp.SchemaCommentProperty}

View file

@ -93,7 +93,7 @@ class Redshift(Postgres):
rows = [tuple_exp.expressions for tuple_exp in expression.expressions]
selects = []
for i, row in enumerate(rows):
if i == 0:
if i == 0 and expression.alias:
row = [
exp.alias_(value, column_name)
for value, column_name in zip(row, expression.args["alias"].args["columns"])

View file

@ -178,11 +178,6 @@ class Snowflake(Dialect):
),
}
PROPERTY_PARSERS = {
**parser.Parser.PROPERTY_PARSERS,
TokenType.PARTITION_BY: lambda self: self._parse_partitioned_by(),
}
class Tokenizer(tokens.Tokenizer):
QUOTES = ["'", "$$"]
ESCAPES = ["\\", "'"]
@ -195,6 +190,7 @@ class Snowflake(Dialect):
KEYWORDS = {
**tokens.Tokenizer.KEYWORDS,
"EXCLUDE": TokenType.EXCEPT,
"MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
"RENAME": TokenType.REPLACE,
"TIMESTAMP_LTZ": TokenType.TIMESTAMPLTZ,
"TIMESTAMP_NTZ": TokenType.TIMESTAMP,

View file

@ -1,7 +1,7 @@
from __future__ import annotations
from sqlglot import exp, parser
from sqlglot.dialects.dialect import create_with_partitions_sql, rename_func
from sqlglot.dialects.dialect import create_with_partitions_sql, rename_func, trim_sql
from sqlglot.dialects.hive import Hive
from sqlglot.helper import seq_get
@ -122,6 +122,7 @@ class Spark(Hive):
exp.Reduce: rename_func("AGGREGATE"),
exp.StructKwarg: lambda self, e: f"{self.sql(e, 'this')}: {self.sql(e, 'expression')}",
exp.TimestampTrunc: lambda self, e: f"DATE_TRUNC({self.sql(e, 'unit')}, {self.sql(e, 'this')})",
exp.Trim: trim_sql,
exp.VariancePop: rename_func("VAR_POP"),
exp.DateFromParts: rename_func("MAKE_DATE"),
exp.LogicalOr: rename_func("BOOL_OR"),