Merging upstream version 10.6.0.

Signed-off-by: Daniel Baumann <daniel@debian.org>

parent fe1b1057f7
commit 2153103f81

36 changed files with 1007 additions and 270 deletions
@@ -1,17 +1,14 @@
 """
 ## Dialects
 
-One of the core abstractions in SQLGlot is the concept of a "dialect". The `Dialect` class essentially implements a
-"SQLGlot dialect", which aims to be as generic and ANSI-compliant as possible. It relies on the base `Tokenizer`,
-`Parser` and `Generator` classes to achieve this goal, so these need to be very lenient when it comes to consuming
-SQL code.
+While there is a SQL standard, most SQL engines support a variation of that standard. This makes it difficult
+to write portable SQL code. SQLGlot bridges all the different variations, called "dialects", with an extensible
+SQL transpilation framework.
 
-However, there are cases where the syntax of different SQL dialects varies wildly, even for common tasks. One such
-example is the date/time functions, which can be hard to deal with. For this reason, it's sometimes necessary to
-override the base dialect in order to specialize its behavior. This can be easily done in SQLGlot: supporting new
-dialects is as simple as subclassing from `Dialect` and overriding its various components (e.g. the `Parser` class),
-in order to implement the target behavior.
+The base `sqlglot.dialects.dialect.Dialect` class implements a generic dialect that aims to be as universal as possible.
+
+Each SQL variation has its own `Dialect` subclass, extending the corresponding `Tokenizer`, `Parser` and `Generator`
+classes as needed.
 
 ### Implementing a custom Dialect
 
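As a rough sketch of the subclassing pattern the docstring describes (the `Custom` dialect and its `INT64` keyword mapping are illustrative, not part of this commit):

```python
from sqlglot import exp, generator, tokens
from sqlglot.dialects.dialect import Dialect
from sqlglot.tokens import TokenType


class Custom(Dialect):
    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", '"']  # strings may be single- or double-quoted
        IDENTIFIERS = ["`"]  # identifiers are backtick-quoted

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "INT64": TokenType.BIGINT,  # tokenize a dialect-specific type name
        }

    class Generator(generator.Generator):
        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BIGINT: "INT64",  # render BIGINT back out as INT64
        }
```

Because `Dialect` subclasses register themselves under their lowercased class name, such a dialect would be reachable as `sqlglot.transpile(sql, read="custom")`.
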
@@ -169,6 +169,13 @@ class BigQuery(Dialect):
             TokenType.VALUES,
         }
 
+        PROPERTY_PARSERS = {
+            **parser.Parser.PROPERTY_PARSERS,  # type: ignore
+            "NOT DETERMINISTIC": lambda self: self.expression(
+                exp.VolatilityProperty, this=exp.Literal.string("VOLATILE")
+            ),
+        }
+
     class Generator(generator.Generator):
         TRANSFORMS = {
             **generator.Generator.TRANSFORMS,  # type: ignore
@@ -66,12 +66,11 @@ def _sort_array_reverse(args):
     return exp.SortArray(this=seq_get(args, 0), asc=exp.false())
 
 
-def _struct_pack_sql(self, expression):
+def _struct_sql(self, expression):
     args = [
-        self.binary(e, ":=") if isinstance(e, exp.EQ) else self.sql(e)
-        for e in expression.expressions
+        f"'{e.name or e.this.name}': {self.sql(e, 'expression')}" for e in expression.expressions
     ]
-    return f"STRUCT_PACK({', '.join(args)})"
+    return f"{{{', '.join(args)}}}"
 
 
 def _datatype_sql(self, expression):
@@ -153,7 +152,7 @@ class DuckDB(Dialect):
             exp.StrToDate: lambda self, e: f"CAST({_str_to_time_sql(self, e)} AS DATE)",
             exp.StrToTime: _str_to_time_sql,
             exp.StrToUnix: lambda self, e: f"EPOCH(STRPTIME({self.sql(e, 'this')}, {self.format_time(e)}))",
-            exp.Struct: _struct_pack_sql,
+            exp.Struct: _struct_sql,
             exp.TableSample: no_tablesample_sql,
             exp.TimeStrToDate: lambda self, e: f"CAST({self.sql(e, 'this')} AS DATE)",
             exp.TimeStrToTime: timestrtotime_sql,
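Taken together, the two DuckDB hunks swap `STRUCT_PACK(...)` generation for DuckDB's struct-literal syntax. A hedged round-trip check (the expected output is inferred from the new return statement, not from this commit's tests):

```python
import sqlglot

# Previously this round-tripped as SELECT STRUCT_PACK(x := 1); with
# _struct_sql it should come back as the literal form: SELECT {'x': 1}
print(sqlglot.transpile("SELECT STRUCT_PACK(x := 1)", read="duckdb", write="duckdb")[0])
```
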
@@ -251,7 +251,7 @@ class Hive(Dialect):
 
         PROPERTY_PARSERS = {
             **parser.Parser.PROPERTY_PARSERS,  # type: ignore
-            TokenType.SERDE_PROPERTIES: lambda self: exp.SerdeProperties(
+            "WITH SERDEPROPERTIES": lambda self: exp.SerdeProperties(
                 expressions=self._parse_wrapped_csv(self._parse_property)
             ),
         }
@@ -202,7 +202,7 @@ class MySQL(Dialect):
 
         PROPERTY_PARSERS = {
             **parser.Parser.PROPERTY_PARSERS,  # type: ignore
-            TokenType.ENGINE: lambda self: self._parse_property_assignment(exp.EngineProperty),
+            "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
         }
 
         STATEMENT_PARSERS = {
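The Hive and MySQL hunks follow one pattern: `PROPERTY_PARSERS` is now keyed by raw keyword strings rather than `TokenType` members. A hedged sanity check of the MySQL side (the table definition is illustrative):

```python
import sqlglot

# "ENGINE" is matched as a plain string key now, so the property should
# still survive a MySQL round trip unchanged.
sql = "CREATE TABLE t (a INT) ENGINE=InnoDB"
print(sqlglot.transpile(sql, read="mysql", write="mysql")[0])
```
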
@@ -74,13 +74,16 @@ class Oracle(Dialect):
         def query_modifiers(self, expression, *sqls):
             return csv(
                 *sqls,
-                *[self.sql(sql) for sql in expression.args.get("laterals", [])],
-                *[self.sql(sql) for sql in expression.args.get("joins", [])],
+                *[self.sql(sql) for sql in expression.args.get("joins") or []],
+                self.sql(expression, "match"),
+                *[self.sql(sql) for sql in expression.args.get("laterals") or []],
                 self.sql(expression, "where"),
                 self.sql(expression, "group"),
                 self.sql(expression, "having"),
                 self.sql(expression, "qualify"),
-                self.sql(expression, "window"),
+                self.seg("WINDOW ") + self.expressions(expression, "windows", flat=True)
+                if expression.args.get("windows")
+                else "",
                 self.sql(expression, "distribute"),
                 self.sql(expression, "sort"),
                 self.sql(expression, "cluster"),
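Two things change in `query_modifiers`: `args.get(..., [])` becomes `args.get(...) or []`, which also covers args that are present but `None`, and named windows are rendered from the `windows` arg behind a guard instead of a plain `window` string. A hedged illustration of the window branch (assuming the base parser already collects a `WINDOW` clause into `windows`):

```python
import sqlglot

# The new guard emits "WINDOW w AS (...)" only when named windows exist.
sql = "SELECT SUM(a) OVER w FROM t WINDOW w AS (PARTITION BY b)"
print(sqlglot.parse_one(sql, read="oracle").sql(dialect="oracle"))
```
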
@@ -99,6 +102,7 @@ class Oracle(Dialect):
     class Tokenizer(tokens.Tokenizer):
         KEYWORDS = {
             **tokens.Tokenizer.KEYWORDS,
+            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
             "MINUS": TokenType.EXCEPT,
             "START": TokenType.BEGIN,
             "TOP": TokenType.TOP,
@@ -9,6 +9,7 @@ from sqlglot.dialects.dialect import (
     no_paren_current_date_sql,
     no_tablesample_sql,
     no_trycast_sql,
+    rename_func,
     str_position_sql,
     trim_sql,
 )
@@ -260,6 +261,16 @@ class Postgres(Dialect):
             "TO_CHAR": format_time_lambda(exp.TimeToStr, "postgres"),
         }
 
+        BITWISE = {
+            **parser.Parser.BITWISE,  # type: ignore
+            TokenType.HASH: exp.BitwiseXor,
+        }
+
+        FACTOR = {
+            **parser.Parser.FACTOR,  # type: ignore
+            TokenType.CARET: exp.Pow,
+        }
+
     class Generator(generator.Generator):
         TYPE_MAPPING = {
             **generator.Generator.TYPE_MAPPING,  # type: ignore
@@ -273,6 +284,7 @@ class Postgres(Dialect):
 
         TRANSFORMS = {
             **generator.Generator.TRANSFORMS,  # type: ignore
+            exp.BitwiseXor: lambda self, e: self.binary(e, "#"),
             exp.ColumnDef: preprocess(
                 [
                     _auto_increment_to_serial,
@@ -285,11 +297,13 @@ class Postgres(Dialect):
             exp.JSONBExtract: lambda self, e: self.binary(e, "#>"),
             exp.JSONBExtractScalar: lambda self, e: self.binary(e, "#>>"),
             exp.JSONBContains: lambda self, e: self.binary(e, "?"),
+            exp.Pow: lambda self, e: self.binary(e, "^"),
             exp.CurrentDate: no_paren_current_date_sql,
             exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
             exp.DateAdd: _date_add_sql("+"),
             exp.DateSub: _date_add_sql("-"),
             exp.DateDiff: _date_diff_sql,
+            exp.LogicalOr: rename_func("BOOL_OR"),
             exp.RegexpLike: lambda self, e: self.binary(e, "~"),
             exp.RegexpILike: lambda self, e: self.binary(e, "~*"),
             exp.StrPosition: str_position_sql,
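The three Postgres hunks are one feature: the parser learns that `#` is bitwise XOR (via `BITWISE`) and `^` is exponentiation (via `FACTOR`), and the generator learns to print both back, plus `BOOL_OR` for `exp.LogicalOr`. A hedged round trip:

```python
import sqlglot

# '#' should now parse to exp.BitwiseXor and '^' to exp.Pow, and both
# should be re-emitted with Postgres operator spelling.
print(sqlglot.transpile("SELECT 2 # 3, 2 ^ 3", read="postgres", write="postgres")[0])
```
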
@@ -174,6 +174,7 @@ class Presto(Dialect):
             "DATE_FORMAT": format_time_lambda(exp.TimeToStr, "presto"),
             "DATE_PARSE": format_time_lambda(exp.StrToTime, "presto"),
             "FROM_UNIXTIME": _from_unixtime,
+            "NOW": exp.CurrentTimestamp.from_arg_list,
             "STRPOS": lambda args: exp.StrPosition(
                 this=seq_get(args, 0),
                 substr=seq_get(args, 1),
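Mapping `NOW` onto `exp.CurrentTimestamp` lets Presto's spelling transpile to dialects that use a different one; the Postgres hunk above, for instance, renders that node as `CURRENT_TIMESTAMP`. A hedged example:

```python
import sqlglot

# NOW() -> exp.CurrentTimestamp -> CURRENT_TIMESTAMP in Postgres output.
print(sqlglot.transpile("SELECT NOW()", read="presto", write="postgres")[0])
```
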
@@ -194,7 +195,6 @@ class Presto(Dialect):
         FUNCTION_PARSERS.pop("TRIM")
 
     class Generator(generator.Generator):
-
         STRUCT_DELIMITER = ("(", ")")
 
         ROOT_PROPERTIES = {exp.SchemaCommentProperty}
@@ -93,7 +93,7 @@ class Redshift(Postgres):
         rows = [tuple_exp.expressions for tuple_exp in expression.expressions]
         selects = []
         for i, row in enumerate(rows):
-            if i == 0:
+            if i == 0 and expression.alias:
                 row = [
                     exp.alias_(value, column_name)
                     for value, column_name in zip(row, expression.args["alias"].args["columns"])
@@ -178,11 +178,6 @@ class Snowflake(Dialect):
             ),
         }
 
-        PROPERTY_PARSERS = {
-            **parser.Parser.PROPERTY_PARSERS,
-            TokenType.PARTITION_BY: lambda self: self._parse_partitioned_by(),
-        }
-
     class Tokenizer(tokens.Tokenizer):
         QUOTES = ["'", "$$"]
         ESCAPES = ["\\", "'"]
@@ -195,6 +190,7 @@ class Snowflake(Dialect):
         KEYWORDS = {
             **tokens.Tokenizer.KEYWORDS,
             "EXCLUDE": TokenType.EXCEPT,
+            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
             "RENAME": TokenType.REPLACE,
             "TIMESTAMP_LTZ": TokenType.TIMESTAMPLTZ,
             "TIMESTAMP_NTZ": TokenType.TIMESTAMP,
@@ -1,7 +1,7 @@
 from __future__ import annotations
 
 from sqlglot import exp, parser
-from sqlglot.dialects.dialect import create_with_partitions_sql, rename_func
+from sqlglot.dialects.dialect import create_with_partitions_sql, rename_func, trim_sql
 from sqlglot.dialects.hive import Hive
 from sqlglot.helper import seq_get
 
@@ -122,6 +122,7 @@ class Spark(Hive):
             exp.Reduce: rename_func("AGGREGATE"),
             exp.StructKwarg: lambda self, e: f"{self.sql(e, 'this')}: {self.sql(e, 'expression')}",
             exp.TimestampTrunc: lambda self, e: f"DATE_TRUNC({self.sql(e, 'unit')}, {self.sql(e, 'this')})",
+            exp.Trim: trim_sql,
             exp.VariancePop: rename_func("VAR_POP"),
             exp.DateFromParts: rename_func("MAKE_DATE"),
             exp.LogicalOr: rename_func("BOOL_OR"),
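With `trim_sql` wired into Spark's `TRANSFORMS`, trim expressions carrying a position or a character set should render in the standard form rather than dropping those arguments. A hedged round trip (the example SQL is illustrative):

```python
import sqlglot

# trim_sql preserves the BOTH/LEADING/TRAILING position and the
# characters-to-trim operand on exp.Trim.
print(sqlglot.transpile("SELECT TRIM(BOTH 'x' FROM col)", read="spark", write="spark")[0])
```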