1
0
Fork 0

Merging upstream version 10.6.0.

Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
Daniel Baumann 2025-02-13 15:08:15 +01:00
parent fe1b1057f7
commit 2153103f81
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
36 changed files with 1007 additions and 270 deletions

2
.gitignore vendored
View file

@ -135,3 +135,5 @@ dmypy.json
.vscode .vscode
.DS_STORE .DS_STORE
metastore_db
spark_warehouse

View file

@ -1,6 +1,15 @@
Changelog Changelog
========= =========
v10.6.0
------
Changes:
- Breaking: Change Power to binary expression.
- New: x GLOB y support.
v10.5.0 v10.5.0
------ ------

View file

@ -33,7 +33,7 @@ from sqlglot.parser import Parser
from sqlglot.schema import MappingSchema, Schema from sqlglot.schema import MappingSchema, Schema
from sqlglot.tokens import Tokenizer, TokenType from sqlglot.tokens import Tokenizer, TokenType
__version__ = "10.5.10" __version__ = "10.6.0"
pretty = False pretty = False
"""Whether to format generated SQL by default.""" """Whether to format generated SQL by default."""

View file

@ -94,10 +94,10 @@ class Column:
return self.inverse_binary_op(exp.Mod, other) return self.inverse_binary_op(exp.Mod, other)
def __pow__(self, power: ColumnOrLiteral, modulo=None): def __pow__(self, power: ColumnOrLiteral, modulo=None):
return Column(exp.Pow(this=self.expression, power=Column(power).expression)) return Column(exp.Pow(this=self.expression, expression=Column(power).expression))
def __rpow__(self, power: ColumnOrLiteral): def __rpow__(self, power: ColumnOrLiteral):
return Column(exp.Pow(this=Column(power).expression, power=self.expression)) return Column(exp.Pow(this=Column(power).expression, expression=self.expression))
def __invert__(self): def __invert__(self):
return self.unary_op(exp.Not) return self.unary_op(exp.Not)

View file

@ -311,7 +311,7 @@ def hypot(col1: t.Union[ColumnOrName, float], col2: t.Union[ColumnOrName, float]
def pow(col1: t.Union[ColumnOrName, float], col2: t.Union[ColumnOrName, float]) -> Column: def pow(col1: t.Union[ColumnOrName, float], col2: t.Union[ColumnOrName, float]) -> Column:
return Column.invoke_expression_over_column(col1, glotexp.Pow, power=col2) return Column.invoke_expression_over_column(col1, glotexp.Pow, expression=col2)
def row_number() -> Column: def row_number() -> Column:

View file

@ -1,17 +1,14 @@
""" """
## Dialects ## Dialects
One of the core abstractions in SQLGlot is the concept of a "dialect". The `Dialect` class essentially implements a While there is a SQL standard, most SQL engines support a variation of that standard. This makes it difficult
"SQLGlot dialect", which aims to be as generic and ANSI-compliant as possible. It relies on the base `Tokenizer`, to write portable SQL code. SQLGlot bridges all the different variations, called "dialects", with an extensible
`Parser` and `Generator` classes to achieve this goal, so these need to be very lenient when it comes to consuming SQL transpilation framework.
SQL code.
However, there are cases where the syntax of different SQL dialects varies wildly, even for common tasks. One such The base `sqlglot.dialects.dialect.Dialect` class implements a generic dialect that aims to be as universal as possible.
example is the date/time functions, which can be hard to deal with. For this reason, it's sometimes necessary to
override the base dialect in order to specialize its behavior. This can be easily done in SQLGlot: supporting new
dialects is as simple as subclassing from `Dialect` and overriding its various components (e.g. the `Parser` class),
in order to implement the target behavior.
Each SQL variation has its own `Dialect` subclass, extending the corresponding `Tokenizer`, `Parser` and `Generator`
classes as needed.
### Implementing a custom Dialect ### Implementing a custom Dialect

View file

@ -169,6 +169,13 @@ class BigQuery(Dialect):
TokenType.VALUES, TokenType.VALUES,
} }
PROPERTY_PARSERS = {
**parser.Parser.PROPERTY_PARSERS, # type: ignore
"NOT DETERMINISTIC": lambda self: self.expression(
exp.VolatilityProperty, this=exp.Literal.string("VOLATILE")
),
}
class Generator(generator.Generator): class Generator(generator.Generator):
TRANSFORMS = { TRANSFORMS = {
**generator.Generator.TRANSFORMS, # type: ignore **generator.Generator.TRANSFORMS, # type: ignore

View file

@ -66,12 +66,11 @@ def _sort_array_reverse(args):
return exp.SortArray(this=seq_get(args, 0), asc=exp.false()) return exp.SortArray(this=seq_get(args, 0), asc=exp.false())
def _struct_pack_sql(self, expression): def _struct_sql(self, expression):
args = [ args = [
self.binary(e, ":=") if isinstance(e, exp.EQ) else self.sql(e) f"'{e.name or e.this.name}': {self.sql(e, 'expression')}" for e in expression.expressions
for e in expression.expressions
] ]
return f"STRUCT_PACK({', '.join(args)})" return f"{{{', '.join(args)}}}"
def _datatype_sql(self, expression): def _datatype_sql(self, expression):
@ -153,7 +152,7 @@ class DuckDB(Dialect):
exp.StrToDate: lambda self, e: f"CAST({_str_to_time_sql(self, e)} AS DATE)", exp.StrToDate: lambda self, e: f"CAST({_str_to_time_sql(self, e)} AS DATE)",
exp.StrToTime: _str_to_time_sql, exp.StrToTime: _str_to_time_sql,
exp.StrToUnix: lambda self, e: f"EPOCH(STRPTIME({self.sql(e, 'this')}, {self.format_time(e)}))", exp.StrToUnix: lambda self, e: f"EPOCH(STRPTIME({self.sql(e, 'this')}, {self.format_time(e)}))",
exp.Struct: _struct_pack_sql, exp.Struct: _struct_sql,
exp.TableSample: no_tablesample_sql, exp.TableSample: no_tablesample_sql,
exp.TimeStrToDate: lambda self, e: f"CAST({self.sql(e, 'this')} AS DATE)", exp.TimeStrToDate: lambda self, e: f"CAST({self.sql(e, 'this')} AS DATE)",
exp.TimeStrToTime: timestrtotime_sql, exp.TimeStrToTime: timestrtotime_sql,

View file

@ -251,7 +251,7 @@ class Hive(Dialect):
PROPERTY_PARSERS = { PROPERTY_PARSERS = {
**parser.Parser.PROPERTY_PARSERS, # type: ignore **parser.Parser.PROPERTY_PARSERS, # type: ignore
TokenType.SERDE_PROPERTIES: lambda self: exp.SerdeProperties( "WITH SERDEPROPERTIES": lambda self: exp.SerdeProperties(
expressions=self._parse_wrapped_csv(self._parse_property) expressions=self._parse_wrapped_csv(self._parse_property)
), ),
} }

View file

@ -202,7 +202,7 @@ class MySQL(Dialect):
PROPERTY_PARSERS = { PROPERTY_PARSERS = {
**parser.Parser.PROPERTY_PARSERS, # type: ignore **parser.Parser.PROPERTY_PARSERS, # type: ignore
TokenType.ENGINE: lambda self: self._parse_property_assignment(exp.EngineProperty), "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
} }
STATEMENT_PARSERS = { STATEMENT_PARSERS = {

View file

@ -74,13 +74,16 @@ class Oracle(Dialect):
def query_modifiers(self, expression, *sqls): def query_modifiers(self, expression, *sqls):
return csv( return csv(
*sqls, *sqls,
*[self.sql(sql) for sql in expression.args.get("laterals", [])], *[self.sql(sql) for sql in expression.args.get("joins") or []],
*[self.sql(sql) for sql in expression.args.get("joins", [])], self.sql(expression, "match"),
*[self.sql(sql) for sql in expression.args.get("laterals") or []],
self.sql(expression, "where"), self.sql(expression, "where"),
self.sql(expression, "group"), self.sql(expression, "group"),
self.sql(expression, "having"), self.sql(expression, "having"),
self.sql(expression, "qualify"), self.sql(expression, "qualify"),
self.sql(expression, "window"), self.seg("WINDOW ") + self.expressions(expression, "windows", flat=True)
if expression.args.get("windows")
else "",
self.sql(expression, "distribute"), self.sql(expression, "distribute"),
self.sql(expression, "sort"), self.sql(expression, "sort"),
self.sql(expression, "cluster"), self.sql(expression, "cluster"),
@ -99,6 +102,7 @@ class Oracle(Dialect):
class Tokenizer(tokens.Tokenizer): class Tokenizer(tokens.Tokenizer):
KEYWORDS = { KEYWORDS = {
**tokens.Tokenizer.KEYWORDS, **tokens.Tokenizer.KEYWORDS,
"MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
"MINUS": TokenType.EXCEPT, "MINUS": TokenType.EXCEPT,
"START": TokenType.BEGIN, "START": TokenType.BEGIN,
"TOP": TokenType.TOP, "TOP": TokenType.TOP,

View file

@ -9,6 +9,7 @@ from sqlglot.dialects.dialect import (
no_paren_current_date_sql, no_paren_current_date_sql,
no_tablesample_sql, no_tablesample_sql,
no_trycast_sql, no_trycast_sql,
rename_func,
str_position_sql, str_position_sql,
trim_sql, trim_sql,
) )
@ -260,6 +261,16 @@ class Postgres(Dialect):
"TO_CHAR": format_time_lambda(exp.TimeToStr, "postgres"), "TO_CHAR": format_time_lambda(exp.TimeToStr, "postgres"),
} }
BITWISE = {
**parser.Parser.BITWISE, # type: ignore
TokenType.HASH: exp.BitwiseXor,
}
FACTOR = {
**parser.Parser.FACTOR, # type: ignore
TokenType.CARET: exp.Pow,
}
class Generator(generator.Generator): class Generator(generator.Generator):
TYPE_MAPPING = { TYPE_MAPPING = {
**generator.Generator.TYPE_MAPPING, # type: ignore **generator.Generator.TYPE_MAPPING, # type: ignore
@ -273,6 +284,7 @@ class Postgres(Dialect):
TRANSFORMS = { TRANSFORMS = {
**generator.Generator.TRANSFORMS, # type: ignore **generator.Generator.TRANSFORMS, # type: ignore
exp.BitwiseXor: lambda self, e: self.binary(e, "#"),
exp.ColumnDef: preprocess( exp.ColumnDef: preprocess(
[ [
_auto_increment_to_serial, _auto_increment_to_serial,
@ -285,11 +297,13 @@ class Postgres(Dialect):
exp.JSONBExtract: lambda self, e: self.binary(e, "#>"), exp.JSONBExtract: lambda self, e: self.binary(e, "#>"),
exp.JSONBExtractScalar: lambda self, e: self.binary(e, "#>>"), exp.JSONBExtractScalar: lambda self, e: self.binary(e, "#>>"),
exp.JSONBContains: lambda self, e: self.binary(e, "?"), exp.JSONBContains: lambda self, e: self.binary(e, "?"),
exp.Pow: lambda self, e: self.binary(e, "^"),
exp.CurrentDate: no_paren_current_date_sql, exp.CurrentDate: no_paren_current_date_sql,
exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP", exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
exp.DateAdd: _date_add_sql("+"), exp.DateAdd: _date_add_sql("+"),
exp.DateSub: _date_add_sql("-"), exp.DateSub: _date_add_sql("-"),
exp.DateDiff: _date_diff_sql, exp.DateDiff: _date_diff_sql,
exp.LogicalOr: rename_func("BOOL_OR"),
exp.RegexpLike: lambda self, e: self.binary(e, "~"), exp.RegexpLike: lambda self, e: self.binary(e, "~"),
exp.RegexpILike: lambda self, e: self.binary(e, "~*"), exp.RegexpILike: lambda self, e: self.binary(e, "~*"),
exp.StrPosition: str_position_sql, exp.StrPosition: str_position_sql,

View file

@ -174,6 +174,7 @@ class Presto(Dialect):
"DATE_FORMAT": format_time_lambda(exp.TimeToStr, "presto"), "DATE_FORMAT": format_time_lambda(exp.TimeToStr, "presto"),
"DATE_PARSE": format_time_lambda(exp.StrToTime, "presto"), "DATE_PARSE": format_time_lambda(exp.StrToTime, "presto"),
"FROM_UNIXTIME": _from_unixtime, "FROM_UNIXTIME": _from_unixtime,
"NOW": exp.CurrentTimestamp.from_arg_list,
"STRPOS": lambda args: exp.StrPosition( "STRPOS": lambda args: exp.StrPosition(
this=seq_get(args, 0), this=seq_get(args, 0),
substr=seq_get(args, 1), substr=seq_get(args, 1),
@ -194,7 +195,6 @@ class Presto(Dialect):
FUNCTION_PARSERS.pop("TRIM") FUNCTION_PARSERS.pop("TRIM")
class Generator(generator.Generator): class Generator(generator.Generator):
STRUCT_DELIMITER = ("(", ")") STRUCT_DELIMITER = ("(", ")")
ROOT_PROPERTIES = {exp.SchemaCommentProperty} ROOT_PROPERTIES = {exp.SchemaCommentProperty}

View file

@ -93,7 +93,7 @@ class Redshift(Postgres):
rows = [tuple_exp.expressions for tuple_exp in expression.expressions] rows = [tuple_exp.expressions for tuple_exp in expression.expressions]
selects = [] selects = []
for i, row in enumerate(rows): for i, row in enumerate(rows):
if i == 0: if i == 0 and expression.alias:
row = [ row = [
exp.alias_(value, column_name) exp.alias_(value, column_name)
for value, column_name in zip(row, expression.args["alias"].args["columns"]) for value, column_name in zip(row, expression.args["alias"].args["columns"])

View file

@ -178,11 +178,6 @@ class Snowflake(Dialect):
), ),
} }
PROPERTY_PARSERS = {
**parser.Parser.PROPERTY_PARSERS,
TokenType.PARTITION_BY: lambda self: self._parse_partitioned_by(),
}
class Tokenizer(tokens.Tokenizer): class Tokenizer(tokens.Tokenizer):
QUOTES = ["'", "$$"] QUOTES = ["'", "$$"]
ESCAPES = ["\\", "'"] ESCAPES = ["\\", "'"]
@ -195,6 +190,7 @@ class Snowflake(Dialect):
KEYWORDS = { KEYWORDS = {
**tokens.Tokenizer.KEYWORDS, **tokens.Tokenizer.KEYWORDS,
"EXCLUDE": TokenType.EXCEPT, "EXCLUDE": TokenType.EXCEPT,
"MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
"RENAME": TokenType.REPLACE, "RENAME": TokenType.REPLACE,
"TIMESTAMP_LTZ": TokenType.TIMESTAMPLTZ, "TIMESTAMP_LTZ": TokenType.TIMESTAMPLTZ,
"TIMESTAMP_NTZ": TokenType.TIMESTAMP, "TIMESTAMP_NTZ": TokenType.TIMESTAMP,

View file

@ -1,7 +1,7 @@
from __future__ import annotations from __future__ import annotations
from sqlglot import exp, parser from sqlglot import exp, parser
from sqlglot.dialects.dialect import create_with_partitions_sql, rename_func from sqlglot.dialects.dialect import create_with_partitions_sql, rename_func, trim_sql
from sqlglot.dialects.hive import Hive from sqlglot.dialects.hive import Hive
from sqlglot.helper import seq_get from sqlglot.helper import seq_get
@ -122,6 +122,7 @@ class Spark(Hive):
exp.Reduce: rename_func("AGGREGATE"), exp.Reduce: rename_func("AGGREGATE"),
exp.StructKwarg: lambda self, e: f"{self.sql(e, 'this')}: {self.sql(e, 'expression')}", exp.StructKwarg: lambda self, e: f"{self.sql(e, 'this')}: {self.sql(e, 'expression')}",
exp.TimestampTrunc: lambda self, e: f"DATE_TRUNC({self.sql(e, 'unit')}, {self.sql(e, 'this')})", exp.TimestampTrunc: lambda self, e: f"DATE_TRUNC({self.sql(e, 'unit')}, {self.sql(e, 'this')})",
exp.Trim: trim_sql,
exp.VariancePop: rename_func("VAR_POP"), exp.VariancePop: rename_func("VAR_POP"),
exp.DateFromParts: rename_func("MAKE_DATE"), exp.DateFromParts: rename_func("MAKE_DATE"),
exp.LogicalOr: rename_func("BOOL_OR"), exp.LogicalOr: rename_func("BOOL_OR"),

View file

@ -230,6 +230,7 @@ class Expression(metaclass=_Expression):
Returns a deep copy of the expression. Returns a deep copy of the expression.
""" """
new = deepcopy(self) new = deepcopy(self)
new.parent = self.parent
for item, parent, _ in new.bfs(): for item, parent, _ in new.bfs():
if isinstance(item, Expression) and parent: if isinstance(item, Expression) and parent:
item.parent = parent item.parent = parent
@ -759,6 +760,10 @@ class Create(Expression):
"this": True, "this": True,
"kind": True, "kind": True,
"expression": False, "expression": False,
"set": False,
"multiset": False,
"global_temporary": False,
"volatile": False,
"exists": False, "exists": False,
"properties": False, "properties": False,
"temporary": False, "temporary": False,
@ -1082,7 +1087,7 @@ class LoadData(Expression):
class Partition(Expression): class Partition(Expression):
pass arg_types = {"expressions": True}
class Fetch(Expression): class Fetch(Expression):
@ -1232,6 +1237,18 @@ class Lateral(UDTF):
arg_types = {"this": True, "view": False, "outer": False, "alias": False} arg_types = {"this": True, "view": False, "outer": False, "alias": False}
# MATCH_RECOGNIZE query modifier. Every argument is optional; the generator
# emits them in this order inside the wrapped body: partition_by, order,
# measures (MEASURES ...), rows, after, pattern (PATTERN (...)), define (DEFINE ...).
class MatchRecognize(Expression):
    arg_types = {
        "partition_by": False,
        "order": False,
        "measures": False,
        "rows": False,
        "after": False,
        "pattern": False,
        "define": False,
    }
# Clickhouse FROM FINAL modifier # Clickhouse FROM FINAL modifier
# https://clickhouse.com/docs/en/sql-reference/statements/select/from/#final-modifier # https://clickhouse.com/docs/en/sql-reference/statements/select/from/#final-modifier
class Final(Expression): class Final(Expression):
@ -1357,8 +1374,58 @@ class SerdeProperties(Property):
arg_types = {"expressions": True} arg_types = {"expressions": True}
# Generated as `[NO ]FALLBACK[ PROTECTION]`.
class FallbackProperty(Property):
    arg_types = {"no": True, "protection": False}
# Generated as `WITH JOURNAL TABLE=<this>`.
class WithJournalTableProperty(Property):
    arg_types = {"this": True}
# Generated as `[NO ]LOG`.
class LogProperty(Property):
    arg_types = {"no": True}
# Generated as `[NO ][DUAL ][BEFORE ]JOURNAL`.
class JournalProperty(Property):
    arg_types = {"no": True, "dual": False, "before": False}
# Generated as `[NO ][DUAL ][LOCAL |NOT LOCAL ]AFTER JOURNAL`; the LOCAL part
# is omitted entirely when "local" is None.
class AfterJournalProperty(Property):
    arg_types = {"no": True, "dual": False, "local": False}
# Generated as `CHECKSUM=DEFAULT`, `CHECKSUM=ON` or `CHECKSUM=OFF`
# ("default" takes precedence over "on").
class ChecksumProperty(Property):
    arg_types = {"on": False, "default": False}
# Generated as `FREESPACE=<this>[ PERCENT]`.
class FreespaceProperty(Property):
    arg_types = {"this": True, "percent": False}
# Generated as `NO MERGEBLOCKRATIO`, `DEFAULT MERGEBLOCKRATIO` or
# `MERGEBLOCKRATIO=<this>[ PERCENT]`, checked in that order.
class MergeBlockRatioProperty(Property):
    arg_types = {"this": False, "no": False, "default": False, "percent": False}
# Generated as `DEFAULT|MINIMUM|MAXIMUM DATABLOCKSIZE` when "default" or "min"
# is set (not None), otherwise as `DATABLOCKSIZE=<size>[ <units>]`.
class DataBlocksizeProperty(Property):
    arg_types = {"size": False, "units": False, "min": False, "default": False}
# Generated as `BLOCKCOMPRESSION=<option>` where the option is the first of
# AUTOTEMP(...), ALWAYS, DEFAULT, MANUAL, NEVER whose argument is set.
class BlockCompressionProperty(Property):
    arg_types = {"autotemp": False, "always": False, "default": True, "manual": True, "never": True}
# Generated as `WITH[ NO][ CONCURRENT] ISOLATED LOADING[ FOR ALL|INSERT|NONE]`;
# only the first truthy "for_*" flag is emitted.
class IsolatedLoadingProperty(Property):
    arg_types = {
        "no": True,
        "concurrent": True,
        "for_all": True,
        "for_insert": True,
        "for_none": True,
    }
class Properties(Expression): class Properties(Expression):
arg_types = {"expressions": True} arg_types = {"expressions": True, "before": False}
NAME_TO_PROPERTY = { NAME_TO_PROPERTY = {
"AUTO_INCREMENT": AutoIncrementProperty, "AUTO_INCREMENT": AutoIncrementProperty,
@ -1510,6 +1577,7 @@ class Subqueryable(Unionable):
QUERY_MODIFIERS = { QUERY_MODIFIERS = {
"match": False,
"laterals": False, "laterals": False,
"joins": False, "joins": False,
"pivots": False, "pivots": False,
@ -2459,6 +2527,10 @@ class AddConstraint(Expression):
arg_types = {"this": False, "expression": False, "enforced": False} arg_types = {"this": False, "expression": False, "enforced": False}
# Carries the items to drop in "expressions" plus an optional "exists" flag.
# NOTE(review): presumably models ALTER TABLE ... DROP [IF EXISTS] PARTITION --
# confirm against the parser/generator, which are not visible here.
class DropPartition(Expression):
    arg_types = {"expressions": True, "exists": False}
# Binary expressions like (ADD a b) # Binary expressions like (ADD a b)
class Binary(Expression): class Binary(Expression):
arg_types = {"this": True, "expression": True} arg_types = {"this": True, "expression": True}
@ -2540,6 +2612,10 @@ class Escape(Binary):
pass pass
# Binary predicate for `x GLOB y`.
class Glob(Binary, Predicate):
    pass
class GT(Binary, Predicate): class GT(Binary, Predicate):
pass pass
@ -3126,8 +3202,7 @@ class Posexplode(Func):
pass pass
class Pow(Func): class Pow(Binary, Func):
arg_types = {"this": True, "power": True}
_sql_names = ["POWER", "POW"] _sql_names = ["POWER", "POW"]
@ -3361,7 +3436,7 @@ class Year(Func):
class Use(Expression): class Use(Expression):
pass arg_types = {"this": True, "kind": False}
class Merge(Expression): class Merge(Expression):

View file

@ -65,6 +65,8 @@ class Generator:
exp.ReturnsProperty: lambda self, e: self.naked_property(e), exp.ReturnsProperty: lambda self, e: self.naked_property(e),
exp.ExecuteAsProperty: lambda self, e: self.naked_property(e), exp.ExecuteAsProperty: lambda self, e: self.naked_property(e),
exp.VolatilityProperty: lambda self, e: e.name, exp.VolatilityProperty: lambda self, e: e.name,
exp.WithJournalTableProperty: lambda self, e: f"WITH JOURNAL TABLE={self.sql(e, 'this')}",
exp.LogProperty: lambda self, e: f"{'NO ' if e.args.get('no') else ''}LOG",
} }
# Whether 'CREATE ... TRANSIENT ... TABLE' is allowed # Whether 'CREATE ... TRANSIENT ... TABLE' is allowed
@ -97,6 +99,20 @@ class Generator:
STRUCT_DELIMITER = ("<", ">") STRUCT_DELIMITER = ("<", ">")
BEFORE_PROPERTIES = {
exp.FallbackProperty,
exp.WithJournalTableProperty,
exp.LogProperty,
exp.JournalProperty,
exp.AfterJournalProperty,
exp.ChecksumProperty,
exp.FreespaceProperty,
exp.MergeBlockRatioProperty,
exp.DataBlocksizeProperty,
exp.BlockCompressionProperty,
exp.IsolatedLoadingProperty,
}
ROOT_PROPERTIES = { ROOT_PROPERTIES = {
exp.ReturnsProperty, exp.ReturnsProperty,
exp.LanguageProperty, exp.LanguageProperty,
@ -113,8 +129,6 @@ class Generator:
exp.TableFormatProperty, exp.TableFormatProperty,
} }
WITH_SINGLE_ALTER_TABLE_ACTION = (exp.AlterColumn, exp.RenameTable, exp.AddConstraint)
WITH_SEPARATED_COMMENTS = (exp.Select, exp.From, exp.Where, exp.Binary) WITH_SEPARATED_COMMENTS = (exp.Select, exp.From, exp.Where, exp.Binary)
SENTINEL_LINE_BREAK = "__SQLGLOT__LB__" SENTINEL_LINE_BREAK = "__SQLGLOT__LB__"
@ -122,7 +136,6 @@ class Generator:
"time_mapping", "time_mapping",
"time_trie", "time_trie",
"pretty", "pretty",
"configured_pretty",
"quote_start", "quote_start",
"quote_end", "quote_end",
"identifier_start", "identifier_start",
@ -177,7 +190,6 @@ class Generator:
self.time_mapping = time_mapping or {} self.time_mapping = time_mapping or {}
self.time_trie = time_trie self.time_trie = time_trie
self.pretty = pretty if pretty is not None else sqlglot.pretty self.pretty = pretty if pretty is not None else sqlglot.pretty
self.configured_pretty = self.pretty
self.quote_start = quote_start or "'" self.quote_start = quote_start or "'"
self.quote_end = quote_end or "'" self.quote_end = quote_end or "'"
self.identifier_start = identifier_start or '"' self.identifier_start = identifier_start or '"'
@ -442,8 +454,20 @@ class Generator:
return "UNIQUE" return "UNIQUE"
def create_sql(self, expression: exp.Create) -> str: def create_sql(self, expression: exp.Create) -> str:
this = self.sql(expression, "this")
kind = self.sql(expression, "kind").upper() kind = self.sql(expression, "kind").upper()
has_before_properties = expression.args.get("properties")
has_before_properties = (
has_before_properties.args.get("before") if has_before_properties else None
)
if kind == "TABLE" and has_before_properties:
this_name = self.sql(expression.this, "this")
this_properties = self.sql(expression, "properties")
this_schema = f"({self.expressions(expression.this)})"
this = f"{this_name}, {this_properties} {this_schema}"
properties = ""
else:
this = self.sql(expression, "this")
properties = self.sql(expression, "properties")
begin = " BEGIN" if expression.args.get("begin") else "" begin = " BEGIN" if expression.args.get("begin") else ""
expression_sql = self.sql(expression, "expression") expression_sql = self.sql(expression, "expression")
expression_sql = f" AS{begin}{self.sep()}{expression_sql}" if expression_sql else "" expression_sql = f" AS{begin}{self.sep()}{expression_sql}" if expression_sql else ""
@ -456,7 +480,10 @@ class Generator:
exists_sql = " IF NOT EXISTS" if expression.args.get("exists") else "" exists_sql = " IF NOT EXISTS" if expression.args.get("exists") else ""
unique = " UNIQUE" if expression.args.get("unique") else "" unique = " UNIQUE" if expression.args.get("unique") else ""
materialized = " MATERIALIZED" if expression.args.get("materialized") else "" materialized = " MATERIALIZED" if expression.args.get("materialized") else ""
properties = self.sql(expression, "properties") set_ = " SET" if expression.args.get("set") else ""
multiset = " MULTISET" if expression.args.get("multiset") else ""
global_temporary = " GLOBAL TEMPORARY" if expression.args.get("global_temporary") else ""
volatile = " VOLATILE" if expression.args.get("volatile") else ""
data = expression.args.get("data") data = expression.args.get("data")
if data is None: if data is None:
data = "" data = ""
@ -475,7 +502,7 @@ class Generator:
indexes = expression.args.get("indexes") indexes = expression.args.get("indexes")
index_sql = "" index_sql = ""
if indexes is not None: if indexes:
indexes_sql = [] indexes_sql = []
for index in indexes: for index in indexes:
ind_unique = " UNIQUE" if index.args.get("unique") else "" ind_unique = " UNIQUE" if index.args.get("unique") else ""
@ -500,6 +527,10 @@ class Generator:
external, external,
unique, unique,
materialized, materialized,
set_,
multiset,
global_temporary,
volatile,
) )
) )
no_schema_binding = ( no_schema_binding = (
@ -569,13 +600,14 @@ class Generator:
def delete_sql(self, expression: exp.Delete) -> str: def delete_sql(self, expression: exp.Delete) -> str:
this = self.sql(expression, "this") this = self.sql(expression, "this")
this = f" FROM {this}" if this else ""
using_sql = ( using_sql = (
f" USING {self.expressions(expression, 'using', sep=', USING ')}" f" USING {self.expressions(expression, 'using', sep=', USING ')}"
if expression.args.get("using") if expression.args.get("using")
else "" else ""
) )
where_sql = self.sql(expression, "where") where_sql = self.sql(expression, "where")
sql = f"DELETE FROM {this}{using_sql}{where_sql}" sql = f"DELETE{this}{using_sql}{where_sql}"
return self.prepend_ctes(expression, sql) return self.prepend_ctes(expression, sql)
def drop_sql(self, expression: exp.Drop) -> str: def drop_sql(self, expression: exp.Drop) -> str:
@ -630,28 +662,27 @@ class Generator:
return f"N{self.sql(expression, 'this')}" return f"N{self.sql(expression, 'this')}"
def partition_sql(self, expression: exp.Partition) -> str: def partition_sql(self, expression: exp.Partition) -> str:
keys = csv( return f"PARTITION({self.expressions(expression)})"
*[
f"""{prop.name}='{prop.text("value")}'""" if prop.text("value") else prop.name
for prop in expression.this
]
)
return f"PARTITION({keys})"
def properties_sql(self, expression: exp.Properties) -> str: def properties_sql(self, expression: exp.Properties) -> str:
before_properties = []
root_properties = [] root_properties = []
with_properties = [] with_properties = []
for p in expression.expressions: for p in expression.expressions:
p_class = p.__class__ p_class = p.__class__
if p_class in self.WITH_PROPERTIES: if p_class in self.BEFORE_PROPERTIES:
before_properties.append(p)
elif p_class in self.WITH_PROPERTIES:
with_properties.append(p) with_properties.append(p)
elif p_class in self.ROOT_PROPERTIES: elif p_class in self.ROOT_PROPERTIES:
root_properties.append(p) root_properties.append(p)
return self.root_properties( return (
exp.Properties(expressions=root_properties) self.properties(exp.Properties(expressions=before_properties), before=True)
) + self.with_properties(exp.Properties(expressions=with_properties)) + self.root_properties(exp.Properties(expressions=root_properties))
+ self.with_properties(exp.Properties(expressions=with_properties))
)
def root_properties(self, properties: exp.Properties) -> str: def root_properties(self, properties: exp.Properties) -> str:
if properties.expressions: if properties.expressions:
@ -659,13 +690,17 @@ class Generator:
return "" return ""
def properties( def properties(
self, properties: exp.Properties, prefix: str = "", sep: str = ", ", suffix: str = "" self,
properties: exp.Properties,
prefix: str = "",
sep: str = ", ",
suffix: str = "",
before: bool = False,
) -> str: ) -> str:
if properties.expressions: if properties.expressions:
expressions = self.expressions(properties, sep=sep, indent=False) expressions = self.expressions(properties, sep=sep, indent=False)
return ( expressions = expressions if before else self.wrap(expressions)
f"{prefix}{' ' if prefix and prefix != ' ' else ''}{self.wrap(expressions)}{suffix}" return f"{prefix}{' ' if prefix and prefix != ' ' else ''}{expressions}{suffix}"
)
return "" return ""
def with_properties(self, properties: exp.Properties) -> str: def with_properties(self, properties: exp.Properties) -> str:
@ -687,6 +722,98 @@ class Generator:
options = f" {options}" if options else "" options = f" {options}" if options else ""
return f"LIKE {self.sql(expression, 'this')}{options}" return f"LIKE {self.sql(expression, 'this')}{options}"
def fallbackproperty_sql(self, expression: exp.FallbackProperty) -> str:
    """Render a fallback property as ``[NO ]FALLBACK[ PROTECTION]``.

    Args:
        expression: node whose ``args`` may carry the ``no`` and
            ``protection`` flags.

    Returns:
        The SQL fragment for the property.
    """
    flags = expression.args
    pieces = []
    if flags.get("no"):
        pieces.append("NO ")
    pieces.append("FALLBACK")
    if flags.get("protection"):
        pieces.append(" PROTECTION")
    return "".join(pieces)
def journalproperty_sql(self, expression: exp.JournalProperty) -> str:
    """Render a journal property as ``[NO ][DUAL ][BEFORE ]JOURNAL``.

    Args:
        expression: node whose ``args`` may carry the ``no``, ``dual`` and
            ``before`` flags.

    Returns:
        The SQL fragment for the property.
    """
    flags = expression.args
    # Modifiers are emitted in this fixed order, each only when its flag is truthy.
    modifiers = (("no", "NO "), ("dual", "DUAL "), ("before", "BEFORE "))
    prefix = "".join(text for key, text in modifiers if flags.get(key))
    return f"{prefix}JOURNAL"
def freespaceproperty_sql(self, expression: exp.FreespaceProperty) -> str:
    """Render a freespace property as ``FREESPACE=<this>[ PERCENT]``.

    Args:
        expression: node whose ``this`` argument holds the value and whose
            optional ``percent`` flag appends the ``PERCENT`` keyword.

    Returns:
        The SQL fragment for the property.
    """
    value = self.sql(expression, "this")
    if expression.args.get("percent"):
        return f"FREESPACE={value} PERCENT"
    return f"FREESPACE={value}"
def afterjournalproperty_sql(self, expression: exp.AfterJournalProperty) -> str:
    """Render an after-journal property.

    The output has the shape ``[NO ][DUAL ][LOCAL |NOT LOCAL ]AFTER JOURNAL``.

    Args:
        expression: node whose ``args`` may carry ``no``, ``dual`` and
            ``local``. ``local`` is tri-state: ``None`` omits the keyword,
            a truthy value gives ``LOCAL`` and any other value ``NOT LOCAL``.

    Returns:
        The SQL fragment for the property.
    """
    flags = expression.args
    prefix = "NO " if flags.get("no") else ""
    if flags.get("dual"):
        prefix += "DUAL "

    locality = flags.get("local")
    if locality is not None:
        prefix += "LOCAL " if locality else "NOT LOCAL "

    return f"{prefix}AFTER JOURNAL"
def checksumproperty_sql(self, expression: exp.ChecksumProperty) -> str:
    """Render a checksum property as ``CHECKSUM=DEFAULT|ON|OFF``.

    Args:
        expression: node whose ``args`` may carry ``default`` and ``on``.
            ``default`` wins over ``on``; with neither set the result is
            ``OFF``.

    Returns:
        The SQL fragment for the property.
    """
    flags = expression.args
    if flags.get("default"):
        setting = "DEFAULT"
    else:
        setting = "ON" if flags.get("on") else "OFF"
    return f"CHECKSUM={setting}"
def mergeblockratioproperty_sql(self, expression: exp.MergeBlockRatioProperty) -> str:
    """Render a merge-block-ratio property.

    Produces ``NO MERGEBLOCKRATIO``, ``DEFAULT MERGEBLOCKRATIO`` or
    ``MERGEBLOCKRATIO=<this>[ PERCENT]``, tested in that order.

    Args:
        expression: node whose ``args`` may carry ``no``, ``default``,
            ``this`` and ``percent``.

    Returns:
        The SQL fragment for the property.
    """
    flags = expression.args
    if flags.get("no"):
        rendered = "NO MERGEBLOCKRATIO"
    elif flags.get("default"):
        rendered = "DEFAULT MERGEBLOCKRATIO"
    else:
        suffix = " PERCENT" if flags.get("percent") else ""
        rendered = f"MERGEBLOCKRATIO={self.sql(expression, 'this')}{suffix}"
    return rendered
def datablocksizeproperty_sql(self, expression: exp.DataBlocksizeProperty) -> str:
    """Render a data-block-size property.

    When either ``default`` or ``min`` is present (not ``None``) the keyword
    form ``DEFAULT|MINIMUM|MAXIMUM DATABLOCKSIZE`` is produced; ``MAXIMUM`` is
    the outcome when the present flag(s) are falsy. Otherwise the explicit
    form ``DATABLOCKSIZE=<size>[ <units>]`` is produced.

    Args:
        expression: node whose ``args`` may carry ``default``, ``min``,
            ``size`` and ``units``.

    Returns:
        The SQL fragment for the property.
    """
    flags = expression.args
    use_default = flags.get("default")
    use_minimum = flags.get("min")

    # Explicit size form: neither keyword flag was supplied at all.
    if use_default is None and use_minimum is None:
        unit_name = flags.get("units")
        unit_suffix = f" {unit_name}" if unit_name else ""
        return f"DATABLOCKSIZE={self.sql(expression, 'size')}{unit_suffix}"

    if use_default:
        keyword = "DEFAULT"
    elif use_minimum:
        keyword = "MINIMUM"
    else:
        keyword = "MAXIMUM"
    return f"{keyword} DATABLOCKSIZE"
def blockcompressionproperty_sql(self, expression: exp.BlockCompressionProperty) -> str:
    """Render a block-compression property as ``BLOCKCOMPRESSION=<option>``.

    The option is the first one set, in this precedence order:
    ``AUTOTEMP(...)`` (when ``autotemp`` is not ``None``), ``ALWAYS``,
    ``DEFAULT``, ``MANUAL``, ``NEVER``.

    Args:
        expression: node whose ``args`` may carry ``autotemp``, ``always``,
            ``default``, ``manual`` and ``never``.

    Returns:
        The SQL fragment for the property.

    Raises:
        ValueError: if none of the options is set. Previously this case
            failed with an opaque ``UnboundLocalError`` on the return line.
    """
    flags = expression.args
    autotemp = flags.get("autotemp")

    # AUTOTEMP is selected by presence (not truthiness), so an empty
    # argument list still renders as AUTOTEMP().
    if autotemp is not None:
        return f"BLOCKCOMPRESSION=AUTOTEMP({self.expressions(autotemp)})"

    for key, keyword in (
        ("always", "ALWAYS"),
        ("default", "DEFAULT"),
        ("manual", "MANUAL"),
        ("never", "NEVER"),
    ):
        if flags.get(key):
            return f"BLOCKCOMPRESSION={keyword}"

    raise ValueError(
        "BlockCompressionProperty requires one of: autotemp, always, default, manual, never"
    )
def isolatedloadingproperty_sql(self, expression: exp.IsolatedLoadingProperty) -> str:
    """Render an isolated-loading property.

    The output has the shape
    ``WITH[ NO][ CONCURRENT] ISOLATED LOADING[ FOR ALL| FOR INSERT| FOR NONE]``.

    Args:
        expression: node whose ``args`` may carry ``no``, ``concurrent``,
            ``for_all``, ``for_insert`` and ``for_none``. Only the first
            truthy ``for_*`` flag (in that order) is emitted.

    Returns:
        The SQL fragment for the property.
    """
    flags = expression.args
    pieces = ["WITH"]
    if flags.get("no"):
        pieces.append(" NO")
    if flags.get("concurrent"):
        pieces.append(" CONCURRENT")
    pieces.append(" ISOLATED LOADING")

    targets = (
        ("for_all", " FOR ALL"),
        ("for_insert", " FOR INSERT"),
        ("for_none", " FOR NONE"),
    )
    for key, clause in targets:
        if flags.get(key):
            pieces.append(clause)
            break

    return "".join(pieces)
def insert_sql(self, expression: exp.Insert) -> str: def insert_sql(self, expression: exp.Insert) -> str:
overwrite = expression.args.get("overwrite") overwrite = expression.args.get("overwrite")
@ -833,10 +960,21 @@ class Generator:
grouping_sets = ( grouping_sets = (
f"{self.seg('GROUPING SETS')} {self.wrap(grouping_sets)}" if grouping_sets else "" f"{self.seg('GROUPING SETS')} {self.wrap(grouping_sets)}" if grouping_sets else ""
) )
cube = expression.args.get("cube")
if cube is True:
cube = self.seg("WITH CUBE")
else:
cube = self.expressions(expression, key="cube", indent=False) cube = self.expressions(expression, key="cube", indent=False)
cube = f"{self.seg('CUBE')} {self.wrap(cube)}" if cube else "" cube = f"{self.seg('CUBE')} {self.wrap(cube)}" if cube else ""
rollup = expression.args.get("rollup")
if rollup is True:
rollup = self.seg("WITH ROLLUP")
else:
rollup = self.expressions(expression, key="rollup", indent=False) rollup = self.expressions(expression, key="rollup", indent=False)
rollup = f"{self.seg('ROLLUP')} {self.wrap(rollup)}" if rollup else "" rollup = f"{self.seg('ROLLUP')} {self.wrap(rollup)}" if rollup else ""
return f"{group_by}{grouping_sets}{cube}{rollup}" return f"{group_by}{grouping_sets}{cube}{rollup}"
def having_sql(self, expression: exp.Having) -> str: def having_sql(self, expression: exp.Having) -> str:
@ -980,10 +1118,37 @@ class Generator:
return f"{self.sql(expression, 'this')}{sort_order}{nulls_sort_change}" return f"{self.sql(expression, 'this')}{sort_order}{nulls_sort_change}"
def matchrecognize_sql(self, expression: exp.MatchRecognize) -> str:
    """Render a ``MATCH_RECOGNIZE (...)`` clause.

    The body is the concatenation, in order, of the partition, order,
    measures, rows, after, pattern and define parts; parts that render to an
    empty string contribute nothing.
    """

    def segment(key: str, prefix: str = "", suffix: str = "") -> str:
        # Render one arg and wrap it in its keyword; empty renders stay empty.
        rendered = self.sql(expression, key)
        return self.seg(f"{prefix}{rendered}{suffix}") if rendered else ""

    # The list is built left to right, so helpers are invoked in clause order.
    parts = [
        self.partition_by_sql(expression),
        self.sql(expression, "order"),
        segment("measures", "MEASURES "),
        segment("rows"),
        segment("after"),
        segment("pattern", "PATTERN (", ")"),
        segment("define", "DEFINE "),
    ]

    keyword = self.seg("MATCH_RECOGNIZE")
    wrapped = self.wrap("".join(parts))
    return f"{keyword} {wrapped}"
def query_modifiers(self, expression: exp.Expression, *sqls: str) -> str: def query_modifiers(self, expression: exp.Expression, *sqls: str) -> str:
return csv( return csv(
*sqls, *sqls,
*[self.sql(sql) for sql in expression.args.get("joins") or []], *[self.sql(sql) for sql in expression.args.get("joins") or []],
self.sql(expression, "match"),
*[self.sql(sql) for sql in expression.args.get("laterals") or []], *[self.sql(sql) for sql in expression.args.get("laterals") or []],
self.sql(expression, "where"), self.sql(expression, "where"),
self.sql(expression, "group"), self.sql(expression, "group"),
@ -1092,8 +1257,7 @@ class Generator:
def window_sql(self, expression: exp.Window) -> str: def window_sql(self, expression: exp.Window) -> str:
this = self.sql(expression, "this") this = self.sql(expression, "this")
partition = self.expressions(expression, key="partition_by", flat=True) partition = self.partition_by_sql(expression)
partition = f"PARTITION BY {partition}" if partition else ""
order = expression.args.get("order") order = expression.args.get("order")
order_sql = self.order_sql(order, flat=True) if order else "" order_sql = self.order_sql(order, flat=True) if order else ""
@ -1113,6 +1277,10 @@ class Generator:
return f"{this} ({window_args.strip()})" return f"{this} ({window_args.strip()})"
def partition_by_sql(self, expression: exp.Window | exp.MatchRecognize) -> str:
    """Render the ``partition_by`` expressions as ``PARTITION BY ...``.

    Returns an empty string when there is nothing to partition by.
    """
    columns = self.expressions(expression, key="partition_by", flat=True)
    if not columns:
        return ""
    return f"PARTITION BY {columns}"
def window_spec_sql(self, expression: exp.WindowSpec) -> str: def window_spec_sql(self, expression: exp.WindowSpec) -> str:
kind = self.sql(expression, "kind") kind = self.sql(expression, "kind")
start = csv(self.sql(expression, "start"), self.sql(expression, "start_side"), sep=" ") start = csv(self.sql(expression, "start"), self.sql(expression, "start_side"), sep=" ")
@ -1386,16 +1554,19 @@ class Generator:
actions = self.expressions(expression, "actions", prefix="ADD COLUMN ") actions = self.expressions(expression, "actions", prefix="ADD COLUMN ")
elif isinstance(actions[0], exp.Schema): elif isinstance(actions[0], exp.Schema):
actions = self.expressions(expression, "actions", prefix="ADD COLUMNS ") actions = self.expressions(expression, "actions", prefix="ADD COLUMNS ")
elif isinstance(actions[0], exp.Drop): elif isinstance(actions[0], exp.Delete):
actions = self.expressions(expression, "actions") actions = self.expressions(expression, "actions", flat=True)
elif isinstance(actions[0], self.WITH_SINGLE_ALTER_TABLE_ACTION):
actions = self.sql(actions[0])
else: else:
self.unsupported(f"Unsupported ALTER TABLE action {actions[0].__class__.__name__}") actions = self.expressions(expression, "actions")
exists = " IF EXISTS" if expression.args.get("exists") else "" exists = " IF EXISTS" if expression.args.get("exists") else ""
return f"ALTER TABLE{exists} {self.sql(expression, 'this')} {actions}" return f"ALTER TABLE{exists} {self.sql(expression, 'this')} {actions}"
def droppartition_sql(self, expression: exp.DropPartition) -> str:
    """Render ``DROP [IF EXISTS] <partitions>`` for an ALTER TABLE action."""
    partitions = self.expressions(expression)
    # The separator carries the optional IF EXISTS plus surrounding spaces.
    separator = " IF EXISTS " if expression.args.get("exists") else " "
    return f"DROP{separator}{partitions}"
def addconstraint_sql(self, expression: exp.AddConstraint) -> str: def addconstraint_sql(self, expression: exp.AddConstraint) -> str:
this = self.sql(expression, "this") this = self.sql(expression, "this")
expression_ = self.sql(expression, "expression") expression_ = self.sql(expression, "expression")
@ -1447,6 +1618,9 @@ class Generator:
def escape_sql(self, expression: exp.Escape) -> str: def escape_sql(self, expression: exp.Escape) -> str:
return self.binary(expression, "ESCAPE") return self.binary(expression, "ESCAPE")
def glob_sql(self, expression: exp.Glob) -> str:
    """Render ``<this> GLOB <expression>`` via the shared binary formatter."""
    operator = "GLOB"
    return self.binary(expression, operator)
def gt_sql(self, expression: exp.GT) -> str: def gt_sql(self, expression: exp.GT) -> str:
return self.binary(expression, ">") return self.binary(expression, ">")
@ -1499,7 +1673,11 @@ class Generator:
return f"TRY_CAST({self.sql(expression, 'this')} AS {self.sql(expression, 'to')})" return f"TRY_CAST({self.sql(expression, 'this')} AS {self.sql(expression, 'to')})"
def use_sql(self, expression: exp.Use) -> str: def use_sql(self, expression: exp.Use) -> str:
return f"USE {self.sql(expression, 'this')}" kind = self.sql(expression, "kind")
kind = f" {kind}" if kind else ""
this = self.sql(expression, "this")
this = f" {this}" if this else ""
return f"USE{kind}{this}"
def binary(self, expression: exp.Binary, op: str) -> str: def binary(self, expression: exp.Binary, op: str) -> str:
return f"{self.sql(expression, 'this')} {op} {self.sql(expression, 'expression')}" return f"{self.sql(expression, 'this')} {op} {self.sql(expression, 'expression')}"

View file

@ -2,6 +2,14 @@ from sqlglot import exp
def expand_multi_table_selects(expression): def expand_multi_table_selects(expression):
"""
Replace multiple FROM expressions with JOINs.
Example:
>>> from sqlglot import parse_one
>>> expand_multi_table_selects(parse_one("SELECT * FROM x, y")).sql()
'SELECT * FROM x CROSS JOIN y'
"""
for from_ in expression.find_all(exp.From): for from_ in expression.find_all(exp.From):
parent = from_.parent parent = from_.parent

View file

@ -11,7 +11,7 @@ def isolate_table_selects(expression, schema=None):
if len(scope.selected_sources) == 1: if len(scope.selected_sources) == 1:
continue continue
for (_, source) in scope.selected_sources.values(): for _, source in scope.selected_sources.values():
if not isinstance(source, exp.Table) or not schema.column_names(source): if not isinstance(source, exp.Table) or not schema.column_names(source):
continue continue

View file

@ -6,6 +6,11 @@ from sqlglot.optimizer.simplify import simplify
def optimize_joins(expression): def optimize_joins(expression):
""" """
Removes cross joins if possible and reorder joins based on predicate dependencies. Removes cross joins if possible and reorder joins based on predicate dependencies.
Example:
>>> from sqlglot import parse_one
>>> optimize_joins(parse_one("SELECT * FROM x CROSS JOIN y JOIN z ON x.a = z.a AND y.a = z.a")).sql()
'SELECT * FROM x JOIN z ON x.a = z.a AND TRUE JOIN y ON y.a = z.a'
""" """
for select in expression.find_all(exp.Select): for select in expression.find_all(exp.Select):
references = {} references = {}

View file

@ -64,7 +64,6 @@ def optimize(expression, schema=None, db=None, catalog=None, rules=RULES, **kwar
possible_kwargs = {"db": db, "catalog": catalog, "schema": schema, **kwargs} possible_kwargs = {"db": db, "catalog": catalog, "schema": schema, **kwargs}
expression = expression.copy() expression = expression.copy()
for rule in rules: for rule in rules:
# Find any additional rule parameters, beyond `expression` # Find any additional rule parameters, beyond `expression`
rule_params = rule.__code__.co_varnames rule_params = rule.__code__.co_varnames
rule_kwargs = { rule_kwargs = {

View file

@ -175,13 +175,9 @@ class Parser(metaclass=_Parser):
TokenType.DEFAULT, TokenType.DEFAULT,
TokenType.DELETE, TokenType.DELETE,
TokenType.DESCRIBE, TokenType.DESCRIBE,
TokenType.DETERMINISTIC,
TokenType.DIV, TokenType.DIV,
TokenType.DISTKEY,
TokenType.DISTSTYLE,
TokenType.END, TokenType.END,
TokenType.EXECUTE, TokenType.EXECUTE,
TokenType.ENGINE,
TokenType.ESCAPE, TokenType.ESCAPE,
TokenType.FALSE, TokenType.FALSE,
TokenType.FIRST, TokenType.FIRST,
@ -194,13 +190,10 @@ class Parser(metaclass=_Parser):
TokenType.IF, TokenType.IF,
TokenType.INDEX, TokenType.INDEX,
TokenType.ISNULL, TokenType.ISNULL,
TokenType.IMMUTABLE,
TokenType.INTERVAL, TokenType.INTERVAL,
TokenType.LAZY, TokenType.LAZY,
TokenType.LANGUAGE,
TokenType.LEADING, TokenType.LEADING,
TokenType.LOCAL, TokenType.LOCAL,
TokenType.LOCATION,
TokenType.MATERIALIZED, TokenType.MATERIALIZED,
TokenType.MERGE, TokenType.MERGE,
TokenType.NATURAL, TokenType.NATURAL,
@ -209,13 +202,11 @@ class Parser(metaclass=_Parser):
TokenType.ONLY, TokenType.ONLY,
TokenType.OPTIONS, TokenType.OPTIONS,
TokenType.ORDINALITY, TokenType.ORDINALITY,
TokenType.PARTITIONED_BY,
TokenType.PERCENT, TokenType.PERCENT,
TokenType.PIVOT, TokenType.PIVOT,
TokenType.PRECEDING, TokenType.PRECEDING,
TokenType.RANGE, TokenType.RANGE,
TokenType.REFERENCES, TokenType.REFERENCES,
TokenType.RETURNS,
TokenType.ROW, TokenType.ROW,
TokenType.ROWS, TokenType.ROWS,
TokenType.SCHEMA, TokenType.SCHEMA,
@ -225,10 +216,7 @@ class Parser(metaclass=_Parser):
TokenType.SET, TokenType.SET,
TokenType.SHOW, TokenType.SHOW,
TokenType.SORTKEY, TokenType.SORTKEY,
TokenType.STABLE,
TokenType.STORED,
TokenType.TABLE, TokenType.TABLE,
TokenType.TABLE_FORMAT,
TokenType.TEMPORARY, TokenType.TEMPORARY,
TokenType.TOP, TokenType.TOP,
TokenType.TRAILING, TokenType.TRAILING,
@ -237,7 +225,6 @@ class Parser(metaclass=_Parser):
TokenType.UNIQUE, TokenType.UNIQUE,
TokenType.UNLOGGED, TokenType.UNLOGGED,
TokenType.UNPIVOT, TokenType.UNPIVOT,
TokenType.PROPERTIES,
TokenType.PROCEDURE, TokenType.PROCEDURE,
TokenType.VIEW, TokenType.VIEW,
TokenType.VOLATILE, TokenType.VOLATILE,
@ -448,7 +435,12 @@ class Parser(metaclass=_Parser):
TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
TokenType.UNCACHE: lambda self: self._parse_uncache(), TokenType.UNCACHE: lambda self: self._parse_uncache(),
TokenType.UPDATE: lambda self: self._parse_update(), TokenType.UPDATE: lambda self: self._parse_update(),
TokenType.USE: lambda self: self.expression(exp.Use, this=self._parse_id_var()), TokenType.USE: lambda self: self.expression(
exp.Use,
kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
and exp.Var(this=self._prev.text),
this=self._parse_table(schema=False),
),
} }
UNARY_PARSERS = { UNARY_PARSERS = {
@ -492,6 +484,9 @@ class Parser(metaclass=_Parser):
RANGE_PARSERS = { RANGE_PARSERS = {
TokenType.BETWEEN: lambda self, this: self._parse_between(this), TokenType.BETWEEN: lambda self, this: self._parse_between(this),
TokenType.GLOB: lambda self, this: self._parse_escape(
self.expression(exp.Glob, this=this, expression=self._parse_bitwise())
),
TokenType.IN: lambda self, this: self._parse_in(this), TokenType.IN: lambda self, this: self._parse_in(this),
TokenType.IS: lambda self, this: self._parse_is(this), TokenType.IS: lambda self, this: self._parse_is(this),
TokenType.LIKE: lambda self, this: self._parse_escape( TokenType.LIKE: lambda self, this: self._parse_escape(
@ -512,45 +507,66 @@ class Parser(metaclass=_Parser):
} }
PROPERTY_PARSERS = { PROPERTY_PARSERS = {
TokenType.AUTO_INCREMENT: lambda self: self._parse_property_assignment( "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
exp.AutoIncrementProperty "CHARACTER SET": lambda self: self._parse_character_set(),
), "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
TokenType.CHARACTER_SET: lambda self: self._parse_character_set(), "PARTITION BY": lambda self: self._parse_partitioned_by(),
TokenType.LOCATION: lambda self: self._parse_property_assignment(exp.LocationProperty), "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
TokenType.PARTITIONED_BY: lambda self: self._parse_partitioned_by(), "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
TokenType.SCHEMA_COMMENT: lambda self: self._parse_property_assignment( "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
exp.SchemaCommentProperty "STORED": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
), "DISTKEY": lambda self: self._parse_distkey(),
TokenType.STORED: lambda self: self._parse_property_assignment(exp.FileFormatProperty), "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
TokenType.DISTKEY: lambda self: self._parse_distkey(), "SORTKEY": lambda self: self._parse_sortkey(),
TokenType.DISTSTYLE: lambda self: self._parse_property_assignment(exp.DistStyleProperty), "LIKE": lambda self: self._parse_create_like(),
TokenType.SORTKEY: lambda self: self._parse_sortkey(), "RETURNS": lambda self: self._parse_returns(),
TokenType.LIKE: lambda self: self._parse_create_like(), "ROW": lambda self: self._parse_row(),
TokenType.RETURNS: lambda self: self._parse_returns(), "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
TokenType.ROW: lambda self: self._parse_row(), "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
TokenType.COLLATE: lambda self: self._parse_property_assignment(exp.CollateProperty), "TABLE_FORMAT": lambda self: self._parse_property_assignment(exp.TableFormatProperty),
TokenType.COMMENT: lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), "USING": lambda self: self._parse_property_assignment(exp.TableFormatProperty),
TokenType.FORMAT: lambda self: self._parse_property_assignment(exp.FileFormatProperty), "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
TokenType.TABLE_FORMAT: lambda self: self._parse_property_assignment( "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
exp.TableFormatProperty "DETERMINISTIC": lambda self: self.expression(
),
TokenType.USING: lambda self: self._parse_property_assignment(exp.TableFormatProperty),
TokenType.LANGUAGE: lambda self: self._parse_property_assignment(exp.LanguageProperty),
TokenType.EXECUTE: lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
TokenType.DETERMINISTIC: lambda self: self.expression(
exp.VolatilityProperty, this=exp.Literal.string("IMMUTABLE") exp.VolatilityProperty, this=exp.Literal.string("IMMUTABLE")
), ),
TokenType.IMMUTABLE: lambda self: self.expression( "IMMUTABLE": lambda self: self.expression(
exp.VolatilityProperty, this=exp.Literal.string("IMMUTABLE") exp.VolatilityProperty, this=exp.Literal.string("IMMUTABLE")
), ),
TokenType.STABLE: lambda self: self.expression( "STABLE": lambda self: self.expression(
exp.VolatilityProperty, this=exp.Literal.string("STABLE") exp.VolatilityProperty, this=exp.Literal.string("STABLE")
), ),
TokenType.VOLATILE: lambda self: self.expression( "VOLATILE": lambda self: self.expression(
exp.VolatilityProperty, this=exp.Literal.string("VOLATILE") exp.VolatilityProperty, this=exp.Literal.string("VOLATILE")
), ),
TokenType.WITH: lambda self: self._parse_wrapped_csv(self._parse_property), "WITH": lambda self: self._parse_with_property(),
TokenType.PROPERTIES: lambda self: self._parse_wrapped_csv(self._parse_property), "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
"FALLBACK": lambda self: self._parse_fallback(no=self._prev.text.upper() == "NO"),
"LOG": lambda self: self._parse_log(no=self._prev.text.upper() == "NO"),
"BEFORE": lambda self: self._parse_journal(
no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
),
"JOURNAL": lambda self: self._parse_journal(
no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
),
"AFTER": lambda self: self._parse_afterjournal(
no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
),
"LOCAL": lambda self: self._parse_afterjournal(no=False, dual=False, local=True),
"NOT": lambda self: self._parse_afterjournal(no=False, dual=False, local=False),
"CHECKSUM": lambda self: self._parse_checksum(),
"FREESPACE": lambda self: self._parse_freespace(),
"MERGEBLOCKRATIO": lambda self: self._parse_mergeblockratio(
no=self._prev.text.upper() == "NO", default=self._prev.text.upper() == "DEFAULT"
),
"MIN": lambda self: self._parse_datablocksize(),
"MINIMUM": lambda self: self._parse_datablocksize(),
"MAX": lambda self: self._parse_datablocksize(),
"MAXIMUM": lambda self: self._parse_datablocksize(),
"DATABLOCKSIZE": lambda self: self._parse_datablocksize(
default=self._prev.text.upper() == "DEFAULT"
),
"BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
} }
CONSTRAINT_PARSERS = { CONSTRAINT_PARSERS = {
@ -580,6 +596,7 @@ class Parser(metaclass=_Parser):
} }
QUERY_MODIFIER_PARSERS = { QUERY_MODIFIER_PARSERS = {
"match": lambda self: self._parse_match_recognize(),
"where": lambda self: self._parse_where(), "where": lambda self: self._parse_where(),
"group": lambda self: self._parse_group(), "group": lambda self: self._parse_group(),
"having": lambda self: self._parse_having(), "having": lambda self: self._parse_having(),
@ -627,7 +644,6 @@ class Parser(metaclass=_Parser):
"max_errors", "max_errors",
"null_ordering", "null_ordering",
"_tokens", "_tokens",
"_chunks",
"_index", "_index",
"_curr", "_curr",
"_next", "_next",
@ -660,7 +676,6 @@ class Parser(metaclass=_Parser):
self.sql = "" self.sql = ""
self.errors = [] self.errors = []
self._tokens = [] self._tokens = []
self._chunks = [[]]
self._index = 0 self._index = 0
self._curr = None self._curr = None
self._next = None self._next = None
@ -728,17 +743,18 @@ class Parser(metaclass=_Parser):
self.reset() self.reset()
self.sql = sql or "" self.sql = sql or ""
total = len(raw_tokens) total = len(raw_tokens)
chunks: t.List[t.List[Token]] = [[]]
for i, token in enumerate(raw_tokens): for i, token in enumerate(raw_tokens):
if token.token_type == TokenType.SEMICOLON: if token.token_type == TokenType.SEMICOLON:
if i < total - 1: if i < total - 1:
self._chunks.append([]) chunks.append([])
else: else:
self._chunks[-1].append(token) chunks[-1].append(token)
expressions = [] expressions = []
for tokens in self._chunks: for tokens in chunks:
self._index = -1 self._index = -1
self._tokens = tokens self._tokens = tokens
self._advance() self._advance()
@ -771,7 +787,7 @@ class Parser(metaclass=_Parser):
error level setting. error level setting.
""" """
token = token or self._curr or self._prev or Token.string("") token = token or self._curr or self._prev or Token.string("")
start = self._find_token(token, self.sql) start = self._find_token(token)
end = start + len(token.text) end = start + len(token.text)
start_context = self.sql[max(start - self.error_message_context, 0) : start] start_context = self.sql[max(start - self.error_message_context, 0) : start]
highlight = self.sql[start:end] highlight = self.sql[start:end]
@ -833,13 +849,16 @@ class Parser(metaclass=_Parser):
for error_message in expression.error_messages(args): for error_message in expression.error_messages(args):
self.raise_error(error_message) self.raise_error(error_message)
def _find_token(self, token: Token, sql: str) -> int: def _find_sql(self, start: Token, end: Token) -> str:
return self.sql[self._find_token(start) : self._find_token(end)]
def _find_token(self, token: Token) -> int:
line = 1 line = 1
col = 1 col = 1
index = 0 index = 0
while line < token.line or col < token.col: while line < token.line or col < token.col:
if Tokenizer.WHITE_SPACE.get(sql[index]) == TokenType.BREAK: if Tokenizer.WHITE_SPACE.get(self.sql[index]) == TokenType.BREAK:
line += 1 line += 1
col = 1 col = 1
else: else:
@ -911,6 +930,10 @@ class Parser(metaclass=_Parser):
def _parse_create(self) -> t.Optional[exp.Expression]: def _parse_create(self) -> t.Optional[exp.Expression]:
replace = self._match_pair(TokenType.OR, TokenType.REPLACE) replace = self._match_pair(TokenType.OR, TokenType.REPLACE)
set_ = self._match(TokenType.SET) # Teradata
multiset = self._match_text_seq("MULTISET") # Teradata
global_temporary = self._match_text_seq("GLOBAL", "TEMPORARY") # Teradata
volatile = self._match(TokenType.VOLATILE) # Teradata
temporary = self._match(TokenType.TEMPORARY) temporary = self._match(TokenType.TEMPORARY)
transient = self._match_text_seq("TRANSIENT") transient = self._match_text_seq("TRANSIENT")
external = self._match_text_seq("EXTERNAL") external = self._match_text_seq("EXTERNAL")
@ -954,9 +977,17 @@ class Parser(metaclass=_Parser):
TokenType.VIEW, TokenType.VIEW,
TokenType.SCHEMA, TokenType.SCHEMA,
): ):
this = self._parse_table(schema=True) table_parts = self._parse_table_parts(schema=True)
if self._match(TokenType.COMMA): # comma-separated properties before schema definition
properties = self._parse_properties(before=True)
this = self._parse_schema(this=table_parts)
if not properties: # properties after schema definition
properties = self._parse_properties() properties = self._parse_properties()
if self._match(TokenType.ALIAS):
self._match(TokenType.ALIAS)
expression = self._parse_ddl_select() expression = self._parse_ddl_select()
if create_token.token_type == TokenType.TABLE: if create_token.token_type == TokenType.TABLE:
@ -988,6 +1019,10 @@ class Parser(metaclass=_Parser):
this=this, this=this,
kind=create_token.text, kind=create_token.text,
expression=expression, expression=expression,
set=set_,
multiset=multiset,
global_temporary=global_temporary,
volatile=volatile,
exists=exists, exists=exists,
properties=properties, properties=properties,
temporary=temporary, temporary=temporary,
@ -1004,9 +1039,19 @@ class Parser(metaclass=_Parser):
begin=begin, begin=begin,
) )
def _parse_property_before(self) -> t.Optional[exp.Expression]:
    """Parse one property written before the schema definition.

    Optional ``NO`` / ``DUAL`` / ``DEFAULT`` modifiers are consumed first, so
    the selected property parser can inspect the previous token. The property
    keyword itself is looked up from the current token without consuming it;
    the selected parser is responsible for matching it.

    Returns:
        The parsed property, or None when the token stream is exhausted or the
        current token does not name a known property.
    """
    self._match_text_seq("NO")
    self._match_text_seq("DUAL")
    self._match_text_seq("DEFAULT")

    # Nothing left to parse (e.g. a trailing comma at end of input): bail out
    # instead of dereferencing a missing token.
    if not self._curr:
        return None

    # Single lookup; an unknown (or falsy) entry means "no property here".
    parser = self.PROPERTY_PARSERS.get(self._curr.text.upper())
    return parser(self) if parser else None
def _parse_property(self) -> t.Optional[exp.Expression]: def _parse_property(self) -> t.Optional[exp.Expression]:
if self._match_set(self.PROPERTY_PARSERS): if self._match_texts(self.PROPERTY_PARSERS):
return self.PROPERTY_PARSERS[self._prev.token_type](self) return self.PROPERTY_PARSERS[self._prev.text.upper()](self)
if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET): if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
return self._parse_character_set(True) return self._parse_character_set(True)
@ -1033,6 +1078,166 @@ class Parser(metaclass=_Parser):
this=self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), this=self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
) )
def _parse_properties(self, before=None) -> t.Optional[exp.Expression]:
    """Parse consecutive properties into a single ``exp.Properties`` node.

    Args:
        before: when truthy, properties are parsed in the "before schema"
            form, where an optional comma may precede each one.

    Returns:
        An ``exp.Properties`` expression, or None if no property was parsed.
    """
    collected = []
    parse_next = self._parse_property_before if before else self._parse_property

    while True:
        if before:
            self._match(TokenType.COMMA)

        found = parse_next()
        if not found:
            break

        # A single parser may yield one property or a collection of them.
        collected.extend(ensure_collection(found))

    if not collected:
        return None
    return self.expression(exp.Properties, expressions=collected, before=before)
def _parse_fallback(self, no=False) -> exp.Expression:
    """Parse ``FALLBACK [PROTECTION]`` into an ``exp.FallbackProperty``.

    Args:
        no: stored on the node as its ``no`` arg.
    """
    self._match_text_seq("FALLBACK")
    has_protection = self._match_text_seq("PROTECTION")
    return self.expression(exp.FallbackProperty, no=no, protection=has_protection)
def _parse_with_property(
    self,
) -> t.Union[t.Optional[exp.Expression], t.List[t.Optional[exp.Expression]]]:
    """Dispatch on what follows ``WITH`` in a property list.

    A parenthesis at the current position (peeked, not consumed) means a
    wrapped, comma-separated property list. Otherwise the next token decides
    between the journal-table form and the isolated-loading form. Returns None
    when there is no next token.
    """
    if self._match(TokenType.L_PAREN, advance=False):
        return self._parse_wrapped_csv(self._parse_property)

    lookahead = self._next
    if not lookahead:
        return None

    if lookahead.text.upper() == "JOURNAL":
        return self._parse_withjournaltable()
    return self._parse_withisolatedloading()
def _parse_withjournaltable(self) -> exp.Expression:
    """Parse ``WITH JOURNAL TABLE [=] <table>`` into a journal-table property."""
    self._match_text_seq("WITH", "JOURNAL", "TABLE")
    self._match(TokenType.EQ)  # the equals sign is optional
    journal_table = self._parse_table_parts()
    return self.expression(exp.WithJournalTableProperty, this=journal_table)
def _parse_log(self, no=False) -> exp.Expression:
    """Consume an optional ``LOG`` keyword and build an ``exp.LogProperty``.

    Args:
        no: stored on the node as its ``no`` arg.
    """
    self._match_text_seq("LOG")
    log_property = self.expression(exp.LogProperty, no=no)
    return log_property
def _parse_journal(self, no=False, dual=False) -> exp.Expression:
    """Parse ``[BEFORE] JOURNAL`` into an ``exp.JournalProperty``.

    Args:
        no: stored on the node as its ``no`` arg.
        dual: stored on the node as its ``dual`` arg.
    """
    has_before = self._match_text_seq("BEFORE")
    self._match_text_seq("JOURNAL")
    return self.expression(exp.JournalProperty, no=no, dual=dual, before=has_before)
def _parse_afterjournal(self, no=False, dual=False, local=None) -> exp.Expression:
    """Parse ``[NOT] [LOCAL] AFTER JOURNAL`` into an ``exp.AfterJournalProperty``.

    The keywords are consumed if present; the node's ``no``, ``dual`` and
    ``local`` args come solely from the arguments passed in.
    """
    # Each keyword group is optional and matched in this fixed order.
    for keywords in (("NOT",), ("LOCAL",), ("AFTER", "JOURNAL")):
        self._match_text_seq(*keywords)
    return self.expression(exp.AfterJournalProperty, no=no, dual=dual, local=local)
def _parse_checksum(self) -> exp.Expression:
    """Parse ``CHECKSUM [=] [ON | OFF] [DEFAULT]`` into an ``exp.ChecksumProperty``.

    ``on`` is True for ON, False for OFF and None when neither is present.
    """
    self._match_text_seq("CHECKSUM")
    self._match(TokenType.EQ)

    if self._match(TokenType.ON):
        state = True
    elif self._match_text_seq("OFF"):
        state = False
    else:
        state = None

    is_default = self._match(TokenType.DEFAULT)
    return self.expression(exp.ChecksumProperty, on=state, default=is_default)
def _parse_freespace(self) -> exp.Expression:
    """Parse ``FREESPACE [=] <number> [PERCENT]`` into an ``exp.FreespaceProperty``."""
    self._match_text_seq("FREESPACE")
    self._match(TokenType.EQ)

    # The number must be parsed before checking for the trailing PERCENT.
    amount = self._parse_number()
    is_percent = self._match(TokenType.PERCENT)
    return self.expression(exp.FreespaceProperty, this=amount, percent=is_percent)
def _parse_mergeblockratio(self, no=False, default=False) -> exp.Expression:
    """Parse a ``MERGEBLOCKRATIO`` property.

    With an equals sign, the form is ``MERGEBLOCKRATIO = <number> [PERCENT]``
    and the ``no`` / ``default`` arguments are not used. Without it, the node
    carries only the ``no`` and ``default`` flags passed in.
    """
    self._match_text_seq("MERGEBLOCKRATIO")

    if not self._match(TokenType.EQ):
        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    ratio = self._parse_number()
    is_percent = self._match(TokenType.PERCENT)
    return self.expression(exp.MergeBlockRatioProperty, this=ratio, percent=is_percent)
def _parse_datablocksize(self, default=None) -> exp.Expression:
    """Parse a ``DATABLOCKSIZE`` property in one of its forms.

    * ``default`` truthy: node with ``default=True``.
    * leading ``MIN`` / ``MINIMUM``: node with ``min=True``.
    * leading ``MAX`` / ``MAXIMUM``: node with ``min=False``.
    * otherwise ``DATABLOCKSIZE [=] <number> [BYTES | KBYTES | KILOBYTES]``,
      producing a node with ``size`` and ``units``.
    """
    if default:
        flag_args = {"default": True}
    elif self._match_texts(("MIN", "MINIMUM")):
        flag_args = {"min": True}
    elif self._match_texts(("MAX", "MAXIMUM")):
        flag_args = {"min": False}
    else:
        flag_args = None

    # Every form consumes the (optional) keyword at this point.
    self._match_text_seq("DATABLOCKSIZE")
    if flag_args is not None:
        return self.expression(exp.DataBlocksizeProperty, **flag_args)

    self._match(TokenType.EQ)
    size = self._parse_number()
    has_units = self._match_texts(("BYTES", "KBYTES", "KILOBYTES"))
    units = self._prev.text if has_units else None
    return self.expression(exp.DataBlocksizeProperty, size=size, units=units)
def _parse_blockcompression(self) -> exp.Expression:
    """Parse a ``BLOCKCOMPRESSION [=] ...`` property.

    The ALWAYS, MANUAL, NEVER and DEFAULT keywords are each tried once, in
    that order; an ``AUTOTEMP`` keyword is followed by a parsed schema.
    """
    self._match_text_seq("BLOCKCOMPRESSION")
    self._match(TokenType.EQ)

    # Dict literals evaluate in order, so the keywords are matched in sequence.
    options = {
        "always": self._match(TokenType.ALWAYS),
        "manual": self._match_text_seq("MANUAL"),
        "never": self._match_text_seq("NEVER"),
        "default": self._match_text_seq("DEFAULT"),
    }
    options["autotemp"] = self._parse_schema() if self._match_text_seq("AUTOTEMP") else None

    return self.expression(exp.BlockCompressionProperty, **options)
def _parse_withisolatedloading(self) -> exp.Expression:
    """Parse ``[WITH] [NO] [CONCURRENT] ISOLATED LOADING [FOR ALL|INSERT|NONE]``.

    Every keyword is optional as far as this parser is concerned; the result
    of each match is stored on the ``exp.IsolatedLoadingProperty`` node.
    """
    self._match(TokenType.WITH)

    options = {
        "no": self._match_text_seq("NO"),
        "concurrent": self._match_text_seq("CONCURRENT"),
    }
    self._match_text_seq("ISOLATED", "LOADING")

    # All three FOR variants are tried, in this order, and recorded separately.
    for key, target in (("for_all", "ALL"), ("for_insert", "INSERT"), ("for_none", "NONE")):
        options[key] = self._match_text_seq("FOR", target)

    return self.expression(exp.IsolatedLoadingProperty, **options)
def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
    """Parse ``PARTITION BY <expr>, ...``; return an empty list if absent."""
    if not self._match(TokenType.PARTITION_BY):
        return []
    return self._parse_csv(self._parse_conjunction)
def _parse_partitioned_by(self) -> exp.Expression: def _parse_partitioned_by(self) -> exp.Expression:
self._match(TokenType.EQ) self._match(TokenType.EQ)
return self.expression( return self.expression(
@ -1093,21 +1298,6 @@ class Parser(metaclass=_Parser):
return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)
def _parse_properties(self) -> t.Optional[exp.Expression]:
properties = []
while True:
identified_property = self._parse_property()
if not identified_property:
break
for p in ensure_collection(identified_property):
properties.append(p)
if properties:
return self.expression(exp.Properties, expressions=properties)
return None
def _parse_describe(self) -> exp.Expression: def _parse_describe(self) -> exp.Expression:
kind = self._match_set(self.CREATABLES) and self._prev.text kind = self._match_set(self.CREATABLES) and self._prev.text
this = self._parse_table() this = self._parse_table()
@ -1248,11 +1438,9 @@ class Parser(metaclass=_Parser):
if not self._match(TokenType.PARTITION): if not self._match(TokenType.PARTITION):
return None return None
def parse_values() -> exp.Property: return self.expression(
props = self._parse_csv(self._parse_var_or_string, sep=TokenType.EQ) exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
return exp.Property(this=seq_get(props, 0), value=seq_get(props, 1)) )
return self.expression(exp.Partition, this=self._parse_wrapped_csv(parse_values))
def _parse_value(self) -> exp.Expression: def _parse_value(self) -> exp.Expression:
if self._match(TokenType.L_PAREN): if self._match(TokenType.L_PAREN):
@ -1360,8 +1548,7 @@ class Parser(metaclass=_Parser):
if not alias or not alias.this: if not alias or not alias.this:
self.raise_error("Expected CTE to have alias") self.raise_error("Expected CTE to have alias")
if not self._match(TokenType.ALIAS): self._match(TokenType.ALIAS)
self.raise_error("Expected AS in CTE")
return self.expression( return self.expression(
exp.CTE, exp.CTE,
@ -1376,10 +1563,11 @@ class Parser(metaclass=_Parser):
alias = self._parse_id_var( alias = self._parse_id_var(
any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS
) )
index = self._index
if self._match(TokenType.L_PAREN): if self._match(TokenType.L_PAREN):
columns = self._parse_csv(lambda: self._parse_column_def(self._parse_id_var())) columns = self._parse_csv(lambda: self._parse_column_def(self._parse_id_var()))
self._match_r_paren() self._match_r_paren() if columns else self._retreat(index)
else: else:
columns = None columns = None
@ -1452,6 +1640,87 @@ class Parser(metaclass=_Parser):
exp.From, comments=self._prev_comments, expressions=self._parse_csv(self._parse_table) exp.From, comments=self._prev_comments, expressions=self._parse_csv(self._parse_table)
) )
def _parse_match_recognize(self) -> t.Optional[exp.Expression]:
    """Parse a ``MATCH_RECOGNIZE (...)`` clause into an ``exp.MatchRecognize``.

    Returns None when the current token is not MATCH_RECOGNIZE. The optional
    sub-clauses are parsed in a fixed order: PARTITION BY, ORDER BY, MEASURES,
    the rows-per-match option, AFTER MATCH SKIP, PATTERN and DEFINE. The rows
    and skip options are stored as ``exp.Var`` nodes holding their keyword
    text; the pattern is stored as the raw SQL between its parentheses.
    """
    if not self._match(TokenType.MATCH_RECOGNIZE):
        return None

    self._match_l_paren()

    partition_by = self._parse_partition_by()
    ordering = self._parse_order()

    measures = None
    if self._match_text_seq("MEASURES"):
        measures = self._parse_alias(self._parse_conjunction())

    rows = None
    if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
        rows = exp.Var(this="ONE ROW PER MATCH")
    elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
        rows_text = "ALL ROWS PER MATCH"
        # At most one modifier applies; they are tried in this order.
        modifiers = (
            ("SHOW", "EMPTY", "MATCHES"),
            ("OMIT", "EMPTY", "MATCHES"),
            ("WITH", "UNMATCHED", "ROWS"),
        )
        for words in modifiers:
            if self._match_text_seq(*words):
                rows_text += " " + " ".join(words)
                break
        rows = exp.Var(this=rows_text)

    after = None
    if self._match_text_seq("AFTER", "MATCH", "SKIP"):
        skip_text = "AFTER MATCH SKIP"
        if self._match_text_seq("PAST", "LAST", "ROW"):
            skip_text += " PAST LAST ROW"
        elif self._match_text_seq("TO", "NEXT", "ROW"):
            skip_text += " TO NEXT ROW"
        else:
            # TO FIRST / TO LAST are followed by one more token, taken verbatim.
            for anchor in ("FIRST", "LAST"):
                if self._match_text_seq("TO", anchor):
                    skip_text += f" TO {anchor} {self._advance_any().text}"  # type: ignore
                    break
        after = exp.Var(this=skip_text)

    pattern = None
    if self._match_text_seq("PATTERN"):
        self._match_l_paren()

        if not self._curr:
            self.raise_error("Expecting )", self._curr)

        # Walk tokens until the parenthesis opened above is balanced again.
        depth = 1
        first_token = self._curr
        while self._curr and depth > 0:
            kind = self._curr.token_type
            if kind == TokenType.L_PAREN:
                depth += 1
            elif kind == TokenType.R_PAREN:
                depth -= 1
            self._advance()

        if depth > 0:
            self.raise_error("Expecting )", self._curr)
        if not self._curr:
            self.raise_error("Expecting pattern", self._curr)

        # The previous token is the one that closed the pattern; the pattern
        # text is the original SQL from the first token up to that one.
        closing_token = self._prev
        pattern = exp.Var(this=self._find_sql(first_token, closing_token))

    define = None
    if self._match_text_seq("DEFINE"):
        define = self._parse_alias(self._parse_conjunction())

    self._match_r_paren()

    return self.expression(
        exp.MatchRecognize,
        partition_by=partition_by,
        order=ordering,
        measures=measures,
        rows=rows,
        after=after,
        pattern=pattern,
        define=define,
    )
def _parse_lateral(self) -> t.Optional[exp.Expression]: def _parse_lateral(self) -> t.Optional[exp.Expression]:
outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
@ -1772,12 +2041,19 @@ class Parser(metaclass=_Parser):
if not skip_group_by_token and not self._match(TokenType.GROUP_BY): if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
return None return None
expressions = self._parse_csv(self._parse_conjunction)
grouping_sets = self._parse_grouping_sets()
with_ = self._match(TokenType.WITH)
cube = self._match(TokenType.CUBE) and (with_ or self._parse_wrapped_id_vars())
rollup = self._match(TokenType.ROLLUP) and (with_ or self._parse_wrapped_id_vars())
return self.expression( return self.expression(
exp.Group, exp.Group,
expressions=self._parse_csv(self._parse_conjunction), expressions=expressions,
grouping_sets=self._parse_grouping_sets(), grouping_sets=grouping_sets,
cube=self._match(TokenType.CUBE) and self._parse_wrapped_id_vars(), cube=cube,
rollup=self._match(TokenType.ROLLUP) and self._parse_wrapped_id_vars(), rollup=rollup,
) )
def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
@ -1788,11 +2064,11 @@ class Parser(metaclass=_Parser):
def _parse_grouping_set(self) -> t.Optional[exp.Expression]: def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
if self._match(TokenType.L_PAREN): if self._match(TokenType.L_PAREN):
grouping_set = self._parse_csv(self._parse_id_var) grouping_set = self._parse_csv(self._parse_column)
self._match_r_paren() self._match_r_paren()
return self.expression(exp.Tuple, expressions=grouping_set) return self.expression(exp.Tuple, expressions=grouping_set)
return self._parse_id_var() return self._parse_column()
def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Expression]: def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Expression]:
if not skip_having_token and not self._match(TokenType.HAVING): if not skip_having_token and not self._match(TokenType.HAVING):
@ -2268,7 +2544,6 @@ class Parser(metaclass=_Parser):
args = self._parse_csv(self._parse_lambda) args = self._parse_csv(self._parse_lambda)
if function: if function:
# Clickhouse supports function calls like foo(x, y)(z), so for these we need to also parse the # Clickhouse supports function calls like foo(x, y)(z), so for these we need to also parse the
# second parameter list (i.e. "(z)") and the corresponding function will receive both arg lists. # second parameter list (i.e. "(z)") and the corresponding function will receive both arg lists.
if count_params(function) == 2: if count_params(function) == 2:
@ -2541,9 +2816,10 @@ class Parser(metaclass=_Parser):
return self.expression(exp.PrimaryKey, expressions=expressions, options=options) return self.expression(exp.PrimaryKey, expressions=expressions, options=options)
def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
if not self._match(TokenType.L_BRACKET): if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
return this return this
bracket_kind = self._prev.token_type
expressions: t.List[t.Optional[exp.Expression]] expressions: t.List[t.Optional[exp.Expression]]
if self._match(TokenType.COLON): if self._match(TokenType.COLON):
@ -2551,14 +2827,19 @@ class Parser(metaclass=_Parser):
else: else:
expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction())) expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))
if not this or this.name.upper() == "ARRAY": # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
if bracket_kind == TokenType.L_BRACE:
this = self.expression(exp.Struct, expressions=expressions)
elif not this or this.name.upper() == "ARRAY":
this = self.expression(exp.Array, expressions=expressions) this = self.expression(exp.Array, expressions=expressions)
else: else:
expressions = apply_index_offset(expressions, -self.index_offset) expressions = apply_index_offset(expressions, -self.index_offset)
this = self.expression(exp.Bracket, this=this, expressions=expressions) this = self.expression(exp.Bracket, this=this, expressions=expressions)
if not self._match(TokenType.R_BRACKET): if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
self.raise_error("Expected ]") self.raise_error("Expected ]")
elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
self.raise_error("Expected }")
this.comments = self._prev_comments this.comments = self._prev_comments
return self._parse_bracket(this) return self._parse_bracket(this)
@ -2727,7 +3008,7 @@ class Parser(metaclass=_Parser):
position = self._prev.text.upper() position = self._prev.text.upper()
expression = self._parse_term() expression = self._parse_term()
if self._match(TokenType.FROM): if self._match_set((TokenType.FROM, TokenType.COMMA)):
this = self._parse_term() this = self._parse_term()
else: else:
this = expression this = expression
@ -2792,14 +3073,8 @@ class Parser(metaclass=_Parser):
return self.expression(exp.Window, this=this, alias=self._parse_id_var(False)) return self.expression(exp.Window, this=this, alias=self._parse_id_var(False))
window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)
partition = self._parse_partition_by()
partition = None
if self._match(TokenType.PARTITION_BY):
partition = self._parse_csv(self._parse_conjunction)
order = self._parse_order() order = self._parse_order()
spec = None
kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text
if kind: if kind:
@ -2816,6 +3091,8 @@ class Parser(metaclass=_Parser):
end=end["value"], end=end["value"],
end_side=end["side"], end_side=end["side"],
) )
else:
spec = None
self._match_r_paren() self._match_r_paren()
@ -3060,6 +3337,12 @@ class Parser(metaclass=_Parser):
def _parse_drop_column(self) -> t.Optional[exp.Expression]: def _parse_drop_column(self) -> t.Optional[exp.Expression]:
return self._match(TokenType.DROP) and self._parse_drop(default_kind="COLUMN") return self._match(TokenType.DROP) and self._parse_drop(default_kind="COLUMN")
# https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.Expression:
return self.expression(
exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
)
def _parse_add_constraint(self) -> t.Optional[exp.Expression]: def _parse_add_constraint(self) -> t.Optional[exp.Expression]:
this = None this = None
kind = self._prev.token_type kind = self._prev.token_type
@ -3092,13 +3375,23 @@ class Parser(metaclass=_Parser):
actions: t.Optional[exp.Expression | t.List[t.Optional[exp.Expression]]] = None actions: t.Optional[exp.Expression | t.List[t.Optional[exp.Expression]]] = None
index = self._index index = self._index
if self._match_text_seq("ADD"): if self._match(TokenType.DELETE):
actions = [self.expression(exp.Delete, where=self._parse_where())]
elif self._match_text_seq("ADD"):
if self._match_set(self.ADD_CONSTRAINT_TOKENS): if self._match_set(self.ADD_CONSTRAINT_TOKENS):
actions = self._parse_csv(self._parse_add_constraint) actions = self._parse_csv(self._parse_add_constraint)
else: else:
self._retreat(index) self._retreat(index)
actions = self._parse_csv(self._parse_add_column) actions = self._parse_csv(self._parse_add_column)
elif self._match_text_seq("DROP", advance=False): elif self._match_text_seq("DROP"):
partition_exists = self._parse_exists()
if self._match(TokenType.PARTITION, advance=False):
actions = self._parse_csv(
lambda: self._parse_drop_partition(exists=partition_exists)
)
else:
self._retreat(index)
actions = self._parse_csv(self._parse_drop_column) actions = self._parse_csv(self._parse_drop_column)
elif self._match_text_seq("RENAME", "TO"): elif self._match_text_seq("RENAME", "TO"):
actions = self.expression(exp.RenameTable, this=self._parse_table(schema=True)) actions = self.expression(exp.RenameTable, this=self._parse_table(schema=True))

View file

@ -22,6 +22,7 @@ class TokenType(AutoName):
DCOLON = auto() DCOLON = auto()
SEMICOLON = auto() SEMICOLON = auto()
STAR = auto() STAR = auto()
BACKSLASH = auto()
SLASH = auto() SLASH = auto()
LT = auto() LT = auto()
LTE = auto() LTE = auto()
@ -157,18 +158,14 @@ class TokenType(AutoName):
DELETE = auto() DELETE = auto()
DESC = auto() DESC = auto()
DESCRIBE = auto() DESCRIBE = auto()
DETERMINISTIC = auto()
DISTINCT = auto() DISTINCT = auto()
DISTINCT_FROM = auto() DISTINCT_FROM = auto()
DISTKEY = auto()
DISTRIBUTE_BY = auto() DISTRIBUTE_BY = auto()
DISTSTYLE = auto()
DIV = auto() DIV = auto()
DROP = auto() DROP = auto()
ELSE = auto() ELSE = auto()
ENCODE = auto() ENCODE = auto()
END = auto() END = auto()
ENGINE = auto()
ESCAPE = auto() ESCAPE = auto()
EXCEPT = auto() EXCEPT = auto()
EXECUTE = auto() EXECUTE = auto()
@ -182,10 +179,11 @@ class TokenType(AutoName):
FOR = auto() FOR = auto()
FOREIGN_KEY = auto() FOREIGN_KEY = auto()
FORMAT = auto() FORMAT = auto()
FROM = auto()
FULL = auto() FULL = auto()
FUNCTION = auto() FUNCTION = auto()
FROM = auto()
GENERATED = auto() GENERATED = auto()
GLOB = auto()
GLOBAL = auto() GLOBAL = auto()
GROUP_BY = auto() GROUP_BY = auto()
GROUPING_SETS = auto() GROUPING_SETS = auto()
@ -195,7 +193,6 @@ class TokenType(AutoName):
IF = auto() IF = auto()
IGNORE_NULLS = auto() IGNORE_NULLS = auto()
ILIKE = auto() ILIKE = auto()
IMMUTABLE = auto()
IN = auto() IN = auto()
INDEX = auto() INDEX = auto()
INNER = auto() INNER = auto()
@ -217,8 +214,8 @@ class TokenType(AutoName):
LIMIT = auto() LIMIT = auto()
LOAD_DATA = auto() LOAD_DATA = auto()
LOCAL = auto() LOCAL = auto()
LOCATION = auto()
MAP = auto() MAP = auto()
MATCH_RECOGNIZE = auto()
MATERIALIZED = auto() MATERIALIZED = auto()
MERGE = auto() MERGE = auto()
MOD = auto() MOD = auto()
@ -242,7 +239,6 @@ class TokenType(AutoName):
OVERWRITE = auto() OVERWRITE = auto()
PARTITION = auto() PARTITION = auto()
PARTITION_BY = auto() PARTITION_BY = auto()
PARTITIONED_BY = auto()
PERCENT = auto() PERCENT = auto()
PIVOT = auto() PIVOT = auto()
PLACEHOLDER = auto() PLACEHOLDER = auto()
@ -258,7 +254,6 @@ class TokenType(AutoName):
REPLACE = auto() REPLACE = auto()
RESPECT_NULLS = auto() RESPECT_NULLS = auto()
REFERENCES = auto() REFERENCES = auto()
RETURNS = auto()
RIGHT = auto() RIGHT = auto()
RLIKE = auto() RLIKE = auto()
ROLLBACK = auto() ROLLBACK = auto()
@ -277,10 +272,7 @@ class TokenType(AutoName):
SOME = auto() SOME = auto()
SORTKEY = auto() SORTKEY = auto()
SORT_BY = auto() SORT_BY = auto()
STABLE = auto()
STORED = auto()
STRUCT = auto() STRUCT = auto()
TABLE_FORMAT = auto()
TABLE_SAMPLE = auto() TABLE_SAMPLE = auto()
TEMPORARY = auto() TEMPORARY = auto()
TOP = auto() TOP = auto()
@ -414,6 +406,7 @@ class Tokenizer(metaclass=_Tokenizer):
"+": TokenType.PLUS, "+": TokenType.PLUS,
";": TokenType.SEMICOLON, ";": TokenType.SEMICOLON,
"/": TokenType.SLASH, "/": TokenType.SLASH,
"\\": TokenType.BACKSLASH,
"*": TokenType.STAR, "*": TokenType.STAR,
"~": TokenType.TILDA, "~": TokenType.TILDA,
"?": TokenType.PLACEHOLDER, "?": TokenType.PLACEHOLDER,
@ -448,9 +441,11 @@ class Tokenizer(metaclass=_Tokenizer):
}, },
**{ **{
f"{prefix}{key}": TokenType.BLOCK_END f"{prefix}{key}": TokenType.BLOCK_END
for key in ("}}", "%}", "#}") for key in ("%}", "#}")
for prefix in ("", "+", "-") for prefix in ("", "+", "-")
}, },
"+}}": TokenType.BLOCK_END,
"-}}": TokenType.BLOCK_END,
"/*+": TokenType.HINT, "/*+": TokenType.HINT,
"==": TokenType.EQ, "==": TokenType.EQ,
"::": TokenType.DCOLON, "::": TokenType.DCOLON,
@ -503,17 +498,13 @@ class Tokenizer(metaclass=_Tokenizer):
"DELETE": TokenType.DELETE, "DELETE": TokenType.DELETE,
"DESC": TokenType.DESC, "DESC": TokenType.DESC,
"DESCRIBE": TokenType.DESCRIBE, "DESCRIBE": TokenType.DESCRIBE,
"DETERMINISTIC": TokenType.DETERMINISTIC,
"DISTINCT": TokenType.DISTINCT, "DISTINCT": TokenType.DISTINCT,
"DISTINCT FROM": TokenType.DISTINCT_FROM, "DISTINCT FROM": TokenType.DISTINCT_FROM,
"DISTKEY": TokenType.DISTKEY,
"DISTRIBUTE BY": TokenType.DISTRIBUTE_BY, "DISTRIBUTE BY": TokenType.DISTRIBUTE_BY,
"DISTSTYLE": TokenType.DISTSTYLE,
"DIV": TokenType.DIV, "DIV": TokenType.DIV,
"DROP": TokenType.DROP, "DROP": TokenType.DROP,
"ELSE": TokenType.ELSE, "ELSE": TokenType.ELSE,
"END": TokenType.END, "END": TokenType.END,
"ENGINE": TokenType.ENGINE,
"ESCAPE": TokenType.ESCAPE, "ESCAPE": TokenType.ESCAPE,
"EXCEPT": TokenType.EXCEPT, "EXCEPT": TokenType.EXCEPT,
"EXECUTE": TokenType.EXECUTE, "EXECUTE": TokenType.EXECUTE,
@ -530,13 +521,13 @@ class Tokenizer(metaclass=_Tokenizer):
"FORMAT": TokenType.FORMAT, "FORMAT": TokenType.FORMAT,
"FROM": TokenType.FROM, "FROM": TokenType.FROM,
"GENERATED": TokenType.GENERATED, "GENERATED": TokenType.GENERATED,
"GLOB": TokenType.GLOB,
"GROUP BY": TokenType.GROUP_BY, "GROUP BY": TokenType.GROUP_BY,
"GROUPING SETS": TokenType.GROUPING_SETS, "GROUPING SETS": TokenType.GROUPING_SETS,
"HAVING": TokenType.HAVING, "HAVING": TokenType.HAVING,
"IDENTITY": TokenType.IDENTITY, "IDENTITY": TokenType.IDENTITY,
"IF": TokenType.IF, "IF": TokenType.IF,
"ILIKE": TokenType.ILIKE, "ILIKE": TokenType.ILIKE,
"IMMUTABLE": TokenType.IMMUTABLE,
"IGNORE NULLS": TokenType.IGNORE_NULLS, "IGNORE NULLS": TokenType.IGNORE_NULLS,
"IN": TokenType.IN, "IN": TokenType.IN,
"INDEX": TokenType.INDEX, "INDEX": TokenType.INDEX,
@ -548,7 +539,6 @@ class Tokenizer(metaclass=_Tokenizer):
"IS": TokenType.IS, "IS": TokenType.IS,
"ISNULL": TokenType.ISNULL, "ISNULL": TokenType.ISNULL,
"JOIN": TokenType.JOIN, "JOIN": TokenType.JOIN,
"LANGUAGE": TokenType.LANGUAGE,
"LATERAL": TokenType.LATERAL, "LATERAL": TokenType.LATERAL,
"LAZY": TokenType.LAZY, "LAZY": TokenType.LAZY,
"LEADING": TokenType.LEADING, "LEADING": TokenType.LEADING,
@ -557,7 +547,6 @@ class Tokenizer(metaclass=_Tokenizer):
"LIMIT": TokenType.LIMIT, "LIMIT": TokenType.LIMIT,
"LOAD DATA": TokenType.LOAD_DATA, "LOAD DATA": TokenType.LOAD_DATA,
"LOCAL": TokenType.LOCAL, "LOCAL": TokenType.LOCAL,
"LOCATION": TokenType.LOCATION,
"MATERIALIZED": TokenType.MATERIALIZED, "MATERIALIZED": TokenType.MATERIALIZED,
"MERGE": TokenType.MERGE, "MERGE": TokenType.MERGE,
"NATURAL": TokenType.NATURAL, "NATURAL": TokenType.NATURAL,
@ -582,8 +571,8 @@ class Tokenizer(metaclass=_Tokenizer):
"OVERWRITE": TokenType.OVERWRITE, "OVERWRITE": TokenType.OVERWRITE,
"PARTITION": TokenType.PARTITION, "PARTITION": TokenType.PARTITION,
"PARTITION BY": TokenType.PARTITION_BY, "PARTITION BY": TokenType.PARTITION_BY,
"PARTITIONED BY": TokenType.PARTITIONED_BY, "PARTITIONED BY": TokenType.PARTITION_BY,
"PARTITIONED_BY": TokenType.PARTITIONED_BY, "PARTITIONED_BY": TokenType.PARTITION_BY,
"PERCENT": TokenType.PERCENT, "PERCENT": TokenType.PERCENT,
"PIVOT": TokenType.PIVOT, "PIVOT": TokenType.PIVOT,
"PRECEDING": TokenType.PRECEDING, "PRECEDING": TokenType.PRECEDING,
@ -596,7 +585,6 @@ class Tokenizer(metaclass=_Tokenizer):
"REPLACE": TokenType.REPLACE, "REPLACE": TokenType.REPLACE,
"RESPECT NULLS": TokenType.RESPECT_NULLS, "RESPECT NULLS": TokenType.RESPECT_NULLS,
"REFERENCES": TokenType.REFERENCES, "REFERENCES": TokenType.REFERENCES,
"RETURNS": TokenType.RETURNS,
"RIGHT": TokenType.RIGHT, "RIGHT": TokenType.RIGHT,
"RLIKE": TokenType.RLIKE, "RLIKE": TokenType.RLIKE,
"ROLLBACK": TokenType.ROLLBACK, "ROLLBACK": TokenType.ROLLBACK,
@ -613,11 +601,7 @@ class Tokenizer(metaclass=_Tokenizer):
"SOME": TokenType.SOME, "SOME": TokenType.SOME,
"SORTKEY": TokenType.SORTKEY, "SORTKEY": TokenType.SORTKEY,
"SORT BY": TokenType.SORT_BY, "SORT BY": TokenType.SORT_BY,
"STABLE": TokenType.STABLE,
"STORED": TokenType.STORED,
"TABLE": TokenType.TABLE, "TABLE": TokenType.TABLE,
"TABLE_FORMAT": TokenType.TABLE_FORMAT,
"TBLPROPERTIES": TokenType.PROPERTIES,
"TABLESAMPLE": TokenType.TABLE_SAMPLE, "TABLESAMPLE": TokenType.TABLE_SAMPLE,
"TEMP": TokenType.TEMPORARY, "TEMP": TokenType.TEMPORARY,
"TEMPORARY": TokenType.TEMPORARY, "TEMPORARY": TokenType.TEMPORARY,

View file

@ -27,20 +27,18 @@ def unalias_group(expression: exp.Expression) -> exp.Expression:
""" """
if isinstance(expression, exp.Group) and isinstance(expression.parent, exp.Select): if isinstance(expression, exp.Group) and isinstance(expression.parent, exp.Select):
aliased_selects = { aliased_selects = {
e.alias: (i, e.this) e.alias: i
for i, e in enumerate(expression.parent.expressions, start=1) for i, e in enumerate(expression.parent.expressions, start=1)
if isinstance(e, exp.Alias) if isinstance(e, exp.Alias)
} }
expression = expression.copy() for group_by in expression.expressions:
if (
top_level_expression = None isinstance(group_by, exp.Column)
for item, parent, _ in expression.walk(bfs=False): and not group_by.table
top_level_expression = item if isinstance(parent, exp.Group) else top_level_expression and group_by.name in aliased_selects
if isinstance(item, exp.Column) and not item.table: ):
alias_index, col_expression = aliased_selects.get(item.name, (None, None)) group_by.replace(exp.Literal.number(aliased_selects.get(group_by.name)))
if alias_index and top_level_expression != col_expression:
item.replace(exp.Literal.number(alias_index))
return expression return expression
@ -63,22 +61,21 @@ def eliminate_distinct_on(expression: exp.Expression) -> exp.Expression:
and expression.args["distinct"].args.get("on") and expression.args["distinct"].args.get("on")
and isinstance(expression.args["distinct"].args["on"], exp.Tuple) and isinstance(expression.args["distinct"].args["on"], exp.Tuple)
): ):
distinct_cols = [e.copy() for e in expression.args["distinct"].args["on"].expressions] distinct_cols = expression.args["distinct"].args["on"].expressions
outer_selects = [e.copy() for e in expression.expressions] expression.args["distinct"].pop()
nested = expression.copy() outer_selects = expression.selects
nested.args["distinct"].pop()
row_number = find_new_name(expression.named_selects, "_row_number") row_number = find_new_name(expression.named_selects, "_row_number")
window = exp.Window( window = exp.Window(
this=exp.RowNumber(), this=exp.RowNumber(),
partition_by=distinct_cols, partition_by=distinct_cols,
) )
order = nested.args.get("order") order = expression.args.get("order")
if order: if order:
window.set("order", order.copy()) window.set("order", order.copy())
order.pop() order.pop()
window = exp.alias_(window, row_number) window = exp.alias_(window, row_number)
nested.select(window, copy=False) expression.select(window, copy=False)
return exp.select(*outer_selects).from_(nested.subquery()).where(f'"{row_number}" = 1') return exp.select(*outer_selects).from_(expression.subquery()).where(f'"{row_number}" = 1')
return expression return expression
@ -120,7 +117,7 @@ def preprocess(
""" """
def _to_sql(self, expression): def _to_sql(self, expression):
expression = transforms[0](expression) expression = transforms[0](expression.copy())
for t in transforms[1:]: for t in transforms[1:]:
expression = t(expression) expression = t(expression)
return to_sql(self, expression) return to_sql(self, expression)

View file

@ -102,3 +102,18 @@ class TestDatabricks(Validator):
"databricks": "SELECT DATEADD(DAY, 1, '2020-01-01')", "databricks": "SELECT DATEADD(DAY, 1, '2020-01-01')",
}, },
) )
def test_without_as(self):
self.validate_all(
"CREATE TABLE x (SELECT 1)",
write={
"databricks": "CREATE TABLE x AS (SELECT 1)",
},
)
self.validate_all(
"WITH x (select 1) SELECT * FROM x",
write={
"databricks": "WITH x AS (SELECT 1) SELECT * FROM x",
},
)

View file

@ -9,9 +9,9 @@ class Validator(unittest.TestCase):
def parse_one(self, sql): def parse_one(self, sql):
return parse_one(sql, read=self.dialect) return parse_one(sql, read=self.dialect)
def validate_identity(self, sql, write_sql=None): def validate_identity(self, sql, write_sql=None, pretty=False):
expression = self.parse_one(sql) expression = self.parse_one(sql)
self.assertEqual(write_sql or sql, expression.sql(dialect=self.dialect)) self.assertEqual(write_sql or sql, expression.sql(dialect=self.dialect, pretty=pretty))
return expression return expression
def validate_all(self, sql, read=None, write=None, pretty=False, identify=False): def validate_all(self, sql, read=None, write=None, pretty=False, identify=False):

View file

@ -75,6 +75,19 @@ class TestDuckDB(Validator):
) )
def test_duckdb(self): def test_duckdb(self):
self.validate_identity("SELECT {'a': 1} AS x")
self.validate_identity("SELECT {'a': {'b': {'c': 1}}, 'd': {'e': 2}} AS x")
self.validate_identity("SELECT {'x': 1, 'y': 2, 'z': 3}")
self.validate_identity(
"SELECT {'yes': 'duck', 'maybe': 'goose', 'huh': NULL, 'no': 'heron'}"
)
self.validate_identity("SELECT {'key1': 'string', 'key2': 1, 'key3': 12.345}")
self.validate_identity("SELECT ROW(x, x + 1, y) FROM (SELECT 1 AS x, 'a' AS y)")
self.validate_identity("SELECT (x, x + 1, y) FROM (SELECT 1 AS x, 'a' AS y)")
self.validate_identity("SELECT a.x FROM (SELECT {'x': 1, 'y': 2, 'z': 3} AS a)")
self.validate_identity(
"SELECT a['x space'] FROM (SELECT {'x space': 1, 'y': 2, 'z': 3} AS a)"
)
self.validate_all( self.validate_all(
"CREATE TABLE IF NOT EXISTS table (cola INT, colb STRING) USING ICEBERG PARTITIONED BY (colb)", "CREATE TABLE IF NOT EXISTS table (cola INT, colb STRING) USING ICEBERG PARTITIONED BY (colb)",
write={ write={
@ -229,10 +242,17 @@ class TestDuckDB(Validator):
self.validate_all( self.validate_all(
"STRUCT_PACK(x := 1, y := '2')", "STRUCT_PACK(x := 1, y := '2')",
write={ write={
"duckdb": "STRUCT_PACK(x := 1, y := '2')", "duckdb": "{'x': 1, 'y': '2'}",
"spark": "STRUCT(x = 1, y = '2')", "spark": "STRUCT(x = 1, y = '2')",
}, },
) )
self.validate_all(
"STRUCT_PACK(key1 := 'value1', key2 := 42)",
write={
"duckdb": "{'key1': 'value1', 'key2': 42}",
"spark": "STRUCT(key1 = 'value1', key2 = 42)",
},
)
self.validate_all( self.validate_all(
"ARRAY_SORT(x)", "ARRAY_SORT(x)",
write={ write={

View file

@ -338,6 +338,27 @@ class TestHive(Validator):
) )
def test_hive(self): def test_hive(self):
self.validate_identity(
"INSERT OVERWRITE TABLE zipcodes PARTITION(state = '0') VALUES (896, 'US', 'TAMPA', 33607)"
)
self.validate_identity(
"INSERT OVERWRITE TABLE zipcodes PARTITION(state = 0) VALUES (896, 'US', 'TAMPA', 33607)"
)
self.validate_identity(
"SELECT a, b, SUM(c) FROM tabl AS t GROUP BY a, b GROUPING SETS ((a, b), a)"
)
self.validate_identity(
"SELECT a, b, SUM(c) FROM tabl AS t GROUP BY a, b GROUPING SETS ((t.a, b), a)"
)
self.validate_identity(
"SELECT a, b, SUM(c) FROM tabl AS t GROUP BY a, FOO(b) GROUPING SETS ((a, FOO(b)), a)"
)
self.validate_identity(
"SELECT key, value, GROUPING__ID, COUNT(*) FROM T1 GROUP BY key, value WITH CUBE"
)
self.validate_identity(
"SELECT key, value, GROUPING__ID, COUNT(*) FROM T1 GROUP BY key, value WITH ROLLUP"
)
self.validate_all( self.validate_all(
"SELECT A.1a AS b FROM test_a AS A", "SELECT A.1a AS b FROM test_a AS A",
write={ write={
@ -615,3 +636,20 @@ class TestHive(Validator):
"spark": "SELECT * FROM x TABLESAMPLE(1) AS foo", "spark": "SELECT * FROM x TABLESAMPLE(1) AS foo",
}, },
) )
self.validate_all(
"SELECT * FROM x TABLESAMPLE(1) AS foo",
read={
"presto": "SELECT * FROM x AS foo TABLESAMPLE(1)",
},
write={
"presto": "SELECT * FROM x AS foo TABLESAMPLE(1)",
"hive": "SELECT * FROM x TABLESAMPLE(1) AS foo",
"spark": "SELECT * FROM x TABLESAMPLE(1) AS foo",
},
)
self.validate_all(
"SELECT a, SUM(c) FROM t GROUP BY a, DATE_FORMAT(b, 'yyyy') GROUPING SETS ((a, DATE_FORMAT(b, 'yyyy')), a)",
write={
"hive": "SELECT a, SUM(c) FROM t GROUP BY a, DATE_FORMAT(CAST(b AS TIMESTAMP), 'yyyy') GROUPING SETS ((a, DATE_FORMAT(CAST(b AS TIMESTAMP), 'yyyy')), a)",
},
)

View file

@ -56,7 +56,22 @@ class TestPostgres(Validator):
) )
def test_postgres(self): def test_postgres(self):
self.validate_all(
"x ^ y",
write={
"": "POWER(x, y)",
"postgres": "x ^ y",
},
)
self.validate_all(
"x # y",
write={
"": "x ^ y",
"postgres": "x # y",
},
)
self.validate_identity("SELECT ARRAY[1, 2, 3]") self.validate_identity("SELECT ARRAY[1, 2, 3]")
self.validate_identity("SELECT ARRAY(SELECT 1)")
self.validate_identity("SELECT ARRAY_LENGTH(ARRAY[1, 2, 3], 1)") self.validate_identity("SELECT ARRAY_LENGTH(ARRAY[1, 2, 3], 1)")
self.validate_identity("STRING_AGG(x, y)") self.validate_identity("STRING_AGG(x, y)")
self.validate_identity("STRING_AGG(x, ',' ORDER BY y)") self.validate_identity("STRING_AGG(x, ',' ORDER BY y)")
@ -88,6 +103,14 @@ class TestPostgres(Validator):
self.validate_identity("SELECT e'\\xDEADBEEF'") self.validate_identity("SELECT e'\\xDEADBEEF'")
self.validate_identity("SELECT CAST(e'\\176' AS BYTEA)") self.validate_identity("SELECT CAST(e'\\176' AS BYTEA)")
self.validate_identity("""SELECT * FROM JSON_TO_RECORDSET(z) AS y("rank" INT)""") self.validate_identity("""SELECT * FROM JSON_TO_RECORDSET(z) AS y("rank" INT)""")
self.validate_identity(
"SELECT SUM(x) OVER a, SUM(y) OVER b FROM c WINDOW a AS (PARTITION BY d), b AS (PARTITION BY e)"
)
self.validate_identity(
"CREATE TABLE A (LIKE B INCLUDING CONSTRAINT INCLUDING COMPRESSION EXCLUDING COMMENTS)"
)
self.validate_identity("x ~ 'y'")
self.validate_identity("x ~* 'y'")
self.validate_all( self.validate_all(
"END WORK AND NO CHAIN", "END WORK AND NO CHAIN",
@ -118,10 +141,6 @@ class TestPostgres(Validator):
"SELECT to_timestamp(123)::time without time zone", "SELECT to_timestamp(123)::time without time zone",
write={"postgres": "SELECT CAST(TO_TIMESTAMP(123) AS TIME)"}, write={"postgres": "SELECT CAST(TO_TIMESTAMP(123) AS TIME)"},
) )
self.validate_identity(
"CREATE TABLE A (LIKE B INCLUDING CONSTRAINT INCLUDING COMPRESSION EXCLUDING COMMENTS)"
)
self.validate_all( self.validate_all(
"SELECT SUM(x) OVER (PARTITION BY a ORDER BY d ROWS 1 PRECEDING)", "SELECT SUM(x) OVER (PARTITION BY a ORDER BY d ROWS 1 PRECEDING)",
write={ write={
@ -283,9 +302,6 @@ class TestPostgres(Validator):
"UPDATE MYTABLE T1 SET T1.COL = 13", "UPDATE MYTABLE T1 SET T1.COL = 13",
write={"postgres": "UPDATE MYTABLE AS T1 SET T1.COL = 13"}, write={"postgres": "UPDATE MYTABLE AS T1 SET T1.COL = 13"},
) )
self.validate_identity("x ~ 'y'")
self.validate_identity("x ~* 'y'")
self.validate_all( self.validate_all(
"x !~ 'y'", "x !~ 'y'",
write={"postgres": "NOT x ~ 'y'"}, write={"postgres": "NOT x ~ 'y'"},
@ -319,13 +335,20 @@ class TestPostgres(Validator):
"'x' 'y' 'z'", "'x' 'y' 'z'",
write={"postgres": "CONCAT('x', 'y', 'z')"}, write={"postgres": "CONCAT('x', 'y', 'z')"},
) )
self.validate_identity("SELECT ARRAY(SELECT 1)")
self.validate_all( self.validate_all(
"x::cstring", "x::cstring",
write={"postgres": "CAST(x AS CSTRING)"}, write={"postgres": "CAST(x AS CSTRING)"},
) )
self.validate_all(
self.validate_identity( "TRIM(BOTH 'as' FROM 'as string as')",
"SELECT SUM(x) OVER a, SUM(y) OVER b FROM c WINDOW a AS (PARTITION BY d), b AS (PARTITION BY e)" write={
"postgres": "TRIM(BOTH 'as' FROM 'as string as')",
"spark": "TRIM(BOTH 'as' FROM 'as string as')",
},
)
def test_bool_or(self):
self.validate_all(
"SELECT a, LOGICAL_OR(b) FROM table GROUP BY a",
write={"postgres": "SELECT a, BOOL_OR(b) FROM table GROUP BY a"},
) )

View file

@ -174,6 +174,13 @@ class TestPresto(Validator):
"spark": "DATE_ADD(x, 1)", "spark": "DATE_ADD(x, 1)",
}, },
) )
self.validate_all(
"NOW()",
write={
"presto": "CURRENT_TIMESTAMP()",
"hive": "CURRENT_TIMESTAMP()",
},
)
def test_ddl(self): def test_ddl(self):
self.validate_all( self.validate_all(

View file

@ -571,3 +571,35 @@ FROM persons AS p, LATERAL FLATTEN(input => p.c, path => 'contact') AS f, LATERA
"spark": "DESCRIBE db.table", "spark": "DESCRIBE db.table",
}, },
) )
def test_match_recognize(self):
for row in (
"ONE ROW PER MATCH",
"ALL ROWS PER MATCH",
"ALL ROWS PER MATCH SHOW EMPTY MATCHES",
"ALL ROWS PER MATCH OMIT EMPTY MATCHES",
"ALL ROWS PER MATCH WITH UNMATCHED ROWS",
):
for after in (
"AFTER MATCH SKIP",
"AFTER MATCH SKIP PAST LAST ROW",
"AFTER MATCH SKIP TO NEXT ROW",
"AFTER MATCH SKIP TO FIRST x",
"AFTER MATCH SKIP TO LAST x",
):
self.validate_identity(
f"""SELECT
*
FROM x
MATCH_RECOGNIZE (
PARTITION BY a, b
ORDER BY
x DESC
MEASURES y AS b
{row}
{after}
PATTERN (^ S1 S2*? ( {{- S3 -}} S4 )+ | PERMUTE(S1, S2){{1,2}} $)
DEFINE x AS y
)""",
pretty=True,
)

View file

@ -208,6 +208,13 @@ TBLPROPERTIES (
def test_spark(self): def test_spark(self):
self.validate_identity("SELECT UNIX_TIMESTAMP()") self.validate_identity("SELECT UNIX_TIMESTAMP()")
self.validate_identity("TRIM(' SparkSQL ')")
self.validate_identity("TRIM(BOTH 'SL' FROM 'SSparkSQLS')")
self.validate_identity("TRIM(LEADING 'SL' FROM 'SSparkSQLS')")
self.validate_identity("TRIM(TRAILING 'SL' FROM 'SSparkSQLS')")
self.validate_all(
"TRIM('SL', 'SSparkSQLS')", write={"spark": "TRIM('SL' FROM 'SSparkSQLS')"}
)
self.validate_all( self.validate_all(
"ARRAY_SORT(x, (left, right) -> -1)", "ARRAY_SORT(x, (left, right) -> -1)",
write={ write={
@ -314,5 +321,5 @@ TBLPROPERTIES (
def test_bool_or(self): def test_bool_or(self):
self.validate_all( self.validate_all(
"SELECT a, LOGICAL_OR(b) FROM table GROUP BY a", "SELECT a, LOGICAL_OR(b) FROM table GROUP BY a",
write={"duckdb": "SELECT a, BOOL_OR(b) FROM table GROUP BY a"}, write={"spark": "SELECT a, BOOL_OR(b) FROM table GROUP BY a"},
) )

View file

@ -99,6 +99,8 @@ STR_POSITION(haystack, needle, pos)
LEVENSHTEIN('gumbo', 'gambol', 2, 1, 1) LEVENSHTEIN('gumbo', 'gambol', 2, 1, 1)
SPLIT(SPLIT(referrer, 'utm_source=')[OFFSET(1)], "&")[OFFSET(0)] SPLIT(SPLIT(referrer, 'utm_source=')[OFFSET(1)], "&")[OFFSET(0)]
x[ORDINAL(1)][SAFE_OFFSET(2)] x[ORDINAL(1)][SAFE_OFFSET(2)]
x GLOB '??-*'
x GLOB y
x LIKE SUBSTR('abc', 1, 1) x LIKE SUBSTR('abc', 1, 1)
x LIKE y x LIKE y
x LIKE a.y x LIKE a.y
@ -143,6 +145,10 @@ SET -v
SET x = ';' SET x = ';'
COMMIT COMMIT
USE db USE db
USE role x
USE warehouse x
USE database x
USE schema x.y
NOT 1 NOT 1
NOT NOT 1 NOT NOT 1
SELECT * FROM test SELECT * FROM test
@ -479,19 +485,6 @@ CREATE TABLE a.b AS (SELECT 1) UNIQUE PRIMARY INDEX index1 (a) UNIQUE INDEX inde
CREATE TABLE a.b AS (SELECT 1) PRIMARY AMP INDEX index1 (a) UNIQUE INDEX index2 (b) CREATE TABLE a.b AS (SELECT 1) PRIMARY AMP INDEX index1 (a) UNIQUE INDEX index2 (b)
CREATE TABLE a.b AS SELECT a FROM a.c CREATE TABLE a.b AS SELECT a FROM a.c
CREATE TABLE IF NOT EXISTS x AS SELECT a FROM d CREATE TABLE IF NOT EXISTS x AS SELECT a FROM d
CREATE TEMPORARY TABLE x AS SELECT a FROM d
CREATE TEMPORARY TABLE IF NOT EXISTS x AS SELECT a FROM d
CREATE VIEW x AS SELECT a FROM b
CREATE VIEW IF NOT EXISTS x AS SELECT a FROM b
CREATE VIEW z (a, b COMMENT 'b', c COMMENT 'c') AS SELECT a, b, c FROM d
CREATE VIEW IF NOT EXISTS z (a, b COMMENT 'b', c COMMENT 'c') AS SELECT a, b, c FROM d
CREATE OR REPLACE VIEW x AS SELECT *
CREATE OR REPLACE TEMPORARY VIEW x AS SELECT *
CREATE TEMPORARY VIEW x AS SELECT a FROM d
CREATE TEMPORARY VIEW IF NOT EXISTS x AS SELECT a FROM d
CREATE TEMPORARY VIEW x AS WITH y AS (SELECT 1) SELECT * FROM y
CREATE MATERIALIZED VIEW x.y.z AS SELECT a FROM b
DROP MATERIALIZED VIEW x.y.z
CREATE TABLE z (a INT, b VARCHAR, c VARCHAR(100), d DECIMAL(5, 3)) CREATE TABLE z (a INT, b VARCHAR, c VARCHAR(100), d DECIMAL(5, 3))
CREATE TABLE z (end INT) CREATE TABLE z (end INT)
CREATE TABLE z (a ARRAY<TEXT>, b MAP<TEXT, DOUBLE>, c DECIMAL(5, 3)) CREATE TABLE z (a ARRAY<TEXT>, b MAP<TEXT, DOUBLE>, c DECIMAL(5, 3))
@ -517,6 +510,34 @@ CREATE TABLE z (a INT UNIQUE AUTO_INCREMENT)
CREATE TABLE z (a INT REFERENCES parent(b, c)) CREATE TABLE z (a INT REFERENCES parent(b, c))
CREATE TABLE z (a INT PRIMARY KEY, b INT REFERENCES foo(id)) CREATE TABLE z (a INT PRIMARY KEY, b INT REFERENCES foo(id))
CREATE TABLE z (a INT, FOREIGN KEY (a) REFERENCES parent(b, c)) CREATE TABLE z (a INT, FOREIGN KEY (a) REFERENCES parent(b, c))
CREATE TABLE asd AS SELECT asd FROM asd WITH NO DATA
CREATE TABLE asd AS SELECT asd FROM asd WITH DATA
CREATE TABLE products (x INT GENERATED BY DEFAULT AS IDENTITY)
CREATE TABLE products (x INT GENERATED ALWAYS AS IDENTITY)
CREATE TABLE IF NOT EXISTS customer (pk BIGINT NOT NULL GENERATED ALWAYS AS IDENTITY (INCREMENT BY 1))
CREATE TABLE customer (pk BIGINT NOT NULL GENERATED ALWAYS AS IDENTITY (START WITH 10 INCREMENT BY 1))
CREATE TABLE customer (pk BIGINT NOT NULL GENERATED ALWAYS AS IDENTITY (START WITH 10))
CREATE TABLE foo (baz_id INT REFERENCES baz(id) DEFERRABLE)
CREATE TABLE a, FALLBACK, LOG, JOURNAL, CHECKSUM=DEFAULT, DEFAULT MERGEBLOCKRATIO, BLOCKCOMPRESSION=MANUAL (a INT)
CREATE TABLE a, NO FALLBACK PROTECTION, NO LOG, NO JOURNAL, CHECKSUM=ON, NO MERGEBLOCKRATIO, BLOCKCOMPRESSION=ALWAYS (a INT)
CREATE TABLE a, WITH JOURNAL TABLE=x.y.z, CHECKSUM=OFF, MERGEBLOCKRATIO=1, DATABLOCKSIZE=10 KBYTES (a INT)
CREATE TABLE a, BEFORE JOURNAL, AFTER JOURNAL, FREESPACE=1, DEFAULT DATABLOCKSIZE, BLOCKCOMPRESSION=DEFAULT (a INT)
CREATE TABLE a, DUAL JOURNAL, DUAL AFTER JOURNAL, MERGEBLOCKRATIO=1 PERCENT, DATABLOCKSIZE=10 KILOBYTES (a INT)
CREATE TABLE a, DUAL BEFORE JOURNAL, LOCAL AFTER JOURNAL, MAXIMUM DATABLOCKSIZE, BLOCKCOMPRESSION=AUTOTEMP(c1 INT) (a INT)
CREATE SET GLOBAL TEMPORARY TABLE a, NO BEFORE JOURNAL, NO AFTER JOURNAL, MINIMUM DATABLOCKSIZE, BLOCKCOMPRESSION=NEVER (a INT)
CREATE MULTISET VOLATILE TABLE a, NOT LOCAL AFTER JOURNAL, FREESPACE=1 PERCENT, DATABLOCKSIZE=10 BYTES, WITH NO CONCURRENT ISOLATED LOADING FOR ALL (a INT)
CREATE TEMPORARY TABLE x AS SELECT a FROM d
CREATE TEMPORARY TABLE IF NOT EXISTS x AS SELECT a FROM d
CREATE VIEW x AS SELECT a FROM b
CREATE VIEW IF NOT EXISTS x AS SELECT a FROM b
CREATE VIEW z (a, b COMMENT 'b', c COMMENT 'c') AS SELECT a, b, c FROM d
CREATE VIEW IF NOT EXISTS z (a, b COMMENT 'b', c COMMENT 'c') AS SELECT a, b, c FROM d
CREATE OR REPLACE VIEW x AS SELECT *
CREATE OR REPLACE TEMPORARY VIEW x AS SELECT *
CREATE TEMPORARY VIEW x AS SELECT a FROM d
CREATE TEMPORARY VIEW IF NOT EXISTS x AS SELECT a FROM d
CREATE TEMPORARY VIEW x AS WITH y AS (SELECT 1) SELECT * FROM y
CREATE MATERIALIZED VIEW x.y.z AS SELECT a FROM b
CREATE VIEW z (a, b) CREATE VIEW z (a, b)
CREATE VIEW z (a, b COMMENT 'b', c COMMENT 'c') CREATE VIEW z (a, b COMMENT 'b', c COMMENT 'c')
CREATE TEMPORARY FUNCTION f CREATE TEMPORARY FUNCTION f
@ -527,15 +548,17 @@ CREATE FUNCTION a(b INT, c VARCHAR) AS 'SELECT 1'
CREATE FUNCTION a() LANGUAGE sql CREATE FUNCTION a() LANGUAGE sql
CREATE FUNCTION a() LANGUAGE sql RETURNS INT CREATE FUNCTION a() LANGUAGE sql RETURNS INT
CREATE FUNCTION a.b.c() CREATE FUNCTION a.b.c()
DROP FUNCTION a.b.c (INT)
CREATE INDEX abc ON t (a) CREATE INDEX abc ON t (a)
CREATE INDEX abc ON t (a, b, b) CREATE INDEX abc ON t (a, b, b)
CREATE UNIQUE INDEX abc ON t (a, b, b) CREATE UNIQUE INDEX abc ON t (a, b, b)
CREATE UNIQUE INDEX IF NOT EXISTS my_idx ON tbl (a, b) CREATE UNIQUE INDEX IF NOT EXISTS my_idx ON tbl (a, b)
CREATE SCHEMA x CREATE SCHEMA x
CREATE SCHEMA IF NOT EXISTS y CREATE SCHEMA IF NOT EXISTS y
CREATE PROCEDURE IF NOT EXISTS a.b.c() AS 'DECLARE BEGIN; END'
DESCRIBE x DESCRIBE x
DROP INDEX a.b.c DROP INDEX a.b.c
DROP FUNCTION a.b.c (INT)
DROP MATERIALIZED VIEW x.y.z
CACHE TABLE x CACHE TABLE x
CACHE LAZY TABLE x CACHE LAZY TABLE x
CACHE LAZY TABLE x OPTIONS('storageLevel' = 'value') CACHE LAZY TABLE x OPTIONS('storageLevel' = 'value')
@ -545,12 +568,11 @@ CACHE LAZY TABLE x AS WITH a AS (SELECT 1) SELECT a.* FROM a
CACHE TABLE x AS WITH a AS (SELECT 1) SELECT a.* FROM a CACHE TABLE x AS WITH a AS (SELECT 1) SELECT a.* FROM a
CACHE TABLE x AS (SELECT 1 AS y) CACHE TABLE x AS (SELECT 1 AS y)
CALL catalog.system.iceberg_procedure_name(named_arg_1 => 'arg_1', named_arg_2 => 'arg_2') CALL catalog.system.iceberg_procedure_name(named_arg_1 => 'arg_1', named_arg_2 => 'arg_2')
CREATE PROCEDURE IF NOT EXISTS a.b.c() AS 'DECLARE BEGIN; END'
DROP PROCEDURE a.b.c (INT) DROP PROCEDURE a.b.c (INT)
INSERT OVERWRITE TABLE a.b PARTITION(ds) SELECT x FROM y INSERT OVERWRITE TABLE a.b PARTITION(ds) SELECT x FROM y
INSERT OVERWRITE TABLE a.b PARTITION(ds='YYYY-MM-DD') SELECT x FROM y INSERT OVERWRITE TABLE a.b PARTITION(ds = 'YYYY-MM-DD') SELECT x FROM y
INSERT OVERWRITE TABLE a.b PARTITION(ds, hour) SELECT x FROM y INSERT OVERWRITE TABLE a.b PARTITION(ds, hour) SELECT x FROM y
INSERT OVERWRITE TABLE a.b PARTITION(ds='YYYY-MM-DD', hour='hh') SELECT x FROM y INSERT OVERWRITE TABLE a.b PARTITION(ds = 'YYYY-MM-DD', hour = 'hh') SELECT x FROM y
ALTER AGGREGATE bla(foo) OWNER TO CURRENT_USER ALTER AGGREGATE bla(foo) OWNER TO CURRENT_USER
ALTER RULE foo ON bla RENAME TO baz ALTER RULE foo ON bla RENAME TO baz
ALTER ROLE CURRENT_USER WITH REPLICATION ALTER ROLE CURRENT_USER WITH REPLICATION
@ -594,10 +616,10 @@ INSERT OVERWRITE TABLE a.b IF EXISTS SELECT * FROM y
INSERT OVERWRITE DIRECTORY 'x' SELECT 1 INSERT OVERWRITE DIRECTORY 'x' SELECT 1
INSERT OVERWRITE LOCAL DIRECTORY 'x' SELECT 1 INSERT OVERWRITE LOCAL DIRECTORY 'x' SELECT 1
INSERT OVERWRITE LOCAL DIRECTORY 'x' ROW FORMAT DELIMITED FIELDS TERMINATED BY '1' COLLECTION ITEMS TERMINATED BY '2' MAP KEYS TERMINATED BY '3' LINES TERMINATED BY '4' NULL DEFINED AS '5' SELECT 1 INSERT OVERWRITE LOCAL DIRECTORY 'x' ROW FORMAT DELIMITED FIELDS TERMINATED BY '1' COLLECTION ITEMS TERMINATED BY '2' MAP KEYS TERMINATED BY '3' LINES TERMINATED BY '4' NULL DEFINED AS '5' SELECT 1
LOAD DATA INPATH 'x' INTO TABLE y PARTITION(ds='yyyy') LOAD DATA INPATH 'x' INTO TABLE y PARTITION(ds = 'yyyy')
LOAD DATA LOCAL INPATH 'x' INTO TABLE y PARTITION(ds='yyyy') LOAD DATA LOCAL INPATH 'x' INTO TABLE y PARTITION(ds = 'yyyy')
LOAD DATA LOCAL INPATH 'x' INTO TABLE y PARTITION(ds='yyyy') INPUTFORMAT 'y' LOAD DATA LOCAL INPATH 'x' INTO TABLE y PARTITION(ds = 'yyyy') INPUTFORMAT 'y'
LOAD DATA LOCAL INPATH 'x' INTO TABLE y PARTITION(ds='yyyy') INPUTFORMAT 'y' SERDE 'z' LOAD DATA LOCAL INPATH 'x' INTO TABLE y PARTITION(ds = 'yyyy') INPUTFORMAT 'y' SERDE 'z'
LOAD DATA INPATH 'x' INTO TABLE y INPUTFORMAT 'y' SERDE 'z' LOAD DATA INPATH 'x' INTO TABLE y INPUTFORMAT 'y' SERDE 'z'
LOAD DATA INPATH 'x' INTO TABLE y.b INPUTFORMAT 'y' SERDE 'z' LOAD DATA INPATH 'x' INTO TABLE y.b INPUTFORMAT 'y' SERDE 'z'
SELECT 1 FROM PARQUET_SCAN('/x/y/*') AS y SELECT 1 FROM PARQUET_SCAN('/x/y/*') AS y
@ -658,10 +680,12 @@ ALTER TABLE integers ALTER COLUMN i SET DEFAULT 10
ALTER TABLE integers ALTER COLUMN i DROP DEFAULT ALTER TABLE integers ALTER COLUMN i DROP DEFAULT
ALTER TABLE mydataset.mytable DROP COLUMN A, DROP COLUMN IF EXISTS B ALTER TABLE mydataset.mytable DROP COLUMN A, DROP COLUMN IF EXISTS B
ALTER TABLE mydataset.mytable ADD COLUMN A TEXT, ADD COLUMN IF NOT EXISTS B INT ALTER TABLE mydataset.mytable ADD COLUMN A TEXT, ADD COLUMN IF NOT EXISTS B INT
ALTER TABLE orders DROP PARTITION(dt = '2014-05-14', country = 'IN')
ALTER TABLE orders DROP IF EXISTS PARTITION(dt = '2014-05-14', country = 'IN')
ALTER TABLE orders DROP PARTITION(dt = '2014-05-14', country = 'IN'), PARTITION(dt = '2014-05-15', country = 'IN')
ALTER TABLE mydataset.mytable DELETE WHERE x = 1
SELECT div.a FROM test_table AS div SELECT div.a FROM test_table AS div
WITH view AS (SELECT 1 AS x) SELECT * FROM view WITH view AS (SELECT 1 AS x) SELECT * FROM view
CREATE TABLE asd AS SELECT asd FROM asd WITH NO DATA
CREATE TABLE asd AS SELECT asd FROM asd WITH DATA
ARRAY<STRUCT<INT, DOUBLE, ARRAY<INT>>> ARRAY<STRUCT<INT, DOUBLE, ARRAY<INT>>>
ARRAY<INT>[1, 2, 3] ARRAY<INT>[1, 2, 3]
ARRAY<INT>[] ARRAY<INT>[]
@ -672,11 +696,6 @@ STRUCT<INT>(5)
STRUCT<DATE>("2011-05-05") STRUCT<DATE>("2011-05-05")
STRUCT<x INT, y TEXT>(1, t.str_col) STRUCT<x INT, y TEXT>(1, t.str_col)
SELECT CAST(NULL AS ARRAY<INT>) IS NULL AS array_is_null SELECT CAST(NULL AS ARRAY<INT>) IS NULL AS array_is_null
CREATE TABLE products (x INT GENERATED BY DEFAULT AS IDENTITY)
CREATE TABLE products (x INT GENERATED ALWAYS AS IDENTITY)
CREATE TABLE IF NOT EXISTS customer (pk BIGINT NOT NULL GENERATED ALWAYS AS IDENTITY (INCREMENT BY 1))
CREATE TABLE customer (pk BIGINT NOT NULL GENERATED ALWAYS AS IDENTITY (START WITH 10 INCREMENT BY 1))
CREATE TABLE customer (pk BIGINT NOT NULL GENERATED ALWAYS AS IDENTITY (START WITH 10))
ALTER TABLE "schema"."tablename" ADD CONSTRAINT "CHK_Name" CHECK (NOT "IdDwh" IS NULL AND "IdDwh" <> (0)) ALTER TABLE "schema"."tablename" ADD CONSTRAINT "CHK_Name" CHECK (NOT "IdDwh" IS NULL AND "IdDwh" <> (0))
ALTER TABLE persons ADD CONSTRAINT persons_pk PRIMARY KEY (first_name, last_name) ALTER TABLE persons ADD CONSTRAINT persons_pk PRIMARY KEY (first_name, last_name)
ALTER TABLE pets ADD CONSTRAINT pets_persons_fk FOREIGN KEY (owner_first_name, owner_last_name) REFERENCES persons ALTER TABLE pets ADD CONSTRAINT pets_persons_fk FOREIGN KEY (owner_first_name, owner_last_name) REFERENCES persons
@ -688,6 +707,5 @@ ALTER TABLE baa ADD CONSTRAINT boo PRIMARY KEY (x, y) NOT ENFORCED DEFERRABLE IN
ALTER TABLE baa ADD CONSTRAINT boo FOREIGN KEY (x, y) REFERENCES persons ON UPDATE NO ACTION ON DELETE NO ACTION MATCH FULL ALTER TABLE baa ADD CONSTRAINT boo FOREIGN KEY (x, y) REFERENCES persons ON UPDATE NO ACTION ON DELETE NO ACTION MATCH FULL
ALTER TABLE a ADD PRIMARY KEY (x, y) NOT ENFORCED ALTER TABLE a ADD PRIMARY KEY (x, y) NOT ENFORCED
ALTER TABLE a ADD FOREIGN KEY (x, y) REFERENCES bla ALTER TABLE a ADD FOREIGN KEY (x, y) REFERENCES bla
CREATE TABLE foo (baz_id INT REFERENCES baz(id) DEFERRABLE)
SELECT end FROM a SELECT end FROM a
SELECT id FROM b.a AS a QUALIFY ROW_NUMBER() OVER (PARTITION BY br ORDER BY sadf DESC) = 1 SELECT id FROM b.a AS a QUALIFY ROW_NUMBER() OVER (PARTITION BY br ORDER BY sadf DESC) = 1

View file

@ -40,7 +40,8 @@ class TestTokens(unittest.TestCase):
(TokenType.SELECT, "SELECT"), (TokenType.SELECT, "SELECT"),
(TokenType.BLOCK_START, "{{"), (TokenType.BLOCK_START, "{{"),
(TokenType.VAR, "x"), (TokenType.VAR, "x"),
(TokenType.BLOCK_END, "}}"), (TokenType.R_BRACE, "}"),
(TokenType.R_BRACE, "}"),
(TokenType.COMMA, ","), (TokenType.COMMA, ","),
(TokenType.BLOCK_START, "{{-"), (TokenType.BLOCK_START, "{{-"),
(TokenType.VAR, "x"), (TokenType.VAR, "x"),
@ -55,7 +56,8 @@ class TestTokens(unittest.TestCase):
(TokenType.VAR, "a"), (TokenType.VAR, "a"),
(TokenType.BLOCK_START, "{{+"), (TokenType.BLOCK_START, "{{+"),
(TokenType.VAR, "b"), (TokenType.VAR, "b"),
(TokenType.BLOCK_END, "}}"), (TokenType.R_BRACE, "}"),
(TokenType.R_BRACE, "}"),
(TokenType.BLOCK_START, "{%"), (TokenType.BLOCK_START, "{%"),
(TokenType.VAR, "endfor"), (TokenType.VAR, "endfor"),
(TokenType.BLOCK_END, "%}"), (TokenType.BLOCK_END, "%}"),

View file

@ -9,6 +9,8 @@ from sqlglot.transforms import (
class TestTime(unittest.TestCase): class TestTime(unittest.TestCase):
maxDiff = None
def validate(self, transform, sql, target): def validate(self, transform, sql, target):
with self.subTest(sql): with self.subTest(sql):
self.assertEqual(parse_one(sql).transform(transform).sql(), target) self.assertEqual(parse_one(sql).transform(transform).sql(), target)
@ -17,7 +19,7 @@ class TestTime(unittest.TestCase):
self.validate( self.validate(
unalias_group, unalias_group,
"SELECT a, b AS b, c AS c, 4 FROM x GROUP BY a, b, x.c, 4", "SELECT a, b AS b, c AS c, 4 FROM x GROUP BY a, b, x.c, 4",
"SELECT a, b AS b, c AS c, 4 FROM x GROUP BY a, b, x.c, 4", "SELECT a, b AS b, c AS c, 4 FROM x GROUP BY a, 2, x.c, 4",
) )
self.validate( self.validate(
unalias_group, unalias_group,
@ -37,7 +39,12 @@ class TestTime(unittest.TestCase):
self.validate( self.validate(
unalias_group, unalias_group,
"SELECT the_date AS the_date, COUNT(*) AS the_count FROM x GROUP BY the_date", "SELECT the_date AS the_date, COUNT(*) AS the_count FROM x GROUP BY the_date",
"SELECT the_date AS the_date, COUNT(*) AS the_count FROM x GROUP BY the_date", "SELECT the_date AS the_date, COUNT(*) AS the_count FROM x GROUP BY 1",
)
self.validate(
unalias_group,
"SELECT a AS a FROM x GROUP BY DATE(a)",
"SELECT a AS a FROM x GROUP BY DATE(a)",
) )
def test_eliminate_distinct_on(self): def test_eliminate_distinct_on(self):