1
0
Fork 0

Adding upstream version 7.1.3.

Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
Daniel Baumann 2025-02-13 14:46:14 +01:00
parent 291e0c125c
commit 768d386bf5
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
42 changed files with 1430 additions and 253 deletions

View file

@ -1,6 +1,32 @@
Changelog Changelog
========= =========
v7.1.0
------
Changes:
- Improvement: Pretty generator now takes max\_text\_width, which breaks long segments onto new lines
- New: exp.to\_table helper to turn table names into table expression objects
- New: int[] type parsers
- New: Annotations are now generated in SQL
v7.0.0
------
Changes:
- Breaking: DISTINCT within functions now takes multiple values, e.g. COUNT(DISTINCT a, b).
exp.Distinct no longer uses `this` and now uses the expressions property
- New: Expression False kwargs are now excluded from equality checks
- New: Parse DESCRIBE and CREATE SCHEMA
- New: DELETE and VALUES builder
- New: Unused CTEs and JOINs are now removed in the optimizer
v6.3.0 v6.3.0
------ ------

View file

@ -23,7 +23,7 @@ from sqlglot.generator import Generator
from sqlglot.parser import Parser from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer, TokenType from sqlglot.tokens import Tokenizer, TokenType
__version__ = "6.3.1" __version__ = "7.1.3"
pretty = False pretty = False

View file

@ -1,7 +1,6 @@
from sqlglot import exp from sqlglot import exp
from sqlglot.dialects.dialect import Dialect, inline_array_sql, var_map_sql from sqlglot.dialects.dialect import Dialect, inline_array_sql, var_map_sql
from sqlglot.generator import Generator from sqlglot.generator import Generator
from sqlglot.helper import csv
from sqlglot.parser import Parser, parse_var_map from sqlglot.parser import Parser, parse_var_map
from sqlglot.tokens import Tokenizer, TokenType from sqlglot.tokens import Tokenizer, TokenType
@ -66,7 +65,7 @@ class ClickHouse(Dialect):
TRANSFORMS = { TRANSFORMS = {
**Generator.TRANSFORMS, **Generator.TRANSFORMS,
exp.Array: inline_array_sql, exp.Array: inline_array_sql,
exp.StrPosition: lambda self, e: f"position({csv(self.sql(e, 'this'), self.sql(e, 'substr'), self.sql(e, 'position'))})", exp.StrPosition: lambda self, e: f"position({self.format_args(e.this, e.args.get('substr'), e.args.get('position'))})",
exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL", exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL",
exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)), exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)),
exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)), exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)),

View file

@ -2,7 +2,7 @@ from enum import Enum
from sqlglot import exp from sqlglot import exp
from sqlglot.generator import Generator from sqlglot.generator import Generator
from sqlglot.helper import csv, list_get from sqlglot.helper import list_get
from sqlglot.parser import Parser from sqlglot.parser import Parser
from sqlglot.time import format_time from sqlglot.time import format_time
from sqlglot.tokens import Tokenizer from sqlglot.tokens import Tokenizer
@ -177,11 +177,11 @@ class Dialect(metaclass=_Dialect):
def rename_func(name): def rename_func(name):
def _rename(self, expression): def _rename(self, expression):
args = ( args = (
self.expressions(expression, flat=True) expression.expressions
if isinstance(expression, exp.Func) and expression.is_var_len_args if isinstance(expression, exp.Func) and expression.is_var_len_args
else csv(*[self.sql(e) for e in expression.args.values()]) else expression.args.values()
) )
return f"{name}({args})" return f"{name}({self.format_args(*args)})"
return _rename return _rename
@ -189,15 +189,11 @@ def rename_func(name):
def approx_count_distinct_sql(self, expression): def approx_count_distinct_sql(self, expression):
if expression.args.get("accuracy"): if expression.args.get("accuracy"):
self.unsupported("APPROX_COUNT_DISTINCT does not support accuracy") self.unsupported("APPROX_COUNT_DISTINCT does not support accuracy")
return f"APPROX_COUNT_DISTINCT({self.sql(expression, 'this')})" return f"APPROX_COUNT_DISTINCT({self.format_args(expression.this)})"
def if_sql(self, expression): def if_sql(self, expression):
expressions = csv( expressions = self.format_args(expression.this, expression.args.get("true"), expression.args.get("false"))
self.sql(expression, "this"),
self.sql(expression, "true"),
self.sql(expression, "false"),
)
return f"IF({expressions})" return f"IF({expressions})"
@ -254,6 +250,11 @@ def no_trycast_sql(self, expression):
return self.cast_sql(expression) return self.cast_sql(expression)
def no_properties_sql(self, expression):
    """Generate nothing for a properties node and flag it as unsupported.

    Args:
        self: the generator; only its `unsupported` method is called.
        expression: the node being generated (not inspected here).

    Returns:
        str: always the empty string.
    """
    message = "Properties unsupported"
    self.unsupported(message)
    return ""
def str_position_sql(self, expression): def str_position_sql(self, expression):
this = self.sql(expression, "this") this = self.sql(expression, "this")
substr = self.sql(expression, "substr") substr = self.sql(expression, "substr")
@ -275,13 +276,13 @@ def var_map_sql(self, expression):
if not isinstance(keys, exp.Array) or not isinstance(values, exp.Array): if not isinstance(keys, exp.Array) or not isinstance(values, exp.Array):
self.unsupported("Cannot convert array columns into map.") self.unsupported("Cannot convert array columns into map.")
return f"MAP({self.sql(keys)}, {self.sql(values)})" return f"MAP({self.format_args(keys, values)})"
args = [] args = []
for key, value in zip(keys.expressions, values.expressions): for key, value in zip(keys.expressions, values.expressions):
args.append(self.sql(key)) args.append(self.sql(key))
args.append(self.sql(value)) args.append(self.sql(value))
return f"MAP({csv(*args)})" return f"MAP({self.format_args(*args)})"
def format_time_lambda(exp_class, dialect, default=None): def format_time_lambda(exp_class, dialect, default=None):

View file

@ -6,6 +6,7 @@ from sqlglot.dialects.dialect import (
arrow_json_extract_sql, arrow_json_extract_sql,
format_time_lambda, format_time_lambda,
no_pivot_sql, no_pivot_sql,
no_properties_sql,
no_safe_divide_sql, no_safe_divide_sql,
no_tablesample_sql, no_tablesample_sql,
rename_func, rename_func,
@ -68,6 +69,12 @@ def _struct_pack_sql(self, expression):
return f"STRUCT_PACK({', '.join(args)})" return f"STRUCT_PACK({', '.join(args)})"
def _datatype_sql(self, expression):
    """Render a data type, writing ARRAY types with a trailing `[]`.

    Args:
        self: the generator doing the rendering.
        expression: the data type node; its `this` is compared against
            `exp.DataType.Type.ARRAY`.

    Returns:
        str: `<nested expressions>[]` for array types, otherwise whatever the
        generator's own `datatype_sql` produces.
    """
    is_array = expression.this == exp.DataType.Type.ARRAY
    if not is_array:
        return self.datatype_sql(expression)

    element_sql = self.expressions(expression, flat=True)
    return f"{element_sql}[]"
class DuckDB(Dialect): class DuckDB(Dialect):
class Tokenizer(Tokenizer): class Tokenizer(Tokenizer):
KEYWORDS = { KEYWORDS = {
@ -106,6 +113,8 @@ class DuckDB(Dialect):
} }
class Generator(Generator): class Generator(Generator):
STRUCT_DELIMITER = ("(", ")")
TRANSFORMS = { TRANSFORMS = {
**Generator.TRANSFORMS, **Generator.TRANSFORMS,
exp.ApproxDistinct: approx_count_distinct_sql, exp.ApproxDistinct: approx_count_distinct_sql,
@ -113,8 +122,9 @@ class DuckDB(Dialect):
exp.ArraySize: rename_func("ARRAY_LENGTH"), exp.ArraySize: rename_func("ARRAY_LENGTH"),
exp.ArraySort: _array_sort_sql, exp.ArraySort: _array_sort_sql,
exp.ArraySum: rename_func("LIST_SUM"), exp.ArraySum: rename_func("LIST_SUM"),
exp.DataType: _datatype_sql,
exp.DateAdd: _date_add, exp.DateAdd: _date_add,
exp.DateDiff: lambda self, e: f"""DATE_DIFF({self.sql(e, 'unit') or "'day'"}, {self.sql(e, 'expression')}, {self.sql(e, 'this')})""", exp.DateDiff: lambda self, e: f"""DATE_DIFF({self.format_args(e.args.get("unit") or "'day'", e.expression, e.this)})""",
exp.DateStrToDate: lambda self, e: f"CAST({self.sql(e, 'this')} AS DATE)", exp.DateStrToDate: lambda self, e: f"CAST({self.sql(e, 'this')} AS DATE)",
exp.DateToDi: lambda self, e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.dateint_format}) AS INT)", exp.DateToDi: lambda self, e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.dateint_format}) AS INT)",
exp.DiToDate: lambda self, e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.dateint_format}) AS DATE)", exp.DiToDate: lambda self, e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.dateint_format}) AS DATE)",
@ -124,6 +134,7 @@ class DuckDB(Dialect):
exp.JSONBExtract: arrow_json_extract_sql, exp.JSONBExtract: arrow_json_extract_sql,
exp.JSONBExtractScalar: arrow_json_extract_scalar_sql, exp.JSONBExtractScalar: arrow_json_extract_scalar_sql,
exp.Pivot: no_pivot_sql, exp.Pivot: no_pivot_sql,
exp.Properties: no_properties_sql,
exp.RegexpLike: rename_func("REGEXP_MATCHES"), exp.RegexpLike: rename_func("REGEXP_MATCHES"),
exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"), exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"),
exp.SafeDivide: no_safe_divide_sql, exp.SafeDivide: no_safe_divide_sql,

View file

@ -14,7 +14,7 @@ from sqlglot.dialects.dialect import (
var_map_sql, var_map_sql,
) )
from sqlglot.generator import Generator from sqlglot.generator import Generator
from sqlglot.helper import csv, list_get from sqlglot.helper import list_get
from sqlglot.parser import Parser, parse_var_map from sqlglot.parser import Parser, parse_var_map
from sqlglot.tokens import Tokenizer from sqlglot.tokens import Tokenizer
@ -32,7 +32,7 @@ def _property_sql(self, expression):
def _str_to_unix(self, expression): def _str_to_unix(self, expression):
return f"UNIX_TIMESTAMP({csv(self.sql(expression, 'this'), _time_format(self, expression))})" return f"UNIX_TIMESTAMP({self.format_args(expression.this, _time_format(self, expression))})"
def _str_to_date(self, expression): def _str_to_date(self, expression):
@ -226,7 +226,7 @@ class Hive(Dialect):
exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
exp.SetAgg: rename_func("COLLECT_SET"), exp.SetAgg: rename_func("COLLECT_SET"),
exp.Split: lambda self, e: f"SPLIT({self.sql(e, 'this')}, CONCAT('\\\\Q', {self.sql(e, 'expression')}))", exp.Split: lambda self, e: f"SPLIT({self.sql(e, 'this')}, CONCAT('\\\\Q', {self.sql(e, 'expression')}))",
exp.StrPosition: lambda self, e: f"LOCATE({csv(self.sql(e, 'substr'), self.sql(e, 'this'), self.sql(e, 'position'))})", exp.StrPosition: lambda self, e: f"LOCATE({self.format_args(e.args.get('substr'), e.this, e.args.get('position'))})",
exp.StrToDate: _str_to_date, exp.StrToDate: _str_to_date,
exp.StrToTime: _str_to_time, exp.StrToTime: _str_to_time,
exp.StrToUnix: _str_to_unix, exp.StrToUnix: _str_to_unix,
@ -241,7 +241,7 @@ class Hive(Dialect):
exp.TsOrDsAdd: lambda self, e: f"DATE_ADD({self.sql(e, 'this')}, {self.sql(e, 'expression')})", exp.TsOrDsAdd: lambda self, e: f"DATE_ADD({self.sql(e, 'this')}, {self.sql(e, 'expression')})",
exp.TsOrDsToDate: _to_date_sql, exp.TsOrDsToDate: _to_date_sql,
exp.TryCast: no_trycast_sql, exp.TryCast: no_trycast_sql,
exp.UnixToStr: lambda self, e: f"FROM_UNIXTIME({csv(self.sql(e, 'this'), _time_format(self, e))})", exp.UnixToStr: lambda self, e: f"FROM_UNIXTIME({self.format_args(e.this, _time_format(self, e))})",
exp.UnixToTime: rename_func("FROM_UNIXTIME"), exp.UnixToTime: rename_func("FROM_UNIXTIME"),
exp.UnixToTimeStr: rename_func("FROM_UNIXTIME"), exp.UnixToTimeStr: rename_func("FROM_UNIXTIME"),
exp.PartitionedByProperty: lambda self, e: f"PARTITIONED BY {self.sql(e, 'value')}", exp.PartitionedByProperty: lambda self, e: f"PARTITIONED BY {self.sql(e, 'value')}",

View file

@ -167,6 +167,7 @@ class Postgres(Dialect):
**Tokenizer.KEYWORDS, **Tokenizer.KEYWORDS,
"ALWAYS": TokenType.ALWAYS, "ALWAYS": TokenType.ALWAYS,
"BY DEFAULT": TokenType.BY_DEFAULT, "BY DEFAULT": TokenType.BY_DEFAULT,
"COMMENT ON": TokenType.COMMENT_ON,
"IDENTITY": TokenType.IDENTITY, "IDENTITY": TokenType.IDENTITY,
"GENERATED": TokenType.GENERATED, "GENERATED": TokenType.GENERATED,
"DOUBLE PRECISION": TokenType.DOUBLE, "DOUBLE PRECISION": TokenType.DOUBLE,

View file

@ -11,7 +11,7 @@ from sqlglot.dialects.dialect import (
) )
from sqlglot.dialects.mysql import MySQL from sqlglot.dialects.mysql import MySQL
from sqlglot.generator import Generator from sqlglot.generator import Generator
from sqlglot.helper import csv, list_get from sqlglot.helper import list_get
from sqlglot.parser import Parser from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer, TokenType from sqlglot.tokens import Tokenizer, TokenType
@ -26,7 +26,7 @@ def _concat_ws_sql(self, expression):
sep, *args = expression.expressions sep, *args = expression.expressions
sep = self.sql(sep) sep = self.sql(sep)
if len(args) > 1: if len(args) > 1:
return f"ARRAY_JOIN(ARRAY[{csv(*(self.sql(e) for e in args))}], {sep})" return f"ARRAY_JOIN(ARRAY[{self.format_args(*args)}], {sep})"
return f"ARRAY_JOIN({self.sql(args[0])}, {sep})" return f"ARRAY_JOIN({self.sql(args[0])}, {sep})"
@ -66,7 +66,7 @@ def _no_sort_array(self, expression):
comparator = "(a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END" comparator = "(a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END"
else: else:
comparator = None comparator = None
args = csv(self.sql(expression, "this"), comparator) args = self.format_args(expression.this, comparator)
return f"ARRAY_SORT({args})" return f"ARRAY_SORT({args})"

View file

@ -1,7 +1,6 @@
from sqlglot import exp from sqlglot import exp
from sqlglot.dialects.dialect import Dialect from sqlglot.dialects.dialect import Dialect
from sqlglot.generator import Generator from sqlglot.generator import Generator
from sqlglot.helper import list_get
from sqlglot.parser import Parser from sqlglot.parser import Parser
@ -16,7 +15,7 @@ def _coalesce_sql(self, expression):
def _count_sql(self, expression): def _count_sql(self, expression):
this = expression.this this = expression.this
if isinstance(this, exp.Distinct): if isinstance(this, exp.Distinct):
return f"COUNTD({self.sql(this, 'this')})" return f"COUNTD({self.expressions(this, flat=True)})"
return f"COUNT({self.sql(expression, 'this')})" return f"COUNT({self.sql(expression, 'this')})"
@ -33,5 +32,5 @@ class Tableau(Dialect):
FUNCTIONS = { FUNCTIONS = {
**Parser.FUNCTIONS, **Parser.FUNCTIONS,
"IFNULL": exp.Coalesce.from_arg_list, "IFNULL": exp.Coalesce.from_arg_list,
"COUNTD": lambda args: exp.Count(this=exp.Distinct(this=list_get(args, 0))), "COUNTD": lambda args: exp.Count(this=exp.Distinct(expressions=args)),
} }

View file

@ -1,3 +1,4 @@
import datetime
import numbers import numbers
import re import re
from collections import deque from collections import deque
@ -508,7 +509,7 @@ class DerivedTable(Expression):
return [select.alias_or_name for select in self.selects] return [select.alias_or_name for select in self.selects]
class Unionable: class Unionable(Expression):
def union(self, expression, distinct=True, dialect=None, **opts): def union(self, expression, distinct=True, dialect=None, **opts):
""" """
Builds a UNION expression. Builds a UNION expression.
@ -614,6 +615,10 @@ class Create(Expression):
} }
class Describe(Expression):
    """Expression node for a DESCRIBE statement (rendered by the generator as `DESCRIBE <this>`)."""

    pass
class UserDefinedFunction(Expression): class UserDefinedFunction(Expression):
arg_types = {"this": True, "expressions": False} arg_types = {"this": True, "expressions": False}
@ -741,6 +746,11 @@ class Check(Expression):
pass pass
class Directory(Expression):
    """Directory target node, rendered by the generator as `[LOCAL ]DIRECTORY <this>[ <row_format>]`."""

    # https://spark.apache.org/docs/3.0.0-preview/sql-ref-syntax-dml-insert-overwrite-directory-hive.html
    # NOTE(review): True/False presumably mark required vs. optional args — confirm against Expression.
    arg_types = {"this": True, "local": False, "row_format": False}
class ForeignKey(Expression): class ForeignKey(Expression):
arg_types = { arg_types = {
"expressions": True, "expressions": True,
@ -804,6 +814,18 @@ class Introducer(Expression):
arg_types = {"this": True, "expression": True} arg_types = {"this": True, "expression": True}
class LoadData(Expression):
    """Node for a LOAD DATA statement.

    The generator renders it as
    `LOAD DATA[ LOCAL] INPATH <inpath>[ OVERWRITE] INTO TABLE <this>[ <partition>]
    [ INPUTFORMAT <input_format>][ SERDE <serde>]`.
    """

    # NOTE(review): True/False presumably mark required vs. optional args — confirm against Expression.
    arg_types = {
        "this": True,
        "local": False,
        "overwrite": False,
        "inpath": True,
        "partition": False,
        "input_format": False,
        "serde": False,
    }
class Partition(Expression): class Partition(Expression):
pass pass
@ -1037,6 +1059,18 @@ class Reference(Expression):
arg_types = {"this": True, "expressions": True} arg_types = {"this": True, "expressions": True}
class RowFormat(Expression):
    """Node for a `ROW FORMAT DELIMITED` clause.

    Each arg maps to one optional sub-clause emitted by the generator:
    fields -> FIELDS TERMINATED BY, escaped -> ESCAPED BY,
    collection_items -> COLLECTION ITEMS TERMINATED BY,
    map_keys -> MAP KEYS TERMINATED BY, lines -> LINES TERMINATED BY,
    null -> NULL DEFINED AS.
    """

    # https://cwiki.apache.org/confluence/display/hive/languagemanual+dml
    arg_types = {
        "fields": False,
        "escaped": False,
        "collection_items": False,
        "map_keys": False,
        "lines": False,
        "null": False,
    }
class Tuple(Expression): class Tuple(Expression):
arg_types = {"expressions": False} arg_types = {"expressions": False}
@ -1071,6 +1105,14 @@ class Subqueryable(Unionable):
return [] return []
return with_.expressions return with_.expressions
@property
def selects(self):
    """Placeholder accessor that concrete subclasses are expected to override.

    Raises:
        NotImplementedError: always, in this base implementation.
    """
    raise NotImplementedError("Subqueryable objects must implement `selects`")
@property
def named_selects(self):
    """Placeholder accessor that concrete subclasses are expected to override.

    Raises:
        NotImplementedError: always, in this base implementation.
    """
    raise NotImplementedError("Subqueryable objects must implement `named_selects`")
def with_( def with_(
self, self,
alias, alias,
@ -1158,7 +1200,7 @@ class Table(Expression):
} }
class Union(Subqueryable, Expression): class Union(Subqueryable):
arg_types = { arg_types = {
"with": False, "with": False,
"this": True, "this": True,
@ -1169,7 +1211,11 @@ class Union(Subqueryable, Expression):
@property @property
def named_selects(self): def named_selects(self):
return self.args["this"].unnest().named_selects return self.this.unnest().named_selects
@property
def selects(self):
    """Return the `selects` of this union's `this` operand after unnesting it."""
    return self.this.unnest().selects
@property @property
def left(self): def left(self):
@ -1222,7 +1268,7 @@ class Schema(Expression):
arg_types = {"this": False, "expressions": True} arg_types = {"this": False, "expressions": True}
class Select(Subqueryable, Expression): class Select(Subqueryable):
arg_types = { arg_types = {
"with": False, "with": False,
"expressions": False, "expressions": False,
@ -2075,7 +2121,7 @@ class Bracket(Condition):
class Distinct(Expression): class Distinct(Expression):
arg_types = {"this": False, "on": False} arg_types = {"expressions": False, "on": False}
class In(Predicate): class In(Predicate):
@ -2233,6 +2279,14 @@ class Case(Func):
class Cast(Func): class Cast(Func):
arg_types = {"this": True, "to": True} arg_types = {"this": True, "to": True}
@property
def name(self):
    """Return the `name` of the expression being cast (delegates to `self.this.name`)."""
    return self.this.name
@property
def to(self):
    """Return the `to` argument of the cast.

    Raises:
        KeyError: if the `to` arg is not present in `self.args`.
    """
    return self.args["to"]
class TryCast(Cast): class TryCast(Cast):
pass pass
@ -2666,7 +2720,7 @@ def _norm_args(expression):
else: else:
arg = _norm_arg(arg) arg = _norm_arg(arg)
if arg is not None: if arg is not None and arg is not False:
args[k] = arg args[k] = arg
return args return args
@ -3012,6 +3066,30 @@ def update(table, properties, where=None, from_=None, dialect=None, **opts):
return update return update
def delete(table, where=None, dialect=None, **opts):
    """
    Builds a delete statement.

    Example:
        >>> delete("my_table", where="id > 1").sql()
        'DELETE FROM my_table WHERE id > 1'

    Args:
        table (str|Table): the table to delete from; strings are parsed into a Table.
        where (str|Condition): sql conditional parsed into a WHERE statement.
            When omitted (None), no WHERE clause is attached.
        dialect (str): the dialect used to parse the input expressions.
        **opts: other options to use to parse the input expressions.

    Returns:
        Delete: the syntax tree for the DELETE statement.
    """
    # Parse the table first, matching the original evaluation order.
    delete_args = {"this": maybe_parse(table, into=Table, dialect=dialect, **opts)}

    # Only build a WHERE clause when one was actually supplied; previously the
    # default `where=None` was still handed to maybe_parse with a "WHERE" prefix.
    if where is not None:
        if isinstance(where, Condition):
            delete_args["where"] = Where(this=where)
        else:
            delete_args["where"] = maybe_parse(
                where, into=Where, dialect=dialect, prefix="WHERE", **opts
            )

    return Delete(**delete_args)
def condition(expression, dialect=None, **opts): def condition(expression, dialect=None, **opts):
""" """
Initialize a logical condition expression. Initialize a logical condition expression.
@ -3131,6 +3209,25 @@ def to_identifier(alias, quoted=None):
return identifier return identifier
def to_table(sql_path, **kwargs):
    """
    Create a table expression from a `[catalog].[schema].[table]` sql path. Catalog and schema are optional.

    Example:
        >>> to_table('catalog.db.table_name').sql()
        'catalog.db.table_name'

    Args:
        sql_path (str): `[catalog].[schema].[table]` string
        **kwargs: extra arguments forwarded to the Table constructor.

    Returns:
        Table: A table expression

    Raises:
        ValueError: if `sql_path` has more than three dot-separated parts.
    """
    # NOTE: this is a plain split on "."; dots inside quoted identifiers are not handled.
    table_parts = sql_path.split(".")

    # Fail early with a readable message instead of the opaque
    # "too many values to unpack" raised by the unpacking below.
    if len(table_parts) > 3:
        raise ValueError(
            f"Expected at most 3 dot-separated parts ([catalog].[schema].[table]), got: {sql_path!r}"
        )

    # Left-pad with None so that missing catalog/schema stay unset.
    padded_parts = [None] * (3 - len(table_parts)) + table_parts
    catalog, db, table_name = [
        to_identifier(part) if part is not None else part for part in padded_parts
    ]
    return Table(this=table_name, db=db, catalog=catalog, **kwargs)
def alias_(expression, alias, table=False, dialect=None, quoted=None, **opts): def alias_(expression, alias, table=False, dialect=None, quoted=None, **opts):
""" """
Create an Alias expression. Create an Alias expression.
@ -3216,6 +3313,28 @@ def table_(table, db=None, catalog=None, quoted=None):
) )
def values(values, alias=None):
    """Build VALUES statement.

    Example:
        >>> values([(1, '2')]).sql()
        "VALUES (1, '2')"

    Args:
        values (list[tuple[str | Expression]]): rows to emit; each row is passed through `convert`
        alias (str): optional alias, turned into an identifier when truthy

    Returns:
        Values: the Values expression object
    """
    rows = [convert(row) for row in values]
    alias_identifier = to_identifier(alias) if alias else None
    return Values(expressions=rows, alias=alias_identifier)
def convert(value): def convert(value):
"""Convert a python value into an expression object. """Convert a python value into an expression object.
@ -3246,6 +3365,12 @@ def convert(value):
keys=[convert(k) for k in value.keys()], keys=[convert(k) for k in value.keys()],
values=[convert(v) for v in value.values()], values=[convert(v) for v in value.values()],
) )
if isinstance(value, datetime.datetime):
datetime_literal = Literal.string(value.strftime("%Y-%m-%d %H:%M:%S"))
return TimeStrToTime(this=datetime_literal)
if isinstance(value, datetime.date):
date_literal = Literal.string(value.strftime("%Y-%m-%d"))
return DateStrToDate(this=date_literal)
raise ValueError(f"Cannot convert {value}") raise ValueError(f"Cannot convert {value}")

View file

@ -2,7 +2,7 @@ import logging
from sqlglot import exp from sqlglot import exp
from sqlglot.errors import ErrorLevel, UnsupportedError, concat_errors from sqlglot.errors import ErrorLevel, UnsupportedError, concat_errors
from sqlglot.helper import apply_index_offset, csv, ensure_list from sqlglot.helper import apply_index_offset, csv
from sqlglot.time import format_time from sqlglot.time import format_time
from sqlglot.tokens import TokenType from sqlglot.tokens import TokenType
@ -43,14 +43,18 @@ class Generator:
Default: 3 Default: 3
leading_comma (bool): if the the comma is leading or trailing in select statements leading_comma (bool): if the the comma is leading or trailing in select statements
Default: False Default: False
max_text_width: The max number of characters in a segment before creating new lines in pretty mode.
The default is on the smaller end because the length only represents a segment and not the true
line length.
Default: 80
""" """
TRANSFORMS = { TRANSFORMS = {
exp.CharacterSetProperty: lambda self, e: f"{'DEFAULT ' if e.args['default'] else ''}CHARACTER SET={self.sql(e, 'value')}", exp.CharacterSetProperty: lambda self, e: f"{'DEFAULT ' if e.args['default'] else ''}CHARACTER SET={self.sql(e, 'value')}",
exp.DateAdd: lambda self, e: f"DATE_ADD({self.sql(e, 'this')}, {self.sql(e, 'expression')}, {self.sql(e, 'unit')})", exp.DateAdd: lambda self, e: f"DATE_ADD({self.format_args(e.this, e.expression, e.args.get('unit'))})",
exp.DateDiff: lambda self, e: f"DATEDIFF({self.sql(e, 'this')}, {self.sql(e, 'expression')})", exp.DateDiff: lambda self, e: f"DATEDIFF({self.format_args(e.this, e.expression)})",
exp.TsOrDsAdd: lambda self, e: f"TS_OR_DS_ADD({self.sql(e, 'this')}, {self.sql(e, 'expression')}, {self.sql(e, 'unit')})", exp.TsOrDsAdd: lambda self, e: f"TS_OR_DS_ADD({self.format_args(e.this, e.expression, e.args.get('unit'))})",
exp.VarMap: lambda self, e: f"MAP({self.sql(e.args['keys'])}, {self.sql(e.args['values'])})", exp.VarMap: lambda self, e: f"MAP({self.format_args(e.args['keys'], e.args['values'])})",
exp.LanguageProperty: lambda self, e: self.naked_property(e), exp.LanguageProperty: lambda self, e: self.naked_property(e),
exp.LocationProperty: lambda self, e: self.naked_property(e), exp.LocationProperty: lambda self, e: self.naked_property(e),
exp.ReturnsProperty: lambda self, e: self.naked_property(e), exp.ReturnsProperty: lambda self, e: self.naked_property(e),
@ -111,6 +115,7 @@ class Generator:
"_replace_backslash", "_replace_backslash",
"_escaped_quote_end", "_escaped_quote_end",
"_leading_comma", "_leading_comma",
"_max_text_width",
) )
def __init__( def __init__(
@ -135,6 +140,7 @@ class Generator:
null_ordering=None, null_ordering=None,
max_unsupported=3, max_unsupported=3,
leading_comma=False, leading_comma=False,
max_text_width=80,
): ):
import sqlglot import sqlglot
@ -162,6 +168,7 @@ class Generator:
self._replace_backslash = self.escape == "\\" self._replace_backslash = self.escape == "\\"
self._escaped_quote_end = self.escape + self.quote_end self._escaped_quote_end = self.escape + self.quote_end
self._leading_comma = leading_comma self._leading_comma = leading_comma
self._max_text_width = max_text_width
def generate(self, expression): def generate(self, expression):
""" """
@ -268,7 +275,7 @@ class Generator:
raise ValueError(f"Unsupported expression type {expression.__class__.__name__}") raise ValueError(f"Unsupported expression type {expression.__class__.__name__}")
def annotation_sql(self, expression): def annotation_sql(self, expression):
return self.sql(expression, "expression") return f"{self.sql(expression, 'expression')} # {expression.name.strip()}"
def uncache_sql(self, expression): def uncache_sql(self, expression):
table = self.sql(expression, "this") table = self.sql(expression, "this")
@ -364,6 +371,9 @@ class Generator:
) )
return self.prepend_ctes(expression, expression_sql) return self.prepend_ctes(expression, expression_sql)
def describe_sql(self, expression):
    """Render a DESCRIBE statement for the node's `this` argument.

    Args:
        expression: the node whose `this` arg is rendered via `self.sql`.

    Returns:
        str: `DESCRIBE <this>`.
    """
    target = self.sql(expression, "this")
    return f"DESCRIBE {target}"
def prepend_ctes(self, expression, sql): def prepend_ctes(self, expression, sql):
with_ = self.sql(expression, "with") with_ = self.sql(expression, "with")
if with_: if with_:
@ -405,6 +415,12 @@ class Generator:
) )
return f"{type_sql}{nested}" return f"{type_sql}{nested}"
def directory_sql(self, expression):
    """Render a directory target as `[LOCAL ]DIRECTORY <this>[ <row_format>]`.

    Args:
        expression: node providing the `local`, `row_format` and `this` args.

    Returns:
        str: the DIRECTORY clause; the row format is appended only when it
        renders to a non-empty string.
    """
    prefix = "LOCAL " if expression.args.get("local") else ""

    row_format_sql = self.sql(expression, "row_format")
    target = self.sql(expression, "this")

    suffix = ""
    if row_format_sql:
        suffix = f" {row_format_sql}"

    return f"{prefix}DIRECTORY {target}{suffix}"
def delete_sql(self, expression): def delete_sql(self, expression):
this = self.sql(expression, "this") this = self.sql(expression, "this")
where_sql = self.sql(expression, "where") where_sql = self.sql(expression, "where")
@ -513,13 +529,19 @@ class Generator:
return f"{key}={value}" return f"{key}={value}"
def insert_sql(self, expression): def insert_sql(self, expression):
kind = "OVERWRITE TABLE" if expression.args.get("overwrite") else "INTO" overwrite = expression.args.get("overwrite")
this = self.sql(expression, "this")
if isinstance(expression.this, exp.Directory):
this = "OVERWRITE " if overwrite else "INTO "
else:
this = "OVERWRITE TABLE " if overwrite else "INTO "
this = f"{this}{self.sql(expression, 'this')}"
exists = " IF EXISTS " if expression.args.get("exists") else " " exists = " IF EXISTS " if expression.args.get("exists") else " "
partition_sql = self.sql(expression, "partition") if expression.args.get("partition") else "" partition_sql = self.sql(expression, "partition") if expression.args.get("partition") else ""
expression_sql = self.sql(expression, "expression") expression_sql = self.sql(expression, "expression")
sep = self.sep() if partition_sql else "" sep = self.sep() if partition_sql else ""
sql = f"INSERT {kind} {this}{exists}{partition_sql}{sep}{expression_sql}" sql = f"INSERT {this}{exists}{partition_sql}{sep}{expression_sql}"
return self.prepend_ctes(expression, sql) return self.prepend_ctes(expression, sql)
def intersect_sql(self, expression): def intersect_sql(self, expression):
@ -534,6 +556,21 @@ class Generator:
def introducer_sql(self, expression): def introducer_sql(self, expression):
return f"{self.sql(expression, 'this')} {self.sql(expression, 'expression')}" return f"{self.sql(expression, 'this')} {self.sql(expression, 'expression')}"
def rowformat_sql(self, expression):
    """Render a `ROW FORMAT DELIMITED` clause with its optional sub-clauses.

    Each sub-clause is rendered through `self.sql` so that its value is
    generated by this generator (and therefore this dialect's settings),
    consistent with every other `*_sql` method, rather than by formatting
    the raw arg object directly into the string.

    Args:
        expression: node providing the optional `fields`, `escaped`,
            `collection_items`, `map_keys`, `lines` and `null` args.

    Returns:
        str: `ROW FORMAT DELIMITED` followed by one segment per arg that
        renders to a non-empty string, in the fixed order listed below.
    """
    # (arg key, SQL keyword) pairs in the order the clauses must be emitted.
    clauses = (
        ("fields", "FIELDS TERMINATED BY"),
        ("escaped", "ESCAPED BY"),
        ("collection_items", "COLLECTION ITEMS TERMINATED BY"),
        ("map_keys", "MAP KEYS TERMINATED BY"),
        ("lines", "LINES TERMINATED BY"),
        ("null", "NULL DEFINED AS"),
    )

    segments = ["ROW FORMAT DELIMITED"]
    for key, keyword in clauses:
        value_sql = self.sql(expression, key)
        if value_sql:
            segments.append(f" {keyword} {value_sql}")
    return "".join(segments)
def table_sql(self, expression): def table_sql(self, expression):
table = ".".join( table = ".".join(
part part
@ -688,6 +725,19 @@ class Generator:
return f"{self.quote_start}{text}{self.quote_end}" return f"{self.quote_start}{text}{self.quote_end}"
return text return text
def loaddata_sql(self, expression):
    """Render a LOAD DATA statement.

    Output shape:
    `LOAD DATA[ LOCAL] INPATH <inpath>[ OVERWRITE] INTO TABLE <this>[ <partition>]
    [ INPUTFORMAT <input_format>][ SERDE <serde>]`.

    Args:
        expression: node providing the `local`, `inpath`, `overwrite`, `this`,
            `partition`, `input_format` and `serde` args.

    Returns:
        str: the assembled statement; optional parts are skipped when their
        arg is falsy or renders to an empty string.
    """
    segments = ["LOAD DATA"]

    if expression.args.get("local"):
        segments.append(" LOCAL")
    segments.append(f" INPATH {self.sql(expression, 'inpath')}")

    if expression.args.get("overwrite"):
        segments.append(" OVERWRITE")
    segments.append(f" INTO TABLE {self.sql(expression, 'this')}")

    # Trailing optional clauses: (arg key, leading keyword incl. its space).
    optional_clauses = (
        ("partition", ""),
        ("input_format", " INPUTFORMAT"),
        ("serde", " SERDE"),
    )
    for key, keyword in optional_clauses:
        rendered = self.sql(expression, key)
        if rendered:
            segments.append(f"{keyword} {rendered}")

    return "".join(segments)
def null_sql(self, *_): def null_sql(self, *_):
return "NULL" return "NULL"
@ -885,20 +935,24 @@ class Generator:
return f"EXISTS{self.wrap(expression)}" return f"EXISTS{self.wrap(expression)}"
def case_sql(self, expression): def case_sql(self, expression):
this = self.indent(self.sql(expression, "this"), skip_first=True) this = self.sql(expression, "this")
this = f" {this}" if this else "" statements = [f"CASE {this}" if this else "CASE"]
ifs = []
for e in expression.args["ifs"]: for e in expression.args["ifs"]:
ifs.append(self.indent(f"WHEN {self.sql(e, 'this')}")) statements.append(f"WHEN {self.sql(e, 'this')}")
ifs.append(self.indent(f"THEN {self.sql(e, 'true')}")) statements.append(f"THEN {self.sql(e, 'true')}")
if expression.args.get("default") is not None: default = self.sql(expression, "default")
ifs.append(self.indent(f"ELSE {self.sql(expression, 'default')}"))
ifs = "".join(self.seg(self.indent(e, skip_first=True)) for e in ifs) if default:
statement = f"CASE{this}{ifs}{self.seg('END')}" statements.append(f"ELSE {default}")
return statement
statements.append("END")
if self.pretty and self.text_width(statements) > self._max_text_width:
return self.indent("\n".join(statements), skip_first=True, skip_last=True)
return " ".join(statements)
def constraint_sql(self, expression): def constraint_sql(self, expression):
this = self.sql(expression, "this") this = self.sql(expression, "this")
@ -970,7 +1024,7 @@ class Generator:
return f"REFERENCES {this}({expressions})" return f"REFERENCES {this}({expressions})"
def anonymous_sql(self, expression): def anonymous_sql(self, expression):
args = self.indent(self.expressions(expression, flat=True), skip_first=True, skip_last=True) args = self.format_args(*expression.expressions)
return f"{self.normalize_func(self.sql(expression, 'this'))}({args})" return f"{self.normalize_func(self.sql(expression, 'this'))}({args})"
def paren_sql(self, expression): def paren_sql(self, expression):
@ -1008,7 +1062,9 @@ class Generator:
if not self.pretty: if not self.pretty:
return self.binary(expression, op) return self.binary(expression, op)
return f"\n{op} ".join(self.sql(e) for e in expression.flatten(unnest=False)) sqls = tuple(self.sql(e) for e in expression.flatten(unnest=False))
sep = "\n" if self.text_width(sqls) > self._max_text_width else " "
return f"{sep}{op} ".join(sqls)
def bitwiseand_sql(self, expression): def bitwiseand_sql(self, expression):
return self.binary(expression, "&") return self.binary(expression, "&")
@ -1039,7 +1095,7 @@ class Generator:
return f"{self.sql(expression, 'this').upper()} {expression.text('expression').strip()}" return f"{self.sql(expression, 'this').upper()} {expression.text('expression').strip()}"
def distinct_sql(self, expression): def distinct_sql(self, expression):
this = self.sql(expression, "this") this = self.expressions(expression, flat=True)
this = f" {this}" if this else "" this = f" {this}" if this else ""
on = self.sql(expression, "on") on = self.sql(expression, "on")
@ -1128,13 +1184,23 @@ class Generator:
def function_fallback_sql(self, expression): def function_fallback_sql(self, expression):
args = [] args = []
for arg_key in expression.arg_types: for arg_value in expression.args.values():
arg_value = ensure_list(expression.args.get(arg_key) or []) if isinstance(arg_value, list):
for a in arg_value: for value in arg_value:
args.append(self.sql(a)) args.append(value)
elif arg_value:
args.append(arg_value)
args_str = self.indent(", ".join(args), skip_first=True, skip_last=True) return f"{self.normalize_func(expression.sql_name())}({self.format_args(*args)})"
return f"{self.normalize_func(expression.sql_name())}({args_str})"
def format_args(self, *args):
    """
    Render function-call arguments as a single SQL fragment.

    Each non-None argument is converted with `self.sql`. When pretty printing is
    enabled and the combined width of the rendered arguments exceeds
    `self._max_text_width`, the arguments are placed one per line and passed
    through `self.indent`; otherwise they are joined on one line with ", ".

    Args:
        *args: values accepted by `self.sql`; None entries are dropped.

    Returns:
        str: the formatted argument list (without surrounding parentheses).
    """
    rendered = tuple(self.sql(value) for value in args if value is not None)

    fits_on_one_line = not (self.pretty and self.text_width(rendered) > self._max_text_width)
    if fits_on_one_line:
        return ", ".join(rendered)

    # Leading/trailing newlines put the opening and closing parens on their own lines.
    multiline = "\n" + ",\n".join(rendered) + "\n"
    return self.indent(multiline, skip_first=True, skip_last=True)
def text_width(self, args):
    """
    Total number of characters across all strings in `args`.

    Args:
        args: iterable of sized values (rendered SQL fragments).

    Returns:
        int: sum of the individual lengths; 0 for an empty iterable.
    """
    return sum(map(len, args))
def format_time(self, expression): def format_time(self, expression):
return format_time(self.sql(expression, "format"), self.time_mapping, self.time_trie) return format_time(self.sql(expression, "format"), self.time_mapping, self.time_trie)

View file

@ -0,0 +1,42 @@
from sqlglot.optimizer.scope import Scope, build_scope
def eliminate_ctes(expression):
    """
    Remove unused CTEs from an expression.

    Example:
        >>> import sqlglot
        >>> sql = "WITH y AS (SELECT a FROM x) SELECT a FROM z"
        >>> expression = sqlglot.parse_one(sql)
        >>> eliminate_ctes(expression).sql()
        'SELECT a FROM z'

    Args:
        expression (sqlglot.Expression): expression to optimize
    Returns:
        sqlglot.Expression: optimized expression
    """
    root_scope = build_scope(expression)
    references = root_scope.ref_count()

    # Walk the scope tree in reverse traversal order so that dropping one CTE can
    # cascade to the CTEs it was the only consumer of.
    for scope in reversed(list(root_scope.traverse())):
        if not scope.is_cte or references[id(scope)] > 0:
            continue

        cte = scope.expression.parent
        with_clause = cte.parent
        cte.pop()

        # Nothing left in the WITH clause: drop the clause itself.
        if len(with_clause.expressions) == 0:
            with_clause.pop()

        # The removed CTE no longer counts as a consumer of the scopes it selected from.
        for _, source in scope.selected_sources.values():
            if isinstance(source, Scope):
                references[id(source)] -= 1

    return expression

View file

@ -0,0 +1,160 @@
from sqlglot import expressions as exp
from sqlglot.optimizer.normalize import normalized
from sqlglot.optimizer.scope import Scope, traverse_scope
from sqlglot.optimizer.simplify import simplify
def eliminate_joins(expression):
    """
    Remove unused joins from an expression.

    This only removes joins when we know that the join condition doesn't produce duplicate rows.

    Example:
        >>> import sqlglot
        >>> sql = "SELECT x.a FROM x LEFT JOIN (SELECT DISTINCT y.b FROM y) AS y ON x.b = y.b"
        >>> expression = sqlglot.parse_one(sql)
        >>> eliminate_joins(expression).sql()
        'SELECT x.a FROM x'

    Args:
        expression (sqlglot.Expression): expression to optimize
    Returns:
        sqlglot.Expression: optimized expression
    """
    for scope in traverse_scope(expression):
        # With unqualified columns we cannot tell which source a column belongs to,
        # so we cannot prove a join is unused. It may be possible to infer this from
        # the outputs of derived tables, but for now such scopes are left untouched.
        if not scope.unqualified_columns:
            scope_joins = scope.expression.args.get("joins", [])

            # Go last-to-first so chains of unused joins can be removed.
            for candidate in reversed(scope_joins):
                source_name = candidate.this.alias_or_name
                if _should_eliminate_join(scope, candidate, source_name):
                    candidate.pop()
                    scope.remove_source(source_name)

    return expression
def _should_eliminate_join(scope, join, alias):
    """
    Decide whether `join` (known in `scope` as `alias`) can be dropped.

    The joined source must be a Scope, nothing outside the ON clause may reference
    it, and either it is a LEFT join on all of the source's unique outputs or it
    has no ON clause and the source yields a single row.
    """
    inner_source = scope.sources.get(alias)

    if not isinstance(inner_source, Scope):
        return False
    if _join_is_used(scope, join, alias):
        return False

    if join.side == "LEFT" and _is_joined_on_all_unique_outputs(inner_source, join):
        return True
    return not join.args.get("on") and _has_single_output_row(inner_source)
def _join_is_used(scope, join, alias):
    """
    Check whether any column outside the join's ON clause references `alias`.

    Args:
        scope: scope whose `source_columns(alias)` lists the columns referencing the source.
        join: join expression; its optional "on" arg is searched for columns to ignore.
        alias (str): name the joined source is known by in `scope`.

    Returns:
        bool: True if at least one referencing column is not part of the ON clause.
    """
    # Columns inside the ON clause don't count as "uses" of the join, so collect
    # their identities in order to exclude exactly those node objects.
    on = join.args.get("on")
    on_clause_columns = {id(column) for column in on.find_all(exp.Column)} if on else set()

    # Test the membership condition itself rather than the truthiness of the
    # surviving column objects.
    return any(id(column) not in on_clause_columns for column in scope.source_columns(alias))
def _is_joined_on_all_unique_outputs(scope, join):
    """
    True when `scope` has unique outputs and every one of them is a join key of `join`.

    Join keys are matched against the unique output names by their `name`.
    """
    unique_names = _unique_outputs(scope)
    if not unique_names:
        return False

    _, join_keys, _ = join_condition(join)
    join_key_names = {key.name for key in join_keys}
    return unique_names <= join_key_names
def _unique_outputs(scope):
    """Determine output columns of `scope` that must have a unique combination per row"""
    select = scope.expression

    # SELECT DISTINCT: the full projection list is unique.
    if select.args.get("distinct"):
        return set(select.named_selects)

    group = select.args.get("group")
    if not group:
        # No grouping: only a single-row result guarantees uniqueness.
        if _has_single_output_row(scope):
            return set(select.named_selects)
        return set()

    group_keys = set(group.expressions)
    projected_keys = set()
    output_names = set()
    for projection in scope.selects:
        unaliased = projection.unalias()
        if unaliased in group_keys:
            projected_keys.add(unaliased)
            output_names.add(projection.alias_or_name)

    # Every grouped expression has to be projected, otherwise the projected
    # subset is not guaranteed to be unique.
    if group_keys - projected_keys:
        return set()
    return output_names
def _has_single_output_row(scope):
    """
    Check whether the scope's expression is a SELECT for which any of these hold:
    every projection is an aggregate function, it has LIMIT 1, or it has no FROM.
    """
    select = scope.expression
    if not isinstance(select, exp.Select):
        return False

    if all(isinstance(projection.unalias(), exp.AggFunc) for projection in scope.selects):
        return True

    return _is_limit_1(scope) or not select.args.get("from")
def _is_limit_1(scope):
    """
    Check whether the scope's expression has a LIMIT whose value is the literal "1".

    Args:
        scope: scope whose `expression.args` may contain a "limit" entry.

    Returns:
        bool: True only for LIMIT 1; False when there is no LIMIT (previously the
        missing-LIMIT case leaked `None` out of this predicate).
    """
    limit = scope.expression.args.get("limit")
    return bool(limit and limit.expression.this == "1")
def join_condition(join):
    """
    Extract the join condition from a join expression.

    Equality predicates in the ON clause that compare the joined table (on one
    side only) with other tables are split out as key pairs; whatever is left
    after simplification is returned as the remaining predicate.

    Args:
        join (exp.Join)

    Returns:
        tuple[list[exp.Expression], list[exp.Expression], exp.Expression | None]:
            Tuple of (source key, join key, remaining predicate). The two key
            lists are index-aligned. The remaining predicate is None when the
            simplified condition equals exp.TRUE.
    """
    name = join.this.alias_or_name
    # Work on a copy: matched equalities are replaced in place below.
    on = join.args.get("on") or exp.TRUE
    on = on.copy()
    source_key = []
    join_key = []

    # find the join keys
    # SELECT
    # FROM x
    # JOIN y
    #   ON x.a = y.b AND y.b > 1
    #
    # should pull y.b as the join key and x.a as the source key
    if normalized(on):
        # NOTE(review): when `on` is a single EQ (not wrapped in an AND) the condition
        # being replaced is the root of the copy — confirm `replace` on a parentless
        # node behaves as intended here.
        for condition in on.flatten() if isinstance(on, exp.And) else [on]:
            if isinstance(condition, exp.EQ):
                left, right = condition.unnest_operands()
                left_tables = exp.column_table_names(left)
                right_tables = exp.column_table_names(right)

                # Only equalities where the joined table appears on exactly one side are keys.
                if name in left_tables and name not in right_tables:
                    join_key.append(left)
                    source_key.append(right)
                    condition.replace(exp.TRUE)
                elif name in right_tables and name not in left_tables:
                    join_key.append(right)
                    source_key.append(left)
                    condition.replace(exp.TRUE)

    on = simplify(on)
    remaining_condition = None if on == exp.TRUE else on

    return source_key, join_key, remaining_condition

View file

@ -8,7 +8,7 @@ from sqlglot.optimizer.simplify import simplify
def eliminate_subqueries(expression): def eliminate_subqueries(expression):
""" """
Rewrite subqueries as CTES, deduplicating if possible. Rewrite derived tables as CTES, deduplicating if possible.
Example: Example:
>>> import sqlglot >>> import sqlglot

View file

@ -119,6 +119,23 @@ def _mergeable(outer_scope, inner_select, leave_tables_isolated, from_or_join):
Returns: Returns:
bool: True if can be merged bool: True if can be merged
""" """
def _is_a_window_expression_in_unmergable_operation():
    """
    Check whether the outer scope uses a window-function output of the inner select
    underneath a Where, Group, Order, Join, Having or AggFunc node.
    """
    window_names = {window.parent.alias_or_name for window in inner_select.find_all(exp.Window)}
    derived_table_name = inner_select.parent.alias_or_name
    blocking_ancestors = (exp.Where, exp.Group, exp.Order, exp.Join, exp.Having, exp.AggFunc)

    columns_under_blocking_ancestor = (
        column for column in outer_scope.columns if column.find_ancestor(*blocking_ancestors)
    )
    return any(
        column
        for column in columns_under_blocking_ancestor
        if column.table == derived_table_name and column.name in window_names
    )
return ( return (
isinstance(outer_scope.expression, exp.Select) isinstance(outer_scope.expression, exp.Select)
and isinstance(inner_select, exp.Select) and isinstance(inner_select, exp.Select)
@ -137,6 +154,7 @@ def _mergeable(outer_scope, inner_select, leave_tables_isolated, from_or_join):
and inner_select.args.get("where") and inner_select.args.get("where")
and any(j.side in {"FULL", "RIGHT"} for j in outer_scope.expression.args.get("joins", [])) and any(j.side in {"FULL", "RIGHT"} for j in outer_scope.expression.args.get("joins", []))
) )
and not _is_a_window_expression_in_unmergable_operation()
) )

View file

@ -1,3 +1,5 @@
from sqlglot.optimizer.eliminate_ctes import eliminate_ctes
from sqlglot.optimizer.eliminate_joins import eliminate_joins
from sqlglot.optimizer.eliminate_subqueries import eliminate_subqueries from sqlglot.optimizer.eliminate_subqueries import eliminate_subqueries
from sqlglot.optimizer.expand_multi_table_selects import expand_multi_table_selects from sqlglot.optimizer.expand_multi_table_selects import expand_multi_table_selects
from sqlglot.optimizer.isolate_table_selects import isolate_table_selects from sqlglot.optimizer.isolate_table_selects import isolate_table_selects
@ -23,6 +25,8 @@ RULES = (
optimize_joins, optimize_joins,
eliminate_subqueries, eliminate_subqueries,
merge_subqueries, merge_subqueries,
eliminate_joins,
eliminate_ctes,
quote_identities, quote_identities,
) )

View file

@ -1,8 +1,6 @@
from collections import defaultdict
from sqlglot import exp from sqlglot import exp
from sqlglot.optimizer.normalize import normalized from sqlglot.optimizer.normalize import normalized
from sqlglot.optimizer.scope import traverse_scope from sqlglot.optimizer.scope import build_scope
from sqlglot.optimizer.simplify import simplify from sqlglot.optimizer.simplify import simplify
@ -22,15 +20,10 @@ def pushdown_predicates(expression):
Returns: Returns:
sqlglot.Expression: optimized expression sqlglot.Expression: optimized expression
""" """
scope_ref_count = defaultdict(lambda: 0) root = build_scope(expression)
scopes = traverse_scope(expression) scope_ref_count = root.ref_count()
scopes.reverse()
for scope in scopes: for scope in reversed(list(root.traverse())):
for _, source in scope.selected_sources.values():
scope_ref_count[id(source)] += 1
for scope in scopes:
select = scope.expression select = scope.expression
where = select.args.get("where") where = select.args.get("where")
if where: if where:
@ -152,9 +145,11 @@ def nodes_for_predicate(predicate, sources, scope_ref_count):
return {} return {}
nodes[table] = node nodes[table] = node
elif isinstance(node, exp.Select) and len(tables) == 1: elif isinstance(node, exp.Select) and len(tables) == 1:
# We can't push down window expressions
has_window_expression = any(select for select in node.selects if select.find(exp.Window))
# we can't push down predicates to select statements if they are referenced in # we can't push down predicates to select statements if they are referenced in
# multiple places. # multiple places.
if not node.args.get("group") and scope_ref_count[id(source)] < 2: if not node.args.get("group") and scope_ref_count[id(source)] < 2 and not has_window_expression:
nodes[table] = node nodes[table] = node
return nodes return nodes

View file

@ -1,4 +1,5 @@
import itertools import itertools
from collections import defaultdict
from enum import Enum, auto from enum import Enum, auto
from sqlglot import exp from sqlglot import exp
@ -314,6 +315,16 @@ class Scope:
self._external_columns = [c for c in self.columns if c.table not in self.selected_sources] self._external_columns = [c for c in self.columns if c.table not in self.selected_sources]
return self._external_columns return self._external_columns
@property
def unqualified_columns(self):
    """
    Columns of this scope that carry no table qualifier.

    Returns:
        list[exp.Column]: every column in `self.columns` whose `table` is falsy
    """
    return [column for column in self.columns if not column.table]
@property @property
def join_hints(self): def join_hints(self):
""" """
@ -403,6 +414,21 @@ class Scope:
yield from child_scope.traverse() yield from child_scope.traverse()
yield self yield self
def ref_count(self):
    """
    Tally how often each source is selected from anywhere in this scope tree.

    Returns:
        dict[int, int]: Mapping of `id(source)` to the number of references;
            unseen ids read as 0
    """
    counts = defaultdict(int)

    for scope in self.traverse():
        # selected_sources values are pairs; only the second element (the source) is counted
        for selected in scope.selected_sources.values():
            _, source = selected
            counts[id(source)] += 1

    return counts
def traverse_scope(expression): def traverse_scope(expression):
""" """

View file

@ -135,11 +135,13 @@ class Parser:
TokenType.BOTH, TokenType.BOTH,
TokenType.BUCKET, TokenType.BUCKET,
TokenType.CACHE, TokenType.CACHE,
TokenType.CALL,
TokenType.COLLATE, TokenType.COLLATE,
TokenType.COMMIT, TokenType.COMMIT,
TokenType.CONSTRAINT, TokenType.CONSTRAINT,
TokenType.DEFAULT, TokenType.DEFAULT,
TokenType.DELETE, TokenType.DELETE,
TokenType.DESCRIBE,
TokenType.DETERMINISTIC, TokenType.DETERMINISTIC,
TokenType.EXECUTE, TokenType.EXECUTE,
TokenType.ENGINE, TokenType.ENGINE,
@ -160,6 +162,7 @@ class Parser:
TokenType.LAZY, TokenType.LAZY,
TokenType.LANGUAGE, TokenType.LANGUAGE,
TokenType.LEADING, TokenType.LEADING,
TokenType.LOCAL,
TokenType.LOCATION, TokenType.LOCATION,
TokenType.MATERIALIZED, TokenType.MATERIALIZED,
TokenType.NATURAL, TokenType.NATURAL,
@ -176,6 +179,7 @@ class Parser:
TokenType.REFERENCES, TokenType.REFERENCES,
TokenType.RETURNS, TokenType.RETURNS,
TokenType.ROWS, TokenType.ROWS,
TokenType.SCHEMA,
TokenType.SCHEMA_COMMENT, TokenType.SCHEMA_COMMENT,
TokenType.SEED, TokenType.SEED,
TokenType.SEMI, TokenType.SEMI,
@ -294,6 +298,11 @@ class Parser:
COLUMN_OPERATORS = { COLUMN_OPERATORS = {
TokenType.DOT: None, TokenType.DOT: None,
TokenType.DCOLON: lambda self, this, to: self.expression(
exp.Cast,
this=this,
to=to,
),
TokenType.ARROW: lambda self, this, path: self.expression( TokenType.ARROW: lambda self, this, path: self.expression(
exp.JSONExtract, exp.JSONExtract,
this=this, this=this,
@ -342,8 +351,10 @@ class Parser:
STATEMENT_PARSERS = { STATEMENT_PARSERS = {
TokenType.CREATE: lambda self: self._parse_create(), TokenType.CREATE: lambda self: self._parse_create(),
TokenType.DESCRIBE: lambda self: self._parse_describe(),
TokenType.DROP: lambda self: self._parse_drop(), TokenType.DROP: lambda self: self._parse_drop(),
TokenType.INSERT: lambda self: self._parse_insert(), TokenType.INSERT: lambda self: self._parse_insert(),
TokenType.LOAD_DATA: lambda self: self._parse_load_data(),
TokenType.UPDATE: lambda self: self._parse_update(), TokenType.UPDATE: lambda self: self._parse_update(),
TokenType.DELETE: lambda self: self._parse_delete(), TokenType.DELETE: lambda self: self._parse_delete(),
TokenType.CACHE: lambda self: self._parse_cache(), TokenType.CACHE: lambda self: self._parse_cache(),
@ -449,7 +460,14 @@ class Parser:
MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)
CREATABLES = {TokenType.TABLE, TokenType.VIEW, TokenType.FUNCTION, TokenType.INDEX, TokenType.PROCEDURE} CREATABLES = {
TokenType.TABLE,
TokenType.VIEW,
TokenType.FUNCTION,
TokenType.INDEX,
TokenType.PROCEDURE,
TokenType.SCHEMA,
}
STRICT_CAST = True STRICT_CAST = True
@ -650,7 +668,7 @@ class Parser:
materialized = self._match(TokenType.MATERIALIZED) materialized = self._match(TokenType.MATERIALIZED)
kind = self._match_set(self.CREATABLES) and self._prev.text kind = self._match_set(self.CREATABLES) and self._prev.text
if not kind: if not kind:
self.raise_error("Expected TABLE, VIEW, INDEX, FUNCTION, or PROCEDURE") self.raise_error(f"Expected {self.CREATABLES}")
return return
return self.expression( return self.expression(
@ -677,7 +695,7 @@ class Parser:
create_token = self._match_set(self.CREATABLES) and self._prev create_token = self._match_set(self.CREATABLES) and self._prev
if not create_token: if not create_token:
self.raise_error("Expected TABLE, VIEW, INDEX, FUNCTION, or PROCEDURE") self.raise_error(f"Expected {self.CREATABLES}")
return return
exists = self._parse_exists(not_=True) exists = self._parse_exists(not_=True)
@ -692,7 +710,7 @@ class Parser:
expression = self._parse_select_or_expression() expression = self._parse_select_or_expression()
elif create_token.token_type == TokenType.INDEX: elif create_token.token_type == TokenType.INDEX:
this = self._parse_index() this = self._parse_index()
elif create_token.token_type in (TokenType.TABLE, TokenType.VIEW): elif create_token.token_type in (TokenType.TABLE, TokenType.VIEW, TokenType.SCHEMA):
this = self._parse_table(schema=True) this = self._parse_table(schema=True)
properties = self._parse_properties() properties = self._parse_properties()
if self._match(TokenType.ALIAS): if self._match(TokenType.ALIAS):
@ -836,19 +854,74 @@ class Parser:
return self.expression(exp.Properties, expressions=properties) return self.expression(exp.Properties, expressions=properties)
return None return None
def _parse_describe(self):
    """Parse the target of a DESCRIBE statement (optional TABLE keyword, then an identifier)."""
    # The TABLE keyword is optional; consume it when present.
    self._match(TokenType.TABLE)
    target = self._parse_id_var()
    return self.expression(exp.Describe, this=target)
def _parse_insert(self): def _parse_insert(self):
overwrite = self._match(TokenType.OVERWRITE) overwrite = self._match(TokenType.OVERWRITE)
self._match(TokenType.INTO) local = self._match(TokenType.LOCAL)
self._match(TokenType.TABLE) if self._match_text("DIRECTORY"):
this = self.expression(
exp.Directory,
this=self._parse_var_or_string(),
local=local,
row_format=self._parse_row_format(),
)
else:
self._match(TokenType.INTO)
self._match(TokenType.TABLE)
this = self._parse_table(schema=True)
return self.expression( return self.expression(
exp.Insert, exp.Insert,
this=self._parse_table(schema=True), this=this,
exists=self._parse_exists(), exists=self._parse_exists(),
partition=self._parse_partition(), partition=self._parse_partition(),
expression=self._parse_select(nested=True), expression=self._parse_select(nested=True),
overwrite=overwrite, overwrite=overwrite,
) )
def _parse_row_format(self):
    """
    Parse an optional ROW FORMAT clause into an exp.RowFormat.

    Returns:
        exp.RowFormat built from whichever delimiter sub-clauses were present,
        or None when the next tokens are not ROW FORMAT.
    """
    if not self._match_pair(TokenType.ROW, TokenType.FORMAT):
        return None

    # DELIMITED is consumed when present but is not required.
    self._match_text("DELIMITED")

    # (RowFormat argument name, keyword sequence) pairs, probed in this fixed order.
    delimiter_clauses = (
        ("fields", ("FIELDS", "TERMINATED", "BY")),
        ("escaped", ("ESCAPED", "BY")),
        ("collection_items", ("COLLECTION", "ITEMS", "TERMINATED", "BY")),
        ("map_keys", ("MAP", "KEYS", "TERMINATED", "BY")),
        ("lines", ("LINES", "TERMINATED", "BY")),
        ("null", ("NULL", "DEFINED", "AS")),
    )

    parsed = {}
    for arg_name, keywords in delimiter_clauses:
        if self._match_text(*keywords):
            parsed[arg_name] = self._parse_string()

    return self.expression(exp.RowFormat, **parsed)
def _parse_load_data(self):
    """
    Parse the remainder of a LOAD DATA statement into an exp.LoadData.

    Tokens are consumed in this order: optional LOCAL, INPATH and its string,
    optional OVERWRITE, INTO TABLE, the table, an optional partition, then
    optional INPUTFORMAT and SERDE strings.
    """
    local = self._match(TokenType.LOCAL)
    self._match_text("INPATH")
    inpath = self._parse_string()
    overwrite = self._match(TokenType.OVERWRITE)
    self._match_pair(TokenType.INTO, TokenType.TABLE)

    # The statements below must stay in this order; each consumes tokens.
    table = self._parse_table(schema=True)
    partition = self._parse_partition()
    # `and` keeps the falsy match result when the keyword is absent.
    input_format = self._match_text("INPUTFORMAT") and self._parse_string()
    serde = self._match_text("SERDE") and self._parse_string()

    return self.expression(
        exp.LoadData,
        this=table,
        local=local,
        overwrite=overwrite,
        inpath=inpath,
        partition=partition,
        input_format=input_format,
        serde=serde,
    )
def _parse_delete(self): def _parse_delete(self):
self._match(TokenType.FROM) self._match(TokenType.FROM)
@ -1484,6 +1557,14 @@ class Parser:
if self._match_set(self.RANGE_PARSERS): if self._match_set(self.RANGE_PARSERS):
this = self.RANGE_PARSERS[self._prev.token_type](self, this) this = self.RANGE_PARSERS[self._prev.token_type](self, this)
elif self._match(TokenType.ISNULL):
this = self.expression(exp.Is, this=this, expression=exp.Null())
# Postgres supports ISNULL and NOTNULL for conditions.
# https://blog.andreiavram.ro/postgresql-null-composite-type/
if self._match(TokenType.NOTNULL):
this = self.expression(exp.Is, this=this, expression=exp.Null())
this = self.expression(exp.Not, this=this)
if negate: if negate:
this = self.expression(exp.Not, this=this) this = self.expression(exp.Not, this=this)
@ -1582,12 +1663,6 @@ class Parser:
return self._parse_column() return self._parse_column()
return type_token return type_token
while self._match(TokenType.DCOLON):
type_token = self._parse_types()
if not type_token:
self.raise_error("Expected type")
this = self.expression(exp.Cast, this=this, to=type_token)
return this return this
def _parse_types(self): def _parse_types(self):
@ -1601,6 +1676,11 @@ class Parser:
is_struct = type_token == TokenType.STRUCT is_struct = type_token == TokenType.STRUCT
expressions = None expressions = None
if not nested and self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
return exp.DataType(
this=exp.DataType.Type.ARRAY, expressions=[exp.DataType.build(type_token.value)], nested=True
)
if self._match(TokenType.L_BRACKET): if self._match(TokenType.L_BRACKET):
self._retreat(index) self._retreat(index)
return None return None
@ -1611,7 +1691,7 @@ class Parser:
elif nested: elif nested:
expressions = self._parse_csv(self._parse_types) expressions = self._parse_csv(self._parse_types)
else: else:
expressions = self._parse_csv(self._parse_type) expressions = self._parse_csv(self._parse_conjunction)
if not expressions: if not expressions:
self._retreat(index) self._retreat(index)
@ -1677,8 +1757,17 @@ class Parser:
this = self._parse_bracket(this) this = self._parse_bracket(this)
while self._match_set(self.COLUMN_OPERATORS): while self._match_set(self.COLUMN_OPERATORS):
op = self.COLUMN_OPERATORS.get(self._prev.token_type) op_token = self._prev.token_type
field = self._parse_star() or self._parse_function() or self._parse_id_var() op = self.COLUMN_OPERATORS.get(op_token)
if op_token == TokenType.DCOLON:
field = self._parse_types()
if not field:
self.raise_error("Expected type")
elif op:
field = exp.Literal.string(self._advance() or self._prev.text)
else:
field = self._parse_star() or self._parse_function() or self._parse_id_var()
if isinstance(field, exp.Func): if isinstance(field, exp.Func):
# bigquery allows function calls like x.y.count(...) # bigquery allows function calls like x.y.count(...)
@ -1687,7 +1776,7 @@ class Parser:
this = self._replace_columns_with_dots(this) this = self._replace_columns_with_dots(this)
if op: if op:
this = op(self, this, exp.Literal.string(field.name)) this = op(self, this, field)
elif isinstance(this, exp.Column) and not this.table: elif isinstance(this, exp.Column) and not this.table:
this = self.expression(exp.Column, this=field, table=this.this) this = self.expression(exp.Column, this=field, table=this.this)
else: else:
@ -1808,11 +1897,10 @@ class Parser:
if not self._match(TokenType.ARROW): if not self._match(TokenType.ARROW):
self._retreat(index) self._retreat(index)
distinct = self._match(TokenType.DISTINCT) if self._match(TokenType.DISTINCT):
this = self._parse_conjunction() this = self.expression(exp.Distinct, expressions=self._parse_csv(self._parse_conjunction))
else:
if distinct: this = self._parse_conjunction()
this = self.expression(exp.Distinct, this=this)
if self._match(TokenType.IGNORE_NULLS): if self._match(TokenType.IGNORE_NULLS):
this = self.expression(exp.IgnoreNulls, this=this) this = self.expression(exp.IgnoreNulls, this=this)
@ -2112,6 +2200,8 @@ class Parser:
this = self.expression(exp.Filter, this=this, expression=self._parse_where()) this = self.expression(exp.Filter, this=this, expression=self._parse_where())
self._match_r_paren() self._match_r_paren()
# T-SQL allows the OVER (...) syntax after WITHIN GROUP.
# https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
if self._match(TokenType.WITHIN_GROUP): if self._match(TokenType.WITHIN_GROUP):
self._match_l_paren() self._match_l_paren()
this = self.expression( this = self.expression(
@ -2120,7 +2210,6 @@ class Parser:
expression=self._parse_order(), expression=self._parse_order(),
) )
self._match_r_paren() self._match_r_paren()
return this
# SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
# Some dialects choose to implement and some do not. # Some dialects choose to implement and some do not.
@ -2366,6 +2455,16 @@ class Parser:
if not self._match(TokenType.R_PAREN): if not self._match(TokenType.R_PAREN):
self.raise_error("Expecting )") self.raise_error("Expecting )")
def _match_text(self, *texts):
    """
    Consume a sequence of tokens whose text matches `texts`, ignoring case.

    All-or-nothing: if any token fails to match (or the input ends), the parser
    is moved back to where it started and nothing is consumed.

    Args:
        *texts (str): expected token texts, in order. Both the token text and
            the expected text are upper-cased before comparing, so callers may
            pass keywords in any case.

    Returns:
        bool: True if every text matched (vacuously True for no texts), else False.
    """
    start = self._index

    for text in texts:
        current = self._curr
        if not current or current.text.upper() != text.upper():
            # Undo any partial progress before reporting the mismatch.
            self._retreat(start)
            return False
        self._advance()

    return True
def _replace_columns_with_dots(self, this): def _replace_columns_with_dots(self, this):
if isinstance(this, exp.Dot): if isinstance(this, exp.Dot):
exp.replace_children(this, self._replace_columns_with_dots) exp.replace_children(this, self._replace_columns_with_dots)

View file

@ -3,7 +3,7 @@ import math
from sqlglot import alias, exp from sqlglot import alias, exp
from sqlglot.errors import UnsupportedError from sqlglot.errors import UnsupportedError
from sqlglot.optimizer.simplify import simplify from sqlglot.optimizer.eliminate_joins import join_condition
class Plan: class Plan:
@ -236,40 +236,12 @@ class Join(Step):
step = Join() step = Join()
for join in joins: for join in joins:
name = join.this.alias source_key, join_key, condition = join_condition(join)
on = join.args.get("on") or exp.TRUE step.joins[join.this.alias_or_name] = {
source_key = []
join_key = []
# find the join keys
# SELECT
# FROM x
# JOIN y
# ON x.a = y.b AND y.b > 1
#
# should pull y.b as the join key and x.a as the source key
for condition in on.flatten() if isinstance(on, exp.And) else [on]:
if isinstance(condition, exp.EQ):
left, right = condition.unnest_operands()
left_tables = exp.column_table_names(left)
right_tables = exp.column_table_names(right)
if name in left_tables and name not in right_tables:
join_key.append(left)
source_key.append(right)
condition.replace(exp.TRUE)
elif name in right_tables and name not in left_tables:
join_key.append(right)
source_key.append(left)
condition.replace(exp.TRUE)
on = simplify(on)
step.joins[name] = {
"side": join.side, "side": join.side,
"join_key": join_key, "join_key": join_key,
"source_key": source_key, "source_key": source_key,
"condition": None if on == exp.TRUE else on, "condition": condition,
} }
step.add_dependency(Scan.from_expression(join.this, ctes)) step.add_dependency(Scan.from_expression(join.this, ctes))

View file

@ -123,6 +123,7 @@ class TokenType(AutoName):
CLUSTER_BY = auto() CLUSTER_BY = auto()
COLLATE = auto() COLLATE = auto()
COMMENT = auto() COMMENT = auto()
COMMENT_ON = auto()
COMMIT = auto() COMMIT = auto()
CONSTRAINT = auto() CONSTRAINT = auto()
CREATE = auto() CREATE = auto()
@ -133,13 +134,14 @@ class TokenType(AutoName):
CURRENT_ROW = auto() CURRENT_ROW = auto()
CURRENT_TIME = auto() CURRENT_TIME = auto()
CURRENT_TIMESTAMP = auto() CURRENT_TIMESTAMP = auto()
DIV = auto()
DEFAULT = auto() DEFAULT = auto()
DELETE = auto() DELETE = auto()
DESC = auto() DESC = auto()
DESCRIBE = auto()
DETERMINISTIC = auto() DETERMINISTIC = auto()
DISTINCT = auto() DISTINCT = auto()
DISTRIBUTE_BY = auto() DISTRIBUTE_BY = auto()
DIV = auto()
DROP = auto() DROP = auto()
ELSE = auto() ELSE = auto()
END = auto() END = auto()
@ -189,6 +191,8 @@ class TokenType(AutoName):
LEFT = auto() LEFT = auto()
LIKE = auto() LIKE = auto()
LIMIT = auto() LIMIT = auto()
LOAD_DATA = auto()
LOCAL = auto()
LOCATION = auto() LOCATION = auto()
MAP = auto() MAP = auto()
MATERIALIZED = auto() MATERIALIZED = auto()
@ -196,6 +200,7 @@ class TokenType(AutoName):
NATURAL = auto() NATURAL = auto()
NEXT = auto() NEXT = auto()
NO_ACTION = auto() NO_ACTION = auto()
NOTNULL = auto()
NULL = auto() NULL = auto()
NULLS_FIRST = auto() NULLS_FIRST = auto()
NULLS_LAST = auto() NULLS_LAST = auto()
@ -436,13 +441,14 @@ class Tokenizer(metaclass=_Tokenizer):
"CURRENT_DATE": TokenType.CURRENT_DATE, "CURRENT_DATE": TokenType.CURRENT_DATE,
"CURRENT ROW": TokenType.CURRENT_ROW, "CURRENT ROW": TokenType.CURRENT_ROW,
"CURRENT_TIMESTAMP": TokenType.CURRENT_TIMESTAMP, "CURRENT_TIMESTAMP": TokenType.CURRENT_TIMESTAMP,
"DIV": TokenType.DIV,
"DEFAULT": TokenType.DEFAULT, "DEFAULT": TokenType.DEFAULT,
"DELETE": TokenType.DELETE, "DELETE": TokenType.DELETE,
"DESC": TokenType.DESC, "DESC": TokenType.DESC,
"DESCRIBE": TokenType.DESCRIBE,
"DETERMINISTIC": TokenType.DETERMINISTIC, "DETERMINISTIC": TokenType.DETERMINISTIC,
"DISTINCT": TokenType.DISTINCT, "DISTINCT": TokenType.DISTINCT,
"DISTRIBUTE BY": TokenType.DISTRIBUTE_BY, "DISTRIBUTE BY": TokenType.DISTRIBUTE_BY,
"DIV": TokenType.DIV,
"DROP": TokenType.DROP, "DROP": TokenType.DROP,
"ELSE": TokenType.ELSE, "ELSE": TokenType.ELSE,
"END": TokenType.END, "END": TokenType.END,
@ -487,12 +493,15 @@ class Tokenizer(metaclass=_Tokenizer):
"LEFT": TokenType.LEFT, "LEFT": TokenType.LEFT,
"LIKE": TokenType.LIKE, "LIKE": TokenType.LIKE,
"LIMIT": TokenType.LIMIT, "LIMIT": TokenType.LIMIT,
"LOAD DATA": TokenType.LOAD_DATA,
"LOCAL": TokenType.LOCAL,
"LOCATION": TokenType.LOCATION, "LOCATION": TokenType.LOCATION,
"MATERIALIZED": TokenType.MATERIALIZED, "MATERIALIZED": TokenType.MATERIALIZED,
"NATURAL": TokenType.NATURAL, "NATURAL": TokenType.NATURAL,
"NEXT": TokenType.NEXT, "NEXT": TokenType.NEXT,
"NO ACTION": TokenType.NO_ACTION, "NO ACTION": TokenType.NO_ACTION,
"NOT": TokenType.NOT, "NOT": TokenType.NOT,
"NOTNULL": TokenType.NOTNULL,
"NULL": TokenType.NULL, "NULL": TokenType.NULL,
"NULLS FIRST": TokenType.NULLS_FIRST, "NULLS FIRST": TokenType.NULLS_FIRST,
"NULLS LAST": TokenType.NULLS_LAST, "NULLS LAST": TokenType.NULLS_LAST,
@ -530,6 +539,7 @@ class Tokenizer(metaclass=_Tokenizer):
"ROLLUP": TokenType.ROLLUP, "ROLLUP": TokenType.ROLLUP,
"ROW": TokenType.ROW, "ROW": TokenType.ROW,
"ROWS": TokenType.ROWS, "ROWS": TokenType.ROWS,
"SCHEMA": TokenType.SCHEMA,
"SEED": TokenType.SEED, "SEED": TokenType.SEED,
"SELECT": TokenType.SELECT, "SELECT": TokenType.SELECT,
"SEMI": TokenType.SEMI, "SEMI": TokenType.SEMI,
@ -629,6 +639,7 @@ class Tokenizer(metaclass=_Tokenizer):
TokenType.ANALYZE, TokenType.ANALYZE,
TokenType.BEGIN, TokenType.BEGIN,
TokenType.CALL, TokenType.CALL,
TokenType.COMMENT_ON,
TokenType.COMMIT, TokenType.COMMIT,
TokenType.EXPLAIN, TokenType.EXPLAIN,
TokenType.OPTIMIZE, TokenType.OPTIMIZE,

View file

@ -164,7 +164,7 @@ class TestBigQuery(Validator):
"CREATE TABLE db.example_table (col_a struct<struct_col_a:int, struct_col_b:string>)", "CREATE TABLE db.example_table (col_a struct<struct_col_a:int, struct_col_b:string>)",
write={ write={
"bigquery": "CREATE TABLE db.example_table (col_a STRUCT<struct_col_a INT64, struct_col_b STRING>)", "bigquery": "CREATE TABLE db.example_table (col_a STRUCT<struct_col_a INT64, struct_col_b STRING>)",
"duckdb": "CREATE TABLE db.example_table (col_a STRUCT<struct_col_a INT, struct_col_b TEXT>)", "duckdb": "CREATE TABLE db.example_table (col_a STRUCT(struct_col_a INT, struct_col_b TEXT))",
"presto": "CREATE TABLE db.example_table (col_a ROW(struct_col_a INTEGER, struct_col_b VARCHAR))", "presto": "CREATE TABLE db.example_table (col_a ROW(struct_col_a INTEGER, struct_col_b VARCHAR))",
"hive": "CREATE TABLE db.example_table (col_a STRUCT<struct_col_a INT, struct_col_b STRING>)", "hive": "CREATE TABLE db.example_table (col_a STRUCT<struct_col_a INT, struct_col_b STRING>)",
"spark": "CREATE TABLE db.example_table (col_a STRUCT<struct_col_a: INT, struct_col_b: STRING>)", "spark": "CREATE TABLE db.example_table (col_a STRUCT<struct_col_a: INT, struct_col_b: STRING>)",
@ -174,6 +174,7 @@ class TestBigQuery(Validator):
"CREATE TABLE db.example_table (col_a STRUCT<struct_col_a INT64, struct_col_b STRUCT<nested_col_a STRING, nested_col_b STRING>>)", "CREATE TABLE db.example_table (col_a STRUCT<struct_col_a INT64, struct_col_b STRUCT<nested_col_a STRING, nested_col_b STRING>>)",
write={ write={
"bigquery": "CREATE TABLE db.example_table (col_a STRUCT<struct_col_a INT64, struct_col_b STRUCT<nested_col_a STRING, nested_col_b STRING>>)", "bigquery": "CREATE TABLE db.example_table (col_a STRUCT<struct_col_a INT64, struct_col_b STRUCT<nested_col_a STRING, nested_col_b STRING>>)",
"duckdb": "CREATE TABLE db.example_table (col_a STRUCT(struct_col_a BIGINT, struct_col_b STRUCT(nested_col_a TEXT, nested_col_b TEXT)))",
"presto": "CREATE TABLE db.example_table (col_a ROW(struct_col_a BIGINT, struct_col_b ROW(nested_col_a VARCHAR, nested_col_b VARCHAR)))", "presto": "CREATE TABLE db.example_table (col_a ROW(struct_col_a BIGINT, struct_col_b ROW(nested_col_a VARCHAR, nested_col_b VARCHAR)))",
"hive": "CREATE TABLE db.example_table (col_a STRUCT<struct_col_a BIGINT, struct_col_b STRUCT<nested_col_a STRING, nested_col_b STRING>>)", "hive": "CREATE TABLE db.example_table (col_a STRUCT<struct_col_a BIGINT, struct_col_b STRUCT<nested_col_a STRING, nested_col_b STRING>>)",
"spark": "CREATE TABLE db.example_table (col_a STRUCT<struct_col_a: LONG, struct_col_b: STRUCT<nested_col_a: STRING, nested_col_b: STRING>>)", "spark": "CREATE TABLE db.example_table (col_a STRUCT<struct_col_a: LONG, struct_col_b: STRUCT<nested_col_a: STRING, nested_col_b: STRING>>)",

View file

@ -1056,6 +1056,7 @@ class TestDialect(Validator):
self.validate_all( self.validate_all(
"CREATE TABLE t (c CHAR, nc NCHAR, v1 VARCHAR, v2 VARCHAR2, nv NVARCHAR, nv2 NVARCHAR2)", "CREATE TABLE t (c CHAR, nc NCHAR, v1 VARCHAR, v2 VARCHAR2, nv NVARCHAR, nv2 NVARCHAR2)",
write={ write={
"duckdb": "CREATE TABLE t (c CHAR, nc CHAR, v1 TEXT, v2 TEXT, nv TEXT, nv2 TEXT)",
"hive": "CREATE TABLE t (c CHAR, nc CHAR, v1 STRING, v2 STRING, nv STRING, nv2 STRING)", "hive": "CREATE TABLE t (c CHAR, nc CHAR, v1 STRING, v2 STRING, nv STRING, nv2 STRING)",
"oracle": "CREATE TABLE t (c CHAR, nc CHAR, v1 VARCHAR2, v2 VARCHAR2, nv NVARCHAR2, nv2 NVARCHAR2)", "oracle": "CREATE TABLE t (c CHAR, nc CHAR, v1 VARCHAR2, v2 VARCHAR2, nv NVARCHAR2, nv2 NVARCHAR2)",
"postgres": "CREATE TABLE t (c CHAR, nc CHAR, v1 VARCHAR, v2 VARCHAR, nv VARCHAR, nv2 VARCHAR)", "postgres": "CREATE TABLE t (c CHAR, nc CHAR, v1 VARCHAR, v2 VARCHAR, nv VARCHAR, nv2 VARCHAR)",
@ -1096,6 +1097,7 @@ class TestDialect(Validator):
self.validate_all( self.validate_all(
"CREATE TABLE t (b1 BINARY, b2 BINARY(1024), c1 TEXT, c2 TEXT(1024))", "CREATE TABLE t (b1 BINARY, b2 BINARY(1024), c1 TEXT, c2 TEXT(1024))",
write={ write={
"duckdb": "CREATE TABLE t (b1 BINARY, b2 BINARY(1024), c1 TEXT, c2 TEXT(1024))",
"hive": "CREATE TABLE t (b1 BINARY, b2 BINARY(1024), c1 STRING, c2 STRING(1024))", "hive": "CREATE TABLE t (b1 BINARY, b2 BINARY(1024), c1 STRING, c2 STRING(1024))",
"oracle": "CREATE TABLE t (b1 BLOB, b2 BLOB(1024), c1 CLOB, c2 CLOB(1024))", "oracle": "CREATE TABLE t (b1 BLOB, b2 BLOB(1024), c1 CLOB, c2 CLOB(1024))",
"postgres": "CREATE TABLE t (b1 BYTEA, b2 BYTEA(1024), c1 TEXT, c2 TEXT(1024))", "postgres": "CREATE TABLE t (b1 BYTEA, b2 BYTEA(1024), c1 TEXT, c2 TEXT(1024))",

View file

@ -65,6 +65,23 @@ class TestDuckDB(Validator):
) )
def test_duckdb(self): def test_duckdb(self):
self.validate_all(
"CREATE TABLE IF NOT EXISTS table (cola INT, colb STRING) USING ICEBERG PARTITIONED BY (colb)",
write={
"duckdb": "CREATE TABLE IF NOT EXISTS table (cola INT, colb TEXT)",
},
)
self.validate_all(
"COL::BIGINT[]",
write={
"duckdb": "CAST(COL AS BIGINT[])",
"presto": "CAST(COL AS ARRAY(BIGINT))",
"hive": "CAST(COL AS ARRAY<BIGINT>)",
"spark": "CAST(COL AS ARRAY<LONG>)",
},
)
self.validate_all( self.validate_all(
"LIST_VALUE(0, 1, 2)", "LIST_VALUE(0, 1, 2)",
read={ read={

View file

@ -126,22 +126,24 @@ class TestHive(Validator):
) )
def test_ddl(self): def test_ddl(self):
self.validate_all(
"CREATE TABLE test STORED AS parquet TBLPROPERTIES ('x'='1', 'Z'='2') AS SELECT 1",
write={
"presto": "CREATE TABLE test WITH (FORMAT='parquet', x='1', Z='2') AS SELECT 1",
"hive": "CREATE TABLE test STORED AS PARQUET TBLPROPERTIES ('x'='1', 'Z'='2') AS SELECT 1",
"spark": "CREATE TABLE test USING PARQUET TBLPROPERTIES ('x'='1', 'Z'='2') AS SELECT 1",
},
)
self.validate_all( self.validate_all(
"CREATE TABLE x (w STRING) PARTITIONED BY (y INT, z INT)", "CREATE TABLE x (w STRING) PARTITIONED BY (y INT, z INT)",
write={ write={
"duckdb": "CREATE TABLE x (w TEXT)", # Partition columns should exist in table
"presto": "CREATE TABLE x (w VARCHAR, y INTEGER, z INTEGER) WITH (PARTITIONED_BY=ARRAY['y', 'z'])", "presto": "CREATE TABLE x (w VARCHAR, y INTEGER, z INTEGER) WITH (PARTITIONED_BY=ARRAY['y', 'z'])",
"hive": "CREATE TABLE x (w STRING) PARTITIONED BY (y INT, z INT)", "hive": "CREATE TABLE x (w STRING) PARTITIONED BY (y INT, z INT)",
"spark": "CREATE TABLE x (w STRING) PARTITIONED BY (y INT, z INT)", "spark": "CREATE TABLE x (w STRING) PARTITIONED BY (y INT, z INT)",
}, },
) )
self.validate_all(
"CREATE TABLE test STORED AS parquet TBLPROPERTIES ('x'='1', 'Z'='2') AS SELECT 1",
write={
"duckdb": "CREATE TABLE test AS SELECT 1",
"presto": "CREATE TABLE test WITH (FORMAT='parquet', x='1', Z='2') AS SELECT 1",
"hive": "CREATE TABLE test STORED AS PARQUET TBLPROPERTIES ('x'='1', 'Z'='2') AS SELECT 1",
"spark": "CREATE TABLE test USING PARQUET TBLPROPERTIES ('x'='1', 'Z'='2') AS SELECT 1",
},
)
def test_lateral_view(self): def test_lateral_view(self):
self.validate_all( self.validate_all(

View file

@ -8,6 +8,7 @@ class TestMySQL(Validator):
self.validate_all( self.validate_all(
"CREATE TABLE z (a INT) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARACTER SET=utf8 COLLATE=utf8_bin COMMENT='x'", "CREATE TABLE z (a INT) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARACTER SET=utf8 COLLATE=utf8_bin COMMENT='x'",
write={ write={
"duckdb": "CREATE TABLE z (a INT)",
"mysql": "CREATE TABLE z (a INT) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARACTER SET=utf8 COLLATE=utf8_bin COMMENT='x'", "mysql": "CREATE TABLE z (a INT) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARACTER SET=utf8 COLLATE=utf8_bin COMMENT='x'",
"spark": "CREATE TABLE z (a INT) COMMENT 'x'", "spark": "CREATE TABLE z (a INT) COMMENT 'x'",
}, },

View file

@ -68,10 +68,12 @@ class TestPostgres(Validator):
self.validate_identity("SELECT TRIM(' X' FROM ' XXX ')") self.validate_identity("SELECT TRIM(' X' FROM ' XXX ')")
self.validate_identity("SELECT TRIM(LEADING 'bla' FROM ' XXX ' COLLATE utf8_bin)") self.validate_identity("SELECT TRIM(LEADING 'bla' FROM ' XXX ' COLLATE utf8_bin)")
self.validate_identity("SELECT TO_TIMESTAMP(1284352323.5), TO_TIMESTAMP('05 Dec 2000', 'DD Mon YYYY')") self.validate_identity("SELECT TO_TIMESTAMP(1284352323.5), TO_TIMESTAMP('05 Dec 2000', 'DD Mon YYYY')")
self.validate_identity("COMMENT ON TABLE mytable IS 'this'")
self.validate_all( self.validate_all(
"CREATE TABLE x (a UUID, b BYTEA)", "CREATE TABLE x (a UUID, b BYTEA)",
write={ write={
"duckdb": "CREATE TABLE x (a UUID, b BINARY)",
"presto": "CREATE TABLE x (a UUID, b VARBINARY)", "presto": "CREATE TABLE x (a UUID, b VARBINARY)",
"hive": "CREATE TABLE x (a UUID, b BINARY)", "hive": "CREATE TABLE x (a UUID, b BINARY)",
"spark": "CREATE TABLE x (a UUID, b BINARY)", "spark": "CREATE TABLE x (a UUID, b BINARY)",
@ -163,3 +165,42 @@ class TestPostgres(Validator):
"postgres": "SELECT p1.id, p2.id, v1, v2 FROM polygons p1, polygons p2, LATERAL VERTICES(p1.poly) v1, LATERAL VERTICES(p2.poly) v2 WHERE (v1 <-> v2) < 10 AND p1.id != p2.id", "postgres": "SELECT p1.id, p2.id, v1, v2 FROM polygons p1, polygons p2, LATERAL VERTICES(p1.poly) v1, LATERAL VERTICES(p2.poly) v2 WHERE (v1 <-> v2) < 10 AND p1.id != p2.id",
}, },
) )
self.validate_all(
"SELECT id, email, CAST(deleted AS TEXT) FROM users WHERE NOT deleted IS NULL",
read={"postgres": "SELECT id, email, CAST(deleted AS TEXT) FROM users WHERE deleted NOTNULL"},
)
self.validate_all(
"SELECT id, email, CAST(deleted AS TEXT) FROM users WHERE NOT deleted IS NULL",
read={"postgres": "SELECT id, email, CAST(deleted AS TEXT) FROM users WHERE NOT deleted ISNULL"},
)
self.validate_all(
"'[1,2,3]'::json->2",
write={"postgres": "CAST('[1,2,3]' AS JSON)->'2'"},
)
self.validate_all(
"""'{"a":1,"b":2}'::json->'b'""",
write={"postgres": """CAST('{"a":1,"b":2}' AS JSON)->'b'"""},
)
self.validate_all(
"""'{"x": {"y": 1}}'::json->'x'->'y'""", write={"postgres": """CAST('{"x": {"y": 1}}' AS JSON)->'x'->'y'"""}
)
self.validate_all(
"""'{"x": {"y": 1}}'::json->'x'::json->'y'""",
write={"postgres": """CAST(CAST('{"x": {"y": 1}}' AS JSON)->'x' AS JSON)->'y'"""},
)
self.validate_all(
"""'[1,2,3]'::json->>2""",
write={"postgres": "CAST('[1,2,3]' AS JSON)->>'2'"},
)
self.validate_all(
"""'{"a":1,"b":2}'::json->>'b'""",
write={"postgres": """CAST('{"a":1,"b":2}' AS JSON)->>'b'"""},
)
self.validate_all(
"""'{"a":[1,2,3],"b":[4,5,6]}'::json#>'{a,2}'""",
write={"postgres": """CAST('{"a":[1,2,3],"b":[4,5,6]}' AS JSON)#>'{a,2}'"""},
)
self.validate_all(
"""'{"a":[1,2,3],"b":[4,5,6]}'::json#>>'{a,2}'""",
write={"postgres": """CAST('{"a":[1,2,3],"b":[4,5,6]}' AS JSON)#>>'{a,2}'"""},
)

View file

@ -10,7 +10,7 @@ class TestPresto(Validator):
"CAST(a AS ARRAY(INT))", "CAST(a AS ARRAY(INT))",
write={ write={
"bigquery": "CAST(a AS ARRAY<INT64>)", "bigquery": "CAST(a AS ARRAY<INT64>)",
"duckdb": "CAST(a AS ARRAY<INT>)", "duckdb": "CAST(a AS INT[])",
"presto": "CAST(a AS ARRAY(INTEGER))", "presto": "CAST(a AS ARRAY(INTEGER))",
"spark": "CAST(a AS ARRAY<INT>)", "spark": "CAST(a AS ARRAY<INT>)",
}, },
@ -28,7 +28,7 @@ class TestPresto(Validator):
"CAST(ARRAY[1, 2] AS ARRAY(BIGINT))", "CAST(ARRAY[1, 2] AS ARRAY(BIGINT))",
write={ write={
"bigquery": "CAST([1, 2] AS ARRAY<INT64>)", "bigquery": "CAST([1, 2] AS ARRAY<INT64>)",
"duckdb": "CAST(LIST_VALUE(1, 2) AS ARRAY<BIGINT>)", "duckdb": "CAST(LIST_VALUE(1, 2) AS BIGINT[])",
"presto": "CAST(ARRAY[1, 2] AS ARRAY(BIGINT))", "presto": "CAST(ARRAY[1, 2] AS ARRAY(BIGINT))",
"spark": "CAST(ARRAY(1, 2) AS ARRAY<LONG>)", "spark": "CAST(ARRAY(1, 2) AS ARRAY<LONG>)",
}, },
@ -37,7 +37,7 @@ class TestPresto(Validator):
"CAST(MAP(ARRAY[1], ARRAY[1]) AS MAP(INT,INT))", "CAST(MAP(ARRAY[1], ARRAY[1]) AS MAP(INT,INT))",
write={ write={
"bigquery": "CAST(MAP([1], [1]) AS MAP<INT64, INT64>)", "bigquery": "CAST(MAP([1], [1]) AS MAP<INT64, INT64>)",
"duckdb": "CAST(MAP(LIST_VALUE(1), LIST_VALUE(1)) AS MAP<INT, INT>)", "duckdb": "CAST(MAP(LIST_VALUE(1), LIST_VALUE(1)) AS MAP(INT, INT))",
"presto": "CAST(MAP(ARRAY[1], ARRAY[1]) AS MAP(INTEGER, INTEGER))", "presto": "CAST(MAP(ARRAY[1], ARRAY[1]) AS MAP(INTEGER, INTEGER))",
"hive": "CAST(MAP(1, 1) AS MAP<INT, INT>)", "hive": "CAST(MAP(1, 1) AS MAP<INT, INT>)",
"spark": "CAST(MAP_FROM_ARRAYS(ARRAY(1), ARRAY(1)) AS MAP<INT, INT>)", "spark": "CAST(MAP_FROM_ARRAYS(ARRAY(1), ARRAY(1)) AS MAP<INT, INT>)",
@ -47,7 +47,7 @@ class TestPresto(Validator):
"CAST(MAP(ARRAY['a','b','c'], ARRAY[ARRAY[1], ARRAY[2], ARRAY[3]]) AS MAP(VARCHAR, ARRAY(INT)))", "CAST(MAP(ARRAY['a','b','c'], ARRAY[ARRAY[1], ARRAY[2], ARRAY[3]]) AS MAP(VARCHAR, ARRAY(INT)))",
write={ write={
"bigquery": "CAST(MAP(['a', 'b', 'c'], [[1], [2], [3]]) AS MAP<STRING, ARRAY<INT64>>)", "bigquery": "CAST(MAP(['a', 'b', 'c'], [[1], [2], [3]]) AS MAP<STRING, ARRAY<INT64>>)",
"duckdb": "CAST(MAP(LIST_VALUE('a', 'b', 'c'), LIST_VALUE(LIST_VALUE(1), LIST_VALUE(2), LIST_VALUE(3))) AS MAP<TEXT, ARRAY<INT>>)", "duckdb": "CAST(MAP(LIST_VALUE('a', 'b', 'c'), LIST_VALUE(LIST_VALUE(1), LIST_VALUE(2), LIST_VALUE(3))) AS MAP(TEXT, INT[]))",
"presto": "CAST(MAP(ARRAY['a', 'b', 'c'], ARRAY[ARRAY[1], ARRAY[2], ARRAY[3]]) AS MAP(VARCHAR, ARRAY(INTEGER)))", "presto": "CAST(MAP(ARRAY['a', 'b', 'c'], ARRAY[ARRAY[1], ARRAY[2], ARRAY[3]]) AS MAP(VARCHAR, ARRAY(INTEGER)))",
"hive": "CAST(MAP('a', ARRAY(1), 'b', ARRAY(2), 'c', ARRAY(3)) AS MAP<STRING, ARRAY<INT>>)", "hive": "CAST(MAP('a', ARRAY(1), 'b', ARRAY(2), 'c', ARRAY(3)) AS MAP<STRING, ARRAY<INT>>)",
"spark": "CAST(MAP_FROM_ARRAYS(ARRAY('a', 'b', 'c'), ARRAY(ARRAY(1), ARRAY(2), ARRAY(3))) AS MAP<STRING, ARRAY<INT>>)", "spark": "CAST(MAP_FROM_ARRAYS(ARRAY('a', 'b', 'c'), ARRAY(ARRAY(1), ARRAY(2), ARRAY(3))) AS MAP<STRING, ARRAY<INT>>)",
@ -171,6 +171,7 @@ class TestPresto(Validator):
self.validate_all( self.validate_all(
"CREATE TABLE test WITH (FORMAT = 'PARQUET') AS SELECT 1", "CREATE TABLE test WITH (FORMAT = 'PARQUET') AS SELECT 1",
write={ write={
"duckdb": "CREATE TABLE test AS SELECT 1",
"presto": "CREATE TABLE test WITH (FORMAT='PARQUET') AS SELECT 1", "presto": "CREATE TABLE test WITH (FORMAT='PARQUET') AS SELECT 1",
"hive": "CREATE TABLE test STORED AS PARQUET AS SELECT 1", "hive": "CREATE TABLE test STORED AS PARQUET AS SELECT 1",
"spark": "CREATE TABLE test USING PARQUET AS SELECT 1", "spark": "CREATE TABLE test USING PARQUET AS SELECT 1",
@ -179,6 +180,7 @@ class TestPresto(Validator):
self.validate_all( self.validate_all(
"CREATE TABLE test WITH (FORMAT = 'PARQUET', X = '1', Z = '2') AS SELECT 1", "CREATE TABLE test WITH (FORMAT = 'PARQUET', X = '1', Z = '2') AS SELECT 1",
write={ write={
"duckdb": "CREATE TABLE test AS SELECT 1",
"presto": "CREATE TABLE test WITH (FORMAT='PARQUET', X='1', Z='2') AS SELECT 1", "presto": "CREATE TABLE test WITH (FORMAT='PARQUET', X='1', Z='2') AS SELECT 1",
"hive": "CREATE TABLE test STORED AS PARQUET TBLPROPERTIES ('X'='1', 'Z'='2') AS SELECT 1", "hive": "CREATE TABLE test STORED AS PARQUET TBLPROPERTIES ('X'='1', 'Z'='2') AS SELECT 1",
"spark": "CREATE TABLE test USING PARQUET TBLPROPERTIES ('X'='1', 'Z'='2') AS SELECT 1", "spark": "CREATE TABLE test USING PARQUET TBLPROPERTIES ('X'='1', 'Z'='2') AS SELECT 1",
@ -187,6 +189,7 @@ class TestPresto(Validator):
self.validate_all( self.validate_all(
"CREATE TABLE x (w VARCHAR, y INTEGER, z INTEGER) WITH (PARTITIONED_BY=ARRAY['y', 'z'])", "CREATE TABLE x (w VARCHAR, y INTEGER, z INTEGER) WITH (PARTITIONED_BY=ARRAY['y', 'z'])",
write={ write={
"duckdb": "CREATE TABLE x (w TEXT, y INT, z INT)",
"presto": "CREATE TABLE x (w VARCHAR, y INTEGER, z INTEGER) WITH (PARTITIONED_BY=ARRAY['y', 'z'])", "presto": "CREATE TABLE x (w VARCHAR, y INTEGER, z INTEGER) WITH (PARTITIONED_BY=ARRAY['y', 'z'])",
"hive": "CREATE TABLE x (w STRING) PARTITIONED BY (y INT, z INT)", "hive": "CREATE TABLE x (w STRING) PARTITIONED BY (y INT, z INT)",
"spark": "CREATE TABLE x (w STRING) PARTITIONED BY (y INT, z INT)", "spark": "CREATE TABLE x (w STRING) PARTITIONED BY (y INT, z INT)",
@ -195,6 +198,7 @@ class TestPresto(Validator):
self.validate_all( self.validate_all(
"CREATE TABLE x WITH (bucket_by = ARRAY['y'], bucket_count = 64) AS SELECT 1 AS y", "CREATE TABLE x WITH (bucket_by = ARRAY['y'], bucket_count = 64) AS SELECT 1 AS y",
write={ write={
"duckdb": "CREATE TABLE x AS SELECT 1 AS y",
"presto": "CREATE TABLE x WITH (bucket_by=ARRAY['y'], bucket_count=64) AS SELECT 1 AS y", "presto": "CREATE TABLE x WITH (bucket_by=ARRAY['y'], bucket_count=64) AS SELECT 1 AS y",
"hive": "CREATE TABLE x TBLPROPERTIES ('bucket_by'=ARRAY('y'), 'bucket_count'=64) AS SELECT 1 AS y", "hive": "CREATE TABLE x TBLPROPERTIES ('bucket_by'=ARRAY('y'), 'bucket_count'=64) AS SELECT 1 AS y",
"spark": "CREATE TABLE x TBLPROPERTIES ('bucket_by'=ARRAY('y'), 'bucket_count'=64) AS SELECT 1 AS y", "spark": "CREATE TABLE x TBLPROPERTIES ('bucket_by'=ARRAY('y'), 'bucket_count'=64) AS SELECT 1 AS y",
@ -203,6 +207,7 @@ class TestPresto(Validator):
self.validate_all( self.validate_all(
"CREATE TABLE db.example_table (col_a ROW(struct_col_a INTEGER, struct_col_b VARCHAR))", "CREATE TABLE db.example_table (col_a ROW(struct_col_a INTEGER, struct_col_b VARCHAR))",
write={ write={
"duckdb": "CREATE TABLE db.example_table (col_a STRUCT(struct_col_a INT, struct_col_b TEXT))",
"presto": "CREATE TABLE db.example_table (col_a ROW(struct_col_a INTEGER, struct_col_b VARCHAR))", "presto": "CREATE TABLE db.example_table (col_a ROW(struct_col_a INTEGER, struct_col_b VARCHAR))",
"hive": "CREATE TABLE db.example_table (col_a STRUCT<struct_col_a INT, struct_col_b STRING>)", "hive": "CREATE TABLE db.example_table (col_a STRUCT<struct_col_a INT, struct_col_b STRING>)",
"spark": "CREATE TABLE db.example_table (col_a STRUCT<struct_col_a: INT, struct_col_b: STRING>)", "spark": "CREATE TABLE db.example_table (col_a STRUCT<struct_col_a: INT, struct_col_b: STRING>)",
@ -211,6 +216,7 @@ class TestPresto(Validator):
self.validate_all( self.validate_all(
"CREATE TABLE db.example_table (col_a ROW(struct_col_a INTEGER, struct_col_b ROW(nested_col_a VARCHAR, nested_col_b VARCHAR)))", "CREATE TABLE db.example_table (col_a ROW(struct_col_a INTEGER, struct_col_b ROW(nested_col_a VARCHAR, nested_col_b VARCHAR)))",
write={ write={
"duckdb": "CREATE TABLE db.example_table (col_a STRUCT(struct_col_a INT, struct_col_b STRUCT(nested_col_a TEXT, nested_col_b TEXT)))",
"presto": "CREATE TABLE db.example_table (col_a ROW(struct_col_a INTEGER, struct_col_b ROW(nested_col_a VARCHAR, nested_col_b VARCHAR)))", "presto": "CREATE TABLE db.example_table (col_a ROW(struct_col_a INTEGER, struct_col_b ROW(nested_col_a VARCHAR, nested_col_b VARCHAR)))",
"hive": "CREATE TABLE db.example_table (col_a STRUCT<struct_col_a INT, struct_col_b STRUCT<nested_col_a STRING, nested_col_b STRING>>)", "hive": "CREATE TABLE db.example_table (col_a STRUCT<struct_col_a INT, struct_col_b STRUCT<nested_col_a STRING, nested_col_b STRING>>)",
"spark": "CREATE TABLE db.example_table (col_a STRUCT<struct_col_a: INT, struct_col_b: STRUCT<nested_col_a: STRING, nested_col_b: STRING>>)", "spark": "CREATE TABLE db.example_table (col_a STRUCT<struct_col_a: INT, struct_col_b: STRUCT<nested_col_a: STRING, nested_col_b: STRING>>)",

View file

@ -8,6 +8,7 @@ class TestSpark(Validator):
self.validate_all( self.validate_all(
"CREATE TABLE db.example_table (col_a struct<struct_col_a:int, struct_col_b:string>)", "CREATE TABLE db.example_table (col_a struct<struct_col_a:int, struct_col_b:string>)",
write={ write={
"duckdb": "CREATE TABLE db.example_table (col_a STRUCT(struct_col_a INT, struct_col_b TEXT))",
"presto": "CREATE TABLE db.example_table (col_a ROW(struct_col_a INTEGER, struct_col_b VARCHAR))", "presto": "CREATE TABLE db.example_table (col_a ROW(struct_col_a INTEGER, struct_col_b VARCHAR))",
"hive": "CREATE TABLE db.example_table (col_a STRUCT<struct_col_a INT, struct_col_b STRING>)", "hive": "CREATE TABLE db.example_table (col_a STRUCT<struct_col_a INT, struct_col_b STRING>)",
"spark": "CREATE TABLE db.example_table (col_a STRUCT<struct_col_a: INT, struct_col_b: STRING>)", "spark": "CREATE TABLE db.example_table (col_a STRUCT<struct_col_a: INT, struct_col_b: STRING>)",
@ -17,6 +18,7 @@ class TestSpark(Validator):
"CREATE TABLE db.example_table (col_a struct<struct_col_a:int, struct_col_b:struct<nested_col_a:string, nested_col_b:string>>)", "CREATE TABLE db.example_table (col_a struct<struct_col_a:int, struct_col_b:struct<nested_col_a:string, nested_col_b:string>>)",
write={ write={
"bigquery": "CREATE TABLE db.example_table (col_a STRUCT<struct_col_a INT64, struct_col_b STRUCT<nested_col_a STRING, nested_col_b STRING>>)", "bigquery": "CREATE TABLE db.example_table (col_a STRUCT<struct_col_a INT64, struct_col_b STRUCT<nested_col_a STRING, nested_col_b STRING>>)",
"duckdb": "CREATE TABLE db.example_table (col_a STRUCT(struct_col_a INT, struct_col_b STRUCT(nested_col_a TEXT, nested_col_b TEXT)))",
"presto": "CREATE TABLE db.example_table (col_a ROW(struct_col_a INTEGER, struct_col_b ROW(nested_col_a VARCHAR, nested_col_b VARCHAR)))", "presto": "CREATE TABLE db.example_table (col_a ROW(struct_col_a INTEGER, struct_col_b ROW(nested_col_a VARCHAR, nested_col_b VARCHAR)))",
"hive": "CREATE TABLE db.example_table (col_a STRUCT<struct_col_a INT, struct_col_b STRUCT<nested_col_a STRING, nested_col_b STRING>>)", "hive": "CREATE TABLE db.example_table (col_a STRUCT<struct_col_a INT, struct_col_b STRUCT<nested_col_a STRING, nested_col_b STRING>>)",
"spark": "CREATE TABLE db.example_table (col_a STRUCT<struct_col_a: INT, struct_col_b: STRUCT<nested_col_a: STRING, nested_col_b: STRING>>)", "spark": "CREATE TABLE db.example_table (col_a STRUCT<struct_col_a: INT, struct_col_b: STRUCT<nested_col_a: STRING, nested_col_b: STRING>>)",
@ -26,6 +28,7 @@ class TestSpark(Validator):
"CREATE TABLE db.example_table (col_a array<int>, col_b array<array<int>>)", "CREATE TABLE db.example_table (col_a array<int>, col_b array<array<int>>)",
write={ write={
"bigquery": "CREATE TABLE db.example_table (col_a ARRAY<INT64>, col_b ARRAY<ARRAY<INT64>>)", "bigquery": "CREATE TABLE db.example_table (col_a ARRAY<INT64>, col_b ARRAY<ARRAY<INT64>>)",
"duckdb": "CREATE TABLE db.example_table (col_a INT[], col_b INT[][])",
"presto": "CREATE TABLE db.example_table (col_a ARRAY(INTEGER), col_b ARRAY(ARRAY(INTEGER)))", "presto": "CREATE TABLE db.example_table (col_a ARRAY(INTEGER), col_b ARRAY(ARRAY(INTEGER)))",
"hive": "CREATE TABLE db.example_table (col_a ARRAY<INT>, col_b ARRAY<ARRAY<INT>>)", "hive": "CREATE TABLE db.example_table (col_a ARRAY<INT>, col_b ARRAY<ARRAY<INT>>)",
"spark": "CREATE TABLE db.example_table (col_a ARRAY<INT>, col_b ARRAY<ARRAY<INT>>)", "spark": "CREATE TABLE db.example_table (col_a ARRAY<INT>, col_b ARRAY<ARRAY<INT>>)",
@ -34,6 +37,7 @@ class TestSpark(Validator):
self.validate_all( self.validate_all(
"CREATE TABLE x USING ICEBERG PARTITIONED BY (MONTHS(y)) LOCATION 's3://z'", "CREATE TABLE x USING ICEBERG PARTITIONED BY (MONTHS(y)) LOCATION 's3://z'",
write={ write={
"duckdb": "CREATE TABLE x",
"presto": "CREATE TABLE x WITH (TABLE_FORMAT = 'ICEBERG', PARTITIONED_BY=ARRAY['MONTHS'])", "presto": "CREATE TABLE x WITH (TABLE_FORMAT = 'ICEBERG', PARTITIONED_BY=ARRAY['MONTHS'])",
"hive": "CREATE TABLE x USING ICEBERG PARTITIONED BY (MONTHS(y)) LOCATION 's3://z'", "hive": "CREATE TABLE x USING ICEBERG PARTITIONED BY (MONTHS(y)) LOCATION 's3://z'",
"spark": "CREATE TABLE x USING ICEBERG PARTITIONED BY (MONTHS(y)) LOCATION 's3://z'", "spark": "CREATE TABLE x USING ICEBERG PARTITIONED BY (MONTHS(y)) LOCATION 's3://z'",
@ -42,6 +46,7 @@ class TestSpark(Validator):
self.validate_all( self.validate_all(
"CREATE TABLE test STORED AS PARQUET AS SELECT 1", "CREATE TABLE test STORED AS PARQUET AS SELECT 1",
write={ write={
"duckdb": "CREATE TABLE test AS SELECT 1",
"presto": "CREATE TABLE test WITH (FORMAT='PARQUET') AS SELECT 1", "presto": "CREATE TABLE test WITH (FORMAT='PARQUET') AS SELECT 1",
"hive": "CREATE TABLE test STORED AS PARQUET AS SELECT 1", "hive": "CREATE TABLE test STORED AS PARQUET AS SELECT 1",
"spark": "CREATE TABLE test USING PARQUET AS SELECT 1", "spark": "CREATE TABLE test USING PARQUET AS SELECT 1",
@ -50,6 +55,9 @@ class TestSpark(Validator):
self.validate_all( self.validate_all(
"""CREATE TABLE blah (col_a INT) COMMENT "Test comment: blah" PARTITIONED BY (date STRING) STORED AS ICEBERG TBLPROPERTIES('x' = '1')""", """CREATE TABLE blah (col_a INT) COMMENT "Test comment: blah" PARTITIONED BY (date STRING) STORED AS ICEBERG TBLPROPERTIES('x' = '1')""",
write={ write={
"duckdb": """CREATE TABLE blah (
col_a INT
)""", # Partition columns should exist in table
"presto": """CREATE TABLE blah ( "presto": """CREATE TABLE blah (
col_a INTEGER, col_a INTEGER,
date VARCHAR date VARCHAR

View file

@ -6,6 +6,9 @@ class TestTSQL(Validator):
def test_tsql(self): def test_tsql(self):
self.validate_identity('SELECT "x"."y" FROM foo') self.validate_identity('SELECT "x"."y" FROM foo')
self.validate_identity(
"SELECT DISTINCT DepartmentName, PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY BaseRate) OVER (PARTITION BY DepartmentName) AS MedianCont FROM dbo.DimEmployee"
)
self.validate_all( self.validate_all(
"SELECT CAST([a].[b] AS SMALLINT) FROM foo", "SELECT CAST([a].[b] AS SMALLINT) FROM foo",

View file

@ -50,7 +50,9 @@ a.B()
a['x'].C() a['x'].C()
int.x int.x
map.x map.x
SELECT call.x
a.b.INT(1.234) a.b.INT(1.234)
INT(x / 100)
x IN (-1, 1) x IN (-1, 1)
x IN ('a', 'a''a') x IN ('a', 'a''a')
x IN ((1)) x IN ((1))
@ -147,6 +149,7 @@ SELECT 1 AS count FROM test
SELECT 1 AS comment FROM test SELECT 1 AS comment FROM test
SELECT 1 AS numeric FROM test SELECT 1 AS numeric FROM test
SELECT 1 AS number FROM test SELECT 1 AS number FROM test
SELECT 1 AS number # annotation
SELECT t.count SELECT t.count
SELECT DISTINCT x FROM test SELECT DISTINCT x FROM test
SELECT DISTINCT x, y FROM test SELECT DISTINCT x, y FROM test
@ -159,6 +162,8 @@ SELECT TIMESTAMP(DATE_TRUNC(DATE(time_field), MONTH)) AS time_value FROM "table"
SELECT GREATEST((3 + 1), LEAST(3, 4)) SELECT GREATEST((3 + 1), LEAST(3, 4))
SELECT TRANSFORM(a, b -> b) AS x SELECT TRANSFORM(a, b -> b) AS x
SELECT AGGREGATE(a, (a, b) -> a + b) AS x SELECT AGGREGATE(a, (a, b) -> a + b) AS x
SELECT COUNT(DISTINCT a, b)
SELECT COUNT(DISTINCT a, b + 1)
SELECT SUM(DISTINCT x) SELECT SUM(DISTINCT x)
SELECT SUM(x IGNORE NULLS) AS x SELECT SUM(x IGNORE NULLS) AS x
SELECT ARRAY_AGG(DISTINCT x IGNORE NULLS ORDER BY a, b DESC LIMIT 10) AS x SELECT ARRAY_AGG(DISTINCT x IGNORE NULLS ORDER BY a, b DESC LIMIT 10) AS x
@ -485,6 +490,9 @@ CREATE INDEX abc ON t (a)
CREATE INDEX abc ON t (a, b, b) CREATE INDEX abc ON t (a, b, b)
CREATE UNIQUE INDEX abc ON t (a, b, b) CREATE UNIQUE INDEX abc ON t (a, b, b)
CREATE UNIQUE INDEX IF NOT EXISTS my_idx ON tbl (a, b) CREATE UNIQUE INDEX IF NOT EXISTS my_idx ON tbl (a, b)
CREATE SCHEMA x
CREATE SCHEMA IF NOT EXISTS y
DESCRIBE x
DROP INDEX a.b.c DROP INDEX a.b.c
CACHE TABLE x CACHE TABLE x
CACHE LAZY TABLE x CACHE LAZY TABLE x
@ -524,6 +532,15 @@ INSERT INTO x VALUES (1, 'a', 2.0), (1, 'a', 3.0), (X(), y[1], z.x)
INSERT INTO y (a, b, c) SELECT a, b, c FROM x INSERT INTO y (a, b, c) SELECT a, b, c FROM x
INSERT OVERWRITE TABLE x IF EXISTS SELECT * FROM y INSERT OVERWRITE TABLE x IF EXISTS SELECT * FROM y
INSERT OVERWRITE TABLE a.b IF EXISTS SELECT * FROM y INSERT OVERWRITE TABLE a.b IF EXISTS SELECT * FROM y
INSERT OVERWRITE DIRECTORY 'x' SELECT 1
INSERT OVERWRITE LOCAL DIRECTORY 'x' SELECT 1
INSERT OVERWRITE LOCAL DIRECTORY 'x' ROW FORMAT DELIMITED FIELDS TERMINATED BY '1' COLLECTION ITEMS TERMINATED BY '2' MAP KEYS TERMINATED BY '3' LINES TERMINATED BY '4' NULL DEFINED AS '5' SELECT 1
LOAD DATA INPATH 'x' INTO TABLE y PARTITION(ds='yyyy')
LOAD DATA LOCAL INPATH 'x' INTO TABLE y PARTITION(ds='yyyy')
LOAD DATA LOCAL INPATH 'x' INTO TABLE y PARTITION(ds='yyyy') INPUTFORMAT 'y'
LOAD DATA LOCAL INPATH 'x' INTO TABLE y PARTITION(ds='yyyy') INPUTFORMAT 'y' SERDE 'z'
LOAD DATA INPATH 'x' INTO TABLE y INPUTFORMAT 'y' SERDE 'z'
LOAD DATA INPATH 'x' INTO TABLE y.b INPUTFORMAT 'y' SERDE 'z'
SELECT 1 FROM PARQUET_SCAN('/x/y/*') AS y SELECT 1 FROM PARQUET_SCAN('/x/y/*') AS y
UNCACHE TABLE x UNCACHE TABLE x
UNCACHE TABLE IF EXISTS x UNCACHE TABLE IF EXISTS x

View file

@ -0,0 +1,48 @@
# title: CTE
WITH q AS (
SELECT
a
FROM x
)
SELECT
a
FROM x;
SELECT
a
FROM x;
# title: Nested CTE
SELECT
a
FROM (
WITH q AS (
SELECT
a
FROM x
)
SELECT a FROM x
);
SELECT
a
FROM (
SELECT
a
FROM x
);
# title: Chained CTE
WITH q AS (
SELECT
a
FROM x
), r AS (
SELECT
a
FROM q
)
SELECT
a
FROM x;
SELECT
a
FROM x;

View file

@ -0,0 +1,317 @@
# title: Remove left join on distinct derived table
SELECT
x.a
FROM x
LEFT JOIN (
SELECT DISTINCT
y.b
FROM y
) AS y
ON x.b = y.b;
SELECT
x.a
FROM x;
# title: Remove left join on grouped derived table
SELECT
x.a
FROM x
LEFT JOIN (
SELECT
y.b,
SUM(y.c)
FROM y
GROUP BY y.b
) AS y
ON x.b = y.b;
SELECT
x.a
FROM x;
# title: Remove left join on aggregate derived table
SELECT
x.a
FROM x
LEFT JOIN (
SELECT
SUM(y.b) AS b
FROM y
) AS y
ON x.b = y.b;
SELECT
x.a
FROM x;
# title: Noop - not all distinct columns in condition
SELECT
x.a
FROM x
LEFT JOIN (
SELECT DISTINCT
y.b,
y.c
FROM y
) AS y
ON x.b = y.b;
SELECT
x.a
FROM x
LEFT JOIN (
SELECT DISTINCT
y.b,
y.c
FROM y
) AS y
ON x.b = y.b;
# title: Noop - not all grouped columns in condition
SELECT
x.a
FROM x
LEFT JOIN (
SELECT
y.b,
y.c
FROM y
GROUP BY
y.b,
y.c
) AS y
ON x.b = y.b;
SELECT
x.a
FROM x
LEFT JOIN (
SELECT
y.b,
y.c
FROM y
GROUP BY
y.b,
y.c
) AS y
ON x.b = y.b;
# title: Noop - not left join
SELECT
x.a
FROM x
JOIN (
SELECT DISTINCT
y.b
FROM y
) AS y
ON x.b = y.b;
SELECT
x.a
FROM x
JOIN (
SELECT DISTINCT
y.b
FROM y
) AS y
ON x.b = y.b;
# title: Noop - unqualified columns
SELECT
a
FROM x
LEFT JOIN (
SELECT DISTINCT
y.b
FROM y
) AS y
ON x.b = y.b;
SELECT
a
FROM x
LEFT JOIN (
SELECT DISTINCT
y.b
FROM y
) AS y
ON x.b = y.b;
# title: Noop - cross join
SELECT
a
FROM x
CROSS JOIN (
SELECT DISTINCT
y.b
FROM y
) AS y;
SELECT
a
FROM x
CROSS JOIN (
SELECT DISTINCT
y.b
FROM y
) AS y;
# title: Noop - column is used
SELECT
x.a,
y.b
FROM x
LEFT JOIN (
SELECT DISTINCT
y.b
FROM y
) AS y
ON x.b = y.b;
SELECT
x.a,
y.b
FROM x
LEFT JOIN (
SELECT DISTINCT
y.b
FROM y
) AS y
ON x.b = y.b;
# title: Multiple group by columns
SELECT
x.a
FROM x
LEFT JOIN (
SELECT
y.b AS b,
y.c + 1 AS d,
COUNT(1)
FROM y
GROUP BY y.b, y.c + 1
) AS y
ON x.b = y.b
AND 1 = y.d;
SELECT
x.a
FROM x;
# title: Chained left joins
SELECT
x.a
FROM x
LEFT JOIN (
SELECT
y.b AS b
FROM y
GROUP BY y.b
) AS y
ON x.b = y.b
LEFT JOIN (
SELECT
y.b AS c
FROM y
GROUP BY y.b
) AS z
ON y.b = z.c;
SELECT
x.a
FROM x;
# title: CTE
WITH z AS (
SELECT DISTINCT
y.b
FROM y
)
SELECT
x.a
FROM x
LEFT JOIN z
ON x.b = z.b;
WITH z AS (
SELECT DISTINCT
y.b
FROM y
)
SELECT
x.a
FROM x;
# title: Noop - Not all grouped expressions are in outputs
SELECT
x.a
FROM x
LEFT JOIN (
SELECT
y.b
FROM y
GROUP BY
y.b,
y.c
) AS y
ON x.b = y.b;
SELECT
x.a
FROM x
LEFT JOIN (
SELECT
y.b
FROM y
GROUP BY
y.b,
y.c
) AS y
ON x.b = y.b;
# title: Cross join on aggregate derived table
SELECT
x.a
FROM x
CROSS JOIN (
SELECT
SUM(y.b) AS b
FROM y
) AS y;
SELECT
x.a
FROM x;
# title: Cross join on derived table with LIMIT 1
SELECT
x.a
FROM x
CROSS JOIN (
SELECT
y.b AS b
FROM y
LIMIT 1
) AS y;
SELECT
x.a
FROM x;
# title: Cross join on derived table with no FROM clause
SELECT
x.a
FROM x
CROSS JOIN (
SELECT
1 AS b,
2 AS c
) AS y;
SELECT
x.a
FROM x;
# title: Noop - cross join on non-aggregate subquery
SELECT
x.a
FROM x
CROSS JOIN (
SELECT
y.b
FROM y
) AS y;
SELECT
x.a
FROM x
CROSS JOIN (
SELECT
y.b
FROM y
) AS y;

View file

@ -187,3 +187,103 @@ FROM (
) AS x ) AS x
) AS x; ) AS x;
SELECT /*+ BROADCAST(x) */ x.a AS a, x.c AS c FROM (SELECT x.a AS a, COUNT(1) AS c FROM x AS x GROUP BY x.a) AS x; SELECT /*+ BROADCAST(x) */ x.a AS a, x.c AS c FROM (SELECT x.a AS a, COUNT(1) AS c FROM x AS x GROUP BY x.a) AS x;
# title: Test preventing merge of window expressions where clause
with t1 as (
SELECT
x.a,
x.b,
ROW_NUMBER() OVER (PARTITION BY x.a ORDER BY x.a) as row_num
FROM
x
)
SELECT
t1.a,
t1.b
FROM
t1
WHERE
row_num = 1;
WITH t1 AS (SELECT x.a AS a, x.b AS b, ROW_NUMBER() OVER (PARTITION BY x.a ORDER BY x.a) AS row_num FROM x AS x) SELECT t1.a AS a, t1.b AS b FROM t1 WHERE t1.row_num = 1;
# title: Test preventing merge of window expressions join clause
with t1 as (
SELECT
x.a,
x.b,
ROW_NUMBER() OVER (PARTITION BY x.a ORDER BY x.a) as row_num
FROM
x
)
SELECT
t1.a,
t1.b
FROM t1 JOIN y ON t1.a = y.c AND t1.row_num = 1;
WITH t1 AS (SELECT x.a AS a, x.b AS b, ROW_NUMBER() OVER (PARTITION BY x.a ORDER BY x.a) AS row_num FROM x AS x) SELECT t1.a AS a, t1.b AS b FROM t1 JOIN y AS y ON t1.a = y.c AND t1.row_num = 1;
# title: Test preventing merge of window expressions agg function
with t1 as (
SELECT
x.a,
x.b,
ROW_NUMBER() OVER (PARTITION BY x.a ORDER BY x.a) as row_num
FROM
x
)
SELECT
SUM(t1.row_num) as total_rows
FROM
t1;
WITH t1 AS (SELECT x.a AS a, x.b AS b, ROW_NUMBER() OVER (PARTITION BY x.a ORDER BY x.a) AS row_num FROM x AS x) SELECT SUM(t1.row_num) AS total_rows FROM t1;
# title: Test prevent merging of window if in group by func
with t1 as (
SELECT
x.a,
x.b,
ROW_NUMBER() OVER (PARTITION BY x.a ORDER BY x.a) as row_num
FROM
x
)
SELECT
t1.row_num AS row_num,
SUM(t1.a) AS total
FROM
t1
GROUP BY t1.row_num
ORDER BY t1.row_num;
WITH t1 AS (SELECT x.a AS a, x.b AS b, ROW_NUMBER() OVER (PARTITION BY x.a ORDER BY x.a) AS row_num FROM x AS x) SELECT t1.row_num AS row_num, SUM(t1.a) AS total FROM t1 GROUP BY t1.row_num ORDER BY t1.row_num;
# title: Test prevent merging of window if in order by func
with t1 as (
SELECT
x.a,
x.b,
ROW_NUMBER() OVER (PARTITION BY x.a ORDER BY x.a) as row_num
FROM
x
)
SELECT
t1.row_num AS row_num,
t1.a AS a
FROM
t1
ORDER BY t1.row_num, t1.a;
WITH t1 AS (SELECT x.a AS a, x.b AS b, ROW_NUMBER() OVER (PARTITION BY x.a ORDER BY x.a) AS row_num FROM x AS x) SELECT t1.row_num AS row_num, t1.a AS a FROM t1 ORDER BY t1.row_num, t1.a;
# title: Test allow merging of window function
with t1 as (
SELECT
x.a,
x.b,
ROW_NUMBER() OVER (PARTITION BY x.a ORDER BY x.a) as row_num
FROM
x
)
SELECT
t1.a,
t1.b,
t1.row_num
FROM
t1;
SELECT x.a AS a, x.b AS b, ROW_NUMBER() OVER (PARTITION BY x.a ORDER BY x.a) AS row_num FROM x AS x;

View file

@ -105,9 +105,7 @@ LEFT JOIN "_u_0" AS "_u_0"
JOIN "y" AS "y" JOIN "y" AS "y"
ON "x"."b" = "y"."b" ON "x"."b" = "y"."b"
WHERE WHERE
"_u_0"."_col_0" >= 0 "_u_0"."_col_0" >= 0 AND "x"."a" > 1 AND NOT "_u_0"."_u_1" IS NULL
AND "x"."a" > 1
AND NOT "_u_0"."_u_1" IS NULL
GROUP BY GROUP BY
"x"."a"; "x"."a";

View file

@ -30,3 +30,6 @@ SELECT x.a AS a FROM x AS x JOIN (SELECT * FROM y AS y WHERE y.a = 1) AS y ON y.
SELECT x.a AS a FROM x AS x CROSS JOIN (SELECT * FROM y AS y) AS y WHERE x.a = 1 AND x.b = 1 AND y.a = x.a AND y.a = 1; SELECT x.a AS a FROM x AS x CROSS JOIN (SELECT * FROM y AS y) AS y WHERE x.a = 1 AND x.b = 1 AND y.a = x.a AND y.a = 1;
SELECT x.a AS a FROM x AS x JOIN (SELECT * FROM y AS y WHERE y.a = 1) AS y ON y.a = x.a AND TRUE WHERE x.a = 1 AND x.b = 1 AND TRUE AND TRUE; SELECT x.a AS a FROM x AS x JOIN (SELECT * FROM y AS y WHERE y.a = 1) AS y ON y.a = x.a AND TRUE WHERE x.a = 1 AND x.b = 1 AND TRUE AND TRUE;
with t1 as (SELECT x.a, x.b, ROW_NUMBER() OVER (PARTITION BY x.a ORDER BY x.a) as row_num FROM x) SELECT t1.a, t1.b FROM t1 WHERE row_num = 1;
WITH t1 AS (SELECT x.a, x.b, ROW_NUMBER() OVER (PARTITION BY x.a ORDER BY x.a) AS row_num FROM x) SELECT t1.a, t1.b FROM t1 WHERE row_num = 1;

View file

@ -28,13 +28,15 @@ SELECT
SUM("lineitem"."l_quantity") AS "sum_qty", SUM("lineitem"."l_quantity") AS "sum_qty",
SUM("lineitem"."l_extendedprice") AS "sum_base_price", SUM("lineitem"."l_extendedprice") AS "sum_base_price",
SUM("lineitem"."l_extendedprice" * ( SUM("lineitem"."l_extendedprice" * (
1 - "lineitem"."l_discount" 1 - "lineitem"."l_discount"
)) AS "sum_disc_price", )) AS "sum_disc_price",
SUM("lineitem"."l_extendedprice" * ( SUM(
"lineitem"."l_extendedprice" * (
1 - "lineitem"."l_discount" 1 - "lineitem"."l_discount"
) * ( ) * (
1 + "lineitem"."l_tax" 1 + "lineitem"."l_tax"
)) AS "sum_charge", )
) AS "sum_charge",
AVG("lineitem"."l_quantity") AS "avg_qty", AVG("lineitem"."l_quantity") AS "avg_qty",
AVG("lineitem"."l_extendedprice") AS "avg_price", AVG("lineitem"."l_extendedprice") AS "avg_price",
AVG("lineitem"."l_discount") AS "avg_disc", AVG("lineitem"."l_discount") AS "avg_disc",
@ -186,7 +188,7 @@ limit
SELECT SELECT
"lineitem"."l_orderkey" AS "l_orderkey", "lineitem"."l_orderkey" AS "l_orderkey",
SUM("lineitem"."l_extendedprice" * ( SUM("lineitem"."l_extendedprice" * (
1 - "lineitem"."l_discount" 1 - "lineitem"."l_discount"
)) AS "revenue", )) AS "revenue",
CAST("orders"."o_orderdate" AS TEXT) AS "o_orderdate", CAST("orders"."o_orderdate" AS TEXT) AS "o_orderdate",
"orders"."o_shippriority" AS "o_shippriority" "orders"."o_shippriority" AS "o_shippriority"
@ -286,7 +288,7 @@ order by
SELECT SELECT
"nation"."n_name" AS "n_name", "nation"."n_name" AS "n_name",
SUM("lineitem"."l_extendedprice" * ( SUM("lineitem"."l_extendedprice" * (
1 - "lineitem"."l_discount" 1 - "lineitem"."l_discount"
)) AS "revenue" )) AS "revenue"
FROM "customer" AS "customer" FROM "customer" AS "customer"
JOIN "orders" AS "orders" JOIN "orders" AS "orders"
@ -377,15 +379,14 @@ WITH "n1" AS (
"nation"."n_name" AS "n_name" "nation"."n_name" AS "n_name"
FROM "nation" AS "nation" FROM "nation" AS "nation"
WHERE WHERE
"nation"."n_name" = 'FRANCE' "nation"."n_name" = 'FRANCE' OR "nation"."n_name" = 'GERMANY'
OR "nation"."n_name" = 'GERMANY'
) )
SELECT SELECT
"n1"."n_name" AS "supp_nation", "n1"."n_name" AS "supp_nation",
"n2"."n_name" AS "cust_nation", "n2"."n_name" AS "cust_nation",
EXTRACT(year FROM "lineitem"."l_shipdate") AS "l_year", EXTRACT(year FROM "lineitem"."l_shipdate") AS "l_year",
SUM("lineitem"."l_extendedprice" * ( SUM("lineitem"."l_extendedprice" * (
1 - "lineitem"."l_discount" 1 - "lineitem"."l_discount"
)) AS "revenue" )) AS "revenue"
FROM "supplier" AS "supplier" FROM "supplier" AS "supplier"
JOIN "lineitem" AS "lineitem" JOIN "lineitem" AS "lineitem"
@ -400,12 +401,10 @@ JOIN "n1" AS "n1"
JOIN "n1" AS "n2" JOIN "n1" AS "n2"
ON "customer"."c_nationkey" = "n2"."n_nationkey" ON "customer"."c_nationkey" = "n2"."n_nationkey"
AND ( AND (
"n1"."n_name" = 'FRANCE' "n1"."n_name" = 'FRANCE' OR "n2"."n_name" = 'FRANCE'
OR "n2"."n_name" = 'FRANCE'
) )
AND ( AND (
"n1"."n_name" = 'GERMANY' "n1"."n_name" = 'GERMANY' OR "n2"."n_name" = 'GERMANY'
OR "n2"."n_name" = 'GERMANY'
) )
GROUP BY GROUP BY
"n1"."n_name", "n1"."n_name",
@ -458,14 +457,16 @@ order by
o_year; o_year;
SELECT SELECT
EXTRACT(year FROM "orders"."o_orderdate") AS "o_year", EXTRACT(year FROM "orders"."o_orderdate") AS "o_year",
SUM(CASE SUM(
CASE
WHEN "nation_2"."n_name" = 'BRAZIL' WHEN "nation_2"."n_name" = 'BRAZIL'
THEN "lineitem"."l_extendedprice" * ( THEN "lineitem"."l_extendedprice" * (
1 - "lineitem"."l_discount" 1 - "lineitem"."l_discount"
) )
ELSE 0 ELSE 0
END) / SUM("lineitem"."l_extendedprice" * ( END
1 - "lineitem"."l_discount" ) / SUM("lineitem"."l_extendedprice" * (
1 - "lineitem"."l_discount"
)) AS "mkt_share" )) AS "mkt_share"
FROM "part" AS "part" FROM "part" AS "part"
JOIN "region" AS "region" JOIN "region" AS "region"
@ -529,9 +530,11 @@ order by
SELECT SELECT
"nation"."n_name" AS "nation", "nation"."n_name" AS "nation",
EXTRACT(year FROM "orders"."o_orderdate") AS "o_year", EXTRACT(year FROM "orders"."o_orderdate") AS "o_year",
SUM("lineitem"."l_extendedprice" * ( SUM(
"lineitem"."l_extendedprice" * (
1 - "lineitem"."l_discount" 1 - "lineitem"."l_discount"
) - "partsupp"."ps_supplycost" * "lineitem"."l_quantity") AS "sum_profit" ) - "partsupp"."ps_supplycost" * "lineitem"."l_quantity"
) AS "sum_profit"
FROM "part" AS "part" FROM "part" AS "part"
JOIN "lineitem" AS "lineitem" JOIN "lineitem" AS "lineitem"
ON "part"."p_partkey" = "lineitem"."l_partkey" ON "part"."p_partkey" = "lineitem"."l_partkey"
@ -593,7 +596,7 @@ SELECT
"customer"."c_custkey" AS "c_custkey", "customer"."c_custkey" AS "c_custkey",
"customer"."c_name" AS "c_name", "customer"."c_name" AS "c_name",
SUM("lineitem"."l_extendedprice" * ( SUM("lineitem"."l_extendedprice" * (
1 - "lineitem"."l_discount" 1 - "lineitem"."l_discount"
)) AS "revenue", )) AS "revenue",
"customer"."c_acctbal" AS "c_acctbal", "customer"."c_acctbal" AS "c_acctbal",
"nation"."n_name" AS "n_name", "nation"."n_name" AS "n_name",
@ -606,8 +609,7 @@ JOIN "orders" AS "orders"
AND "orders"."o_orderdate" < CAST('1994-01-01' AS DATE) AND "orders"."o_orderdate" < CAST('1994-01-01' AS DATE)
AND "orders"."o_orderdate" >= CAST('1993-10-01' AS DATE) AND "orders"."o_orderdate" >= CAST('1993-10-01' AS DATE)
JOIN "lineitem" AS "lineitem" JOIN "lineitem" AS "lineitem"
ON "lineitem"."l_orderkey" = "orders"."o_orderkey" ON "lineitem"."l_orderkey" = "orders"."o_orderkey" AND "lineitem"."l_returnflag" = 'R'
AND "lineitem"."l_returnflag" = 'R'
JOIN "nation" AS "nation" JOIN "nation" AS "nation"
ON "customer"."c_nationkey" = "nation"."n_nationkey" ON "customer"."c_nationkey" = "nation"."n_nationkey"
GROUP BY GROUP BY
@ -721,18 +723,20 @@ order by
l_shipmode; l_shipmode;
SELECT SELECT
"lineitem"."l_shipmode" AS "l_shipmode", "lineitem"."l_shipmode" AS "l_shipmode",
SUM(CASE SUM(
WHEN "orders"."o_orderpriority" = '1-URGENT' CASE
OR "orders"."o_orderpriority" = '2-HIGH' WHEN "orders"."o_orderpriority" = '1-URGENT' OR "orders"."o_orderpriority" = '2-HIGH'
THEN 1 THEN 1
ELSE 0 ELSE 0
END) AS "high_line_count", END
SUM(CASE ) AS "high_line_count",
WHEN "orders"."o_orderpriority" <> '1-URGENT' SUM(
AND "orders"."o_orderpriority" <> '2-HIGH' CASE
WHEN "orders"."o_orderpriority" <> '1-URGENT' AND "orders"."o_orderpriority" <> '2-HIGH'
THEN 1 THEN 1
ELSE 0 ELSE 0
END) AS "low_line_count" END
) AS "low_line_count"
FROM "orders" AS "orders" FROM "orders" AS "orders"
JOIN "lineitem" AS "lineitem" JOIN "lineitem" AS "lineitem"
ON "lineitem"."l_commitdate" < "lineitem"."l_receiptdate" ON "lineitem"."l_commitdate" < "lineitem"."l_receiptdate"
@ -813,14 +817,16 @@ where
and l_shipdate >= date '1995-09-01' and l_shipdate >= date '1995-09-01'
and l_shipdate < date '1995-09-01' + interval '1' month; and l_shipdate < date '1995-09-01' + interval '1' month;
SELECT SELECT
100.00 * SUM(CASE 100.00 * SUM(
CASE
WHEN "part"."p_type" LIKE 'PROMO%' WHEN "part"."p_type" LIKE 'PROMO%'
THEN "lineitem"."l_extendedprice" * ( THEN "lineitem"."l_extendedprice" * (
1 - "lineitem"."l_discount" 1 - "lineitem"."l_discount"
) )
ELSE 0 ELSE 0
END) / SUM("lineitem"."l_extendedprice" * ( END
1 - "lineitem"."l_discount" ) / SUM("lineitem"."l_extendedprice" * (
1 - "lineitem"."l_discount"
)) AS "promo_revenue" )) AS "promo_revenue"
FROM "lineitem" AS "lineitem" FROM "lineitem" AS "lineitem"
JOIN "part" AS "part" JOIN "part" AS "part"
@ -866,7 +872,7 @@ WITH "revenue" AS (
SELECT SELECT
"lineitem"."l_suppkey" AS "supplier_no", "lineitem"."l_suppkey" AS "supplier_no",
SUM("lineitem"."l_extendedprice" * ( SUM("lineitem"."l_extendedprice" * (
1 - "lineitem"."l_discount" 1 - "lineitem"."l_discount"
)) AS "total_revenue" )) AS "total_revenue"
FROM "lineitem" AS "lineitem" FROM "lineitem" AS "lineitem"
WHERE WHERE
@ -997,8 +1003,7 @@ JOIN "part" AS "part"
LEFT JOIN "_u_0" AS "_u_0" LEFT JOIN "_u_0" AS "_u_0"
ON "_u_0"."_u_1" = "part"."p_partkey" ON "_u_0"."_u_1" = "part"."p_partkey"
WHERE WHERE
"lineitem"."l_quantity" < "_u_0"."_col_0" "lineitem"."l_quantity" < "_u_0"."_col_0" AND NOT "_u_0"."_u_1" IS NULL;
AND NOT "_u_0"."_u_1" IS NULL;
-------------------------------------- --------------------------------------
-- TPC-H 18 -- TPC-H 18
@ -1114,7 +1119,7 @@ where
); );
SELECT SELECT
SUM("lineitem"."l_extendedprice" * ( SUM("lineitem"."l_extendedprice" * (
1 - "lineitem"."l_discount" 1 - "lineitem"."l_discount"
)) AS "revenue" )) AS "revenue"
FROM "lineitem" AS "lineitem" FROM "lineitem" AS "lineitem"
JOIN "part" AS "part" JOIN "part" AS "part"
@ -1233,8 +1238,7 @@ WITH "_u_0" AS (
"partsupp"."ps_suppkey" AS "ps_suppkey" "partsupp"."ps_suppkey" AS "ps_suppkey"
FROM "partsupp" AS "partsupp" FROM "partsupp" AS "partsupp"
LEFT JOIN "_u_0" AS "_u_0" LEFT JOIN "_u_0" AS "_u_0"
ON "_u_0"."_u_1" = "partsupp"."ps_partkey" ON "_u_0"."_u_1" = "partsupp"."ps_partkey" AND "_u_0"."_u_2" = "partsupp"."ps_suppkey"
AND "_u_0"."_u_2" = "partsupp"."ps_suppkey"
LEFT JOIN "_u_3" AS "_u_3" LEFT JOIN "_u_3" AS "_u_3"
ON "partsupp"."ps_partkey" = "_u_3"."p_partkey" ON "partsupp"."ps_partkey" = "_u_3"."p_partkey"
WHERE WHERE
@ -1252,8 +1256,7 @@ FROM "supplier" AS "supplier"
LEFT JOIN "_u_4" AS "_u_4" LEFT JOIN "_u_4" AS "_u_4"
ON "supplier"."s_suppkey" = "_u_4"."ps_suppkey" ON "supplier"."s_suppkey" = "_u_4"."ps_suppkey"
JOIN "nation" AS "nation" JOIN "nation" AS "nation"
ON "nation"."n_name" = 'CANADA' ON "nation"."n_name" = 'CANADA' AND "supplier"."s_nationkey" = "nation"."n_nationkey"
AND "supplier"."s_nationkey" = "nation"."n_nationkey"
WHERE WHERE
NOT "_u_4"."ps_suppkey" IS NULL NOT "_u_4"."ps_suppkey" IS NULL
ORDER BY ORDER BY
@ -1332,8 +1335,7 @@ LEFT JOIN "_u_0" AS "_u_0"
LEFT JOIN "_u_2" AS "_u_2" LEFT JOIN "_u_2" AS "_u_2"
ON "_u_2"."l_orderkey" = "lineitem"."l_orderkey" ON "_u_2"."l_orderkey" = "lineitem"."l_orderkey"
JOIN "orders" AS "orders" JOIN "orders" AS "orders"
ON "orders"."o_orderkey" = "lineitem"."l_orderkey" ON "orders"."o_orderkey" = "lineitem"."l_orderkey" AND "orders"."o_orderstatus" = 'F'
AND "orders"."o_orderstatus" = 'F'
JOIN "nation" AS "nation" JOIN "nation" AS "nation"
ON "nation"."n_name" = 'SAUDI ARABIA' ON "nation"."n_name" = 'SAUDI ARABIA'
AND "supplier"."s_nationkey" = "nation"."n_nationkey" AND "supplier"."s_nationkey" = "nation"."n_nationkey"

View file

@ -108,14 +108,11 @@ LEFT JOIN (
ARRAY_AGG(y.b) AS _u_13 ARRAY_AGG(y.b) AS _u_13
FROM y FROM y
WHERE WHERE
TRUE TRUE AND TRUE AND TRUE
AND TRUE
AND TRUE
GROUP BY GROUP BY
y.a y.a
) AS "_u_11" ) AS "_u_11"
ON "_u_11"."_u_12" = x.a ON "_u_11"."_u_12" = x.a AND "_u_11"."_u_12" = x.b
AND "_u_11"."_u_12" = x.b
LEFT JOIN ( LEFT JOIN (
SELECT SELECT
y.a AS a y.a AS a
@ -131,38 +128,30 @@ WHERE
AND NOT "_u_1"."b" IS NULL AND NOT "_u_1"."b" IS NULL
AND NOT "_u_2"."a" IS NULL AND NOT "_u_2"."a" IS NULL
AND ( AND (
x.a = "_u_3".b x.a = "_u_3".b AND NOT "_u_3"."_u_4" IS NULL
AND NOT "_u_3"."_u_4" IS NULL
) )
AND ( AND (
x.a > "_u_5".b x.a > "_u_5".b AND NOT "_u_5"."_u_6" IS NULL
AND NOT "_u_5"."_u_6" IS NULL
) )
AND ( AND (
None = "_u_7".a None = "_u_7".a AND NOT "_u_7".a IS NULL
AND NOT "_u_7".a IS NULL
) )
AND NOT ( AND NOT (
x.a = "_u_8".a x.a = "_u_8".a AND NOT "_u_8".a IS NULL
AND NOT "_u_8".a IS NULL
) )
AND ( AND (
ARRAY_ANY("_u_9".a, _x -> _x = x.a) ARRAY_ANY("_u_9".a, _x -> _x = x.a) AND NOT "_u_9"."_u_10" IS NULL
AND NOT "_u_9"."_u_10" IS NULL
) )
AND ( AND (
( (
( (
x.a < "_u_11".a x.a < "_u_11".a AND NOT "_u_11"."_u_12" IS NULL
AND NOT "_u_11"."_u_12" IS NULL ) AND NOT "_u_11"."_u_12" IS NULL
)
AND NOT "_u_11"."_u_12" IS NULL
) )
AND ARRAY_ANY("_u_11"."_u_13", "_x" -> "_x" <> x.d) AND ARRAY_ANY("_u_11"."_u_13", "_x" -> "_x" <> x.d)
) )
AND ( AND (
NOT "_u_14".a IS NULL NOT "_u_14".a IS NULL AND NOT "_u_14".a IS NULL
AND NOT "_u_14".a IS NULL
) )
AND x.a IN ( AND x.a IN (
SELECT SELECT
@ -203,4 +192,3 @@ WHERE
y.a = x.a y.a = x.a
OFFSET 10 OFFSET 10
); );

View file

@ -56,14 +56,10 @@ LEFT JOIN (
WITH cte1 AS ( WITH cte1 AS (
SELECT SELECT
a, a,
z z AND e AS b
AND e AS b
FROM cte FROM cte
WHERE WHERE
x IN (1, 2, 3) x IN (1, 2, 3) AND z < -1 OR z > 1 AND w = 'AND'
AND z < -1
OR z > 1
AND w = 'AND'
), cte2 AS ( ), cte2 AS (
SELECT SELECT
RANK() OVER (PARTITION BY a, b ORDER BY x DESC) AS a, RANK() OVER (PARTITION BY a, b ORDER BY x DESC) AS a,
@ -77,18 +73,12 @@ WITH cte1 AS (
2 2
UNION ALL UNION ALL
SELECT SELECT
CASE x CASE x AND 1 + 1 = 2
AND 1 + 1 = 2
WHEN TRUE WHEN TRUE
THEN 1 THEN 1 AND 4 + 3 AND Z
AND 4 + 3 WHEN x AND y
AND Z
WHEN x
AND y
THEN 2 THEN 2
ELSE 3 ELSE 3 AND 4 AND g
AND 4
AND g
END END
UNION ALL UNION ALL
SELECT SELECT
@ -102,18 +92,8 @@ WITH cte1 AS (
) AS z ) AS z
UNION ALL UNION ALL
SELECT SELECT
MAX(COALESCE(x MAX(COALESCE(x AND y, a AND b AND c, d AND e)),
AND y, a FOO(CASE WHEN a AND b THEN c AND d ELSE 3 END)
AND b
AND c, d
AND e)),
FOO(CASE
WHEN a
AND b
THEN c
AND d
ELSE 3
END)
GROUP BY GROUP BY
x x
GROUPING SETS ( GROUPING SETS (
@ -154,10 +134,8 @@ LEFT JOIN (
FROM bar FROM bar
WHERE WHERE
( (
c > 1 c > 1 AND d > 1
AND d > 1 ) OR e > 1
)
OR e > 1
GROUP BY GROUP BY
a a
HAVING HAVING
@ -165,11 +143,8 @@ LEFT JOIN (
LIMIT 10 LIMIT 10
) AS z ) AS z
) AS y ) AS y
ON x.a = y.b ON x.a = y.b AND x.a > 1 OR (
AND x.a > 1 x.c = y.d OR x.c = y.e
OR (
x.c = y.d
OR x.c = y.e
); );
SELECT myCol1, myCol2 FROM baseTable LATERAL VIEW OUTER explode(col1) myTable1 AS myCol1 LATERAL VIEW explode(col2) myTable2 AS myCol2 SELECT myCol1, myCol2 FROM baseTable LATERAL VIEW OUTER explode(col1) myTable1 AS myCol1 LATERAL VIEW explode(col2) myTable2 AS myCol2
@ -184,9 +159,7 @@ EXPLODE(col1) myTable1 AS myCol1
LATERAL VIEW LATERAL VIEW
EXPLODE(col2) myTable2 AS myCol2 EXPLODE(col2) myTable2 AS myCol2
WHERE WHERE
a > 1 a > 1 AND b > 2 OR c > 3;
AND b > 2
OR c > 3;
SELECT * FROM (WITH y AS ( SELECT 1 AS z) SELECT z from y) x; SELECT * FROM (WITH y AS ( SELECT 1 AS z) SELECT z from y) x;
SELECT SELECT
@ -264,3 +237,53 @@ CREATE TABLE "t_customer_account" (
"account_no" VARCHAR(100) "account_no" VARCHAR(100)
); );
SELECT
x("aaaaaaaaaaaaaa", "bbbbbbbbbbbbb", "ccccccccc", "ddddddddddddd", "eeeeeeeeeeeee", "fffffff"),
array("aaaaaaaaaaaaaa", "bbbbbbbbbbbbb", "ccccccccc", "ddddddddddddd", "eeeeeeeeeeeee", "fffffff"),
array("aaaaaaaaaaaaaa", "bbbbbbbbbbbbb", "ccccccccc", "ddddddddddddd", "eeeeeeeeeeeee", "fffffff", array("aaaaaaaaaaaaaa", "bbbbbbbbbbbbb", "ccccccccc", "ddddddddddddd", "eeeeeeeeeeeee", "fffffff")),
array(array("aaaaaaaaaaaaaa", "bbbbbbbbbbbbb", "ccccccccc", "ddddddddddddd", "eeeeeeeeeeeee", "fffffff")),
;
SELECT
X(
"aaaaaaaaaaaaaa",
"bbbbbbbbbbbbb",
"ccccccccc",
"ddddddddddddd",
"eeeeeeeeeeeee",
"fffffff"
),
ARRAY(
"aaaaaaaaaaaaaa",
"bbbbbbbbbbbbb",
"ccccccccc",
"ddddddddddddd",
"eeeeeeeeeeeee",
"fffffff"
),
ARRAY(
"aaaaaaaaaaaaaa",
"bbbbbbbbbbbbb",
"ccccccccc",
"ddddddddddddd",
"eeeeeeeeeeeee",
"fffffff",
ARRAY(
"aaaaaaaaaaaaaa",
"bbbbbbbbbbbbb",
"ccccccccc",
"ddddddddddddd",
"eeeeeeeeeeeee",
"fffffff"
)
),
ARRAY(
ARRAY(
"aaaaaaaaaaaaaa",
"bbbbbbbbbbbbb",
"ccccccccc",
"ddddddddddddd",
"eeeeeeeeeeeee",
"fffffff"
)
);

View file

@ -381,6 +381,11 @@ class TestBuild(unittest.TestCase):
lambda: alias(parse_one("LAG(x) OVER ()"), "a"), lambda: alias(parse_one("LAG(x) OVER ()"), "a"),
"LAG(x) OVER () AS a", "LAG(x) OVER () AS a",
), ),
(lambda: exp.values([("1", 2)]), "VALUES ('1', 2)"),
(lambda: exp.values([("1", 2)], "alias"), "(VALUES ('1', 2)) AS alias"),
(lambda: exp.values([("1", 2), ("2", 3)]), "VALUES ('1', 2), ('2', 3)"),
(lambda: exp.delete("y", where="x > 1"), "DELETE FROM y WHERE x > 1"),
(lambda: exp.delete("y", where=exp.and_("x > 1")), "DELETE FROM y WHERE x > 1"),
]: ]:
with self.subTest(sql): with self.subTest(sql):
self.assertEqual(expression().sql(dialect[0] if dialect else None), sql) self.assertEqual(expression().sql(dialect[0] if dialect else None), sql)

View file

@ -1,3 +1,4 @@
import datetime
import unittest import unittest
from sqlglot import alias, exp, parse_one from sqlglot import alias, exp, parse_one
@ -29,6 +30,7 @@ class TestExpressions(unittest.TestCase):
self.assertEqual(parse_one("TO_DATE(x)", read="hive"), parse_one("ts_or_ds_to_date(x)")) self.assertEqual(parse_one("TO_DATE(x)", read="hive"), parse_one("ts_or_ds_to_date(x)"))
self.assertEqual(exp.Table(pivots=[]), exp.Table()) self.assertEqual(exp.Table(pivots=[]), exp.Table())
self.assertNotEqual(exp.Table(pivots=[None]), exp.Table()) self.assertNotEqual(exp.Table(pivots=[None]), exp.Table())
self.assertEqual(exp.DataType.build("int"), exp.DataType(this=exp.DataType.Type.INT, nested=False))
def test_find(self): def test_find(self):
expression = parse_one("CREATE TABLE x STORED AS PARQUET AS SELECT * FROM y") expression = parse_one("CREATE TABLE x STORED AS PARQUET AS SELECT * FROM y")
@ -486,6 +488,8 @@ class TestExpressions(unittest.TestCase):
((1, "2", None), "(1, '2', NULL)"), ((1, "2", None), "(1, '2', NULL)"),
([1, "2", None], "ARRAY(1, '2', NULL)"), ([1, "2", None], "ARRAY(1, '2', NULL)"),
({"x": None}, "MAP('x', NULL)"), ({"x": None}, "MAP('x', NULL)"),
(datetime.datetime(2022, 10, 1, 1, 1, 1), "TIME_STR_TO_TIME('2022-10-01 01:01:01')"),
(datetime.date(2022, 10, 1), "DATE_STR_TO_DATE('2022-10-01')"),
]: ]:
with self.subTest(value): with self.subTest(value):
self.assertEqual(exp.convert(value).sql(), expected) self.assertEqual(exp.convert(value).sql(), expected)
@ -496,3 +500,25 @@ class TestExpressions(unittest.TestCase):
[e.alias_or_name for e in expression.expressions], [e.alias_or_name for e in expression.expressions],
["a", "B", "c", "D"], ["a", "B", "c", "D"],
) )
def test_to_table(self):
table_only = exp.to_table("table_name")
self.assertEqual(table_only.name, "table_name")
self.assertIsNone(table_only.args.get("db"))
self.assertIsNone(table_only.args.get("catalog"))
db_and_table = exp.to_table("db.table_name")
self.assertEqual(db_and_table.name, "table_name")
self.assertEqual(db_and_table.args.get("db"), exp.to_identifier("db"))
self.assertIsNone(db_and_table.args.get("catalog"))
catalog_db_and_table = exp.to_table("catalog.db.table_name")
self.assertEqual(catalog_db_and_table.name, "table_name")
self.assertEqual(catalog_db_and_table.args.get("db"), exp.to_identifier("db"))
self.assertEqual(catalog_db_and_table.args.get("catalog"), exp.to_identifier("catalog"))
def test_union(self):
expression = parse_one("SELECT cola, colb UNION SELECT colx, coly")
self.assertIsInstance(expression, exp.Union)
self.assertEqual(expression.named_selects, ["cola", "colb"])
self.assertEqual(
expression.selects, [exp.Column(this=exp.to_identifier("cola")), exp.Column(this=exp.to_identifier("colb"))]
)

View file

@ -191,6 +191,20 @@ class TestOptimizer(unittest.TestCase):
optimizer.optimize_joins.optimize_joins, optimizer.optimize_joins.optimize_joins,
) )
def test_eliminate_joins(self):
self.check_file(
"eliminate_joins",
optimizer.eliminate_joins.eliminate_joins,
pretty=True,
)
def test_eliminate_ctes(self):
self.check_file(
"eliminate_ctes",
optimizer.eliminate_ctes.eliminate_ctes,
pretty=True,
)
def test_merge_subqueries(self): def test_merge_subqueries(self):
optimize = partial( optimize = partial(
optimizer.optimize, optimizer.optimize,