1
0
Fork 0
sqlglot/sqlglot/expressions.py
Daniel Baumann 24cf9d8984
Adding upstream version 6.2.8.
Signed-off-by: Daniel Baumann <daniel@debian.org>
2025-02-13 14:42:49 +01:00

3143 lines
78 KiB
Python

import numbers
import re
from collections import deque
from copy import deepcopy
from enum import auto
from sqlglot.errors import ParseError
from sqlglot.helper import (
AutoName,
camel_to_snake_case,
ensure_list,
list_get,
subclasses,
)
class _Expression(type):
def __new__(cls, clsname, bases, attrs):
klass = super().__new__(cls, clsname, bases, attrs)
klass.key = clsname.lower()
return klass
class Expression(metaclass=_Expression):
"""
The base class for all expressions in a syntax tree.
Attributes:
arg_types (dict): determines arguments supported by this expression.
The key in a dictionary defines a unique key of an argument using
which the argument's value can be retrieved. The value is a boolean
flag which indicates whether the argument's value is required (True)
or optional (False).
"""
key = None
arg_types = {"this": True}
__slots__ = ("args", "parent", "arg_key", "type")
def __init__(self, **args):
self.args = args
self.parent = None
self.arg_key = None
self.type = None
for arg_key, value in self.args.items():
self._set_parent(arg_key, value)
def __eq__(self, other):
return type(self) is type(other) and _norm_args(self) == _norm_args(other)
def __hash__(self):
return hash(
(
self.key,
tuple((k, tuple(v) if isinstance(v, list) else v) for k, v in _norm_args(self).items()),
)
)
@property
def this(self):
return self.args.get("this")
@property
def expression(self):
return self.args.get("expression")
@property
def expressions(self):
return self.args.get("expressions") or []
def text(self, key):
field = self.args.get(key)
if isinstance(field, str):
return field
if isinstance(field, (Identifier, Literal, Var)):
return field.this
return ""
@property
def is_string(self):
return isinstance(self, Literal) and self.args["is_string"]
@property
def is_number(self):
return isinstance(self, Literal) and not self.args["is_string"]
@property
def is_int(self):
if self.is_number:
try:
int(self.name)
return True
except ValueError:
pass
return False
@property
def alias(self):
if isinstance(self.args.get("alias"), TableAlias):
return self.args["alias"].name
return self.text("alias")
@property
def name(self):
return self.text("this")
@property
def alias_or_name(self):
return self.alias or self.name
def __deepcopy__(self, memo):
return self.__class__(**deepcopy(self.args))
def copy(self):
new = deepcopy(self)
for item, parent, _ in new.bfs():
if isinstance(item, Expression) and parent:
item.parent = parent
return new
def append(self, arg_key, value):
"""
Appends value to arg_key if it's a list or sets it as a new list.
Args:
arg_key (str): name of the list expression arg
value (Any): value to append to the list
"""
if not isinstance(self.args.get(arg_key), list):
self.args[arg_key] = []
self.args[arg_key].append(value)
self._set_parent(arg_key, value)
def set(self, arg_key, value):
"""
Sets `arg_key` to `value`.
Args:
arg_key (str): name of the expression arg
value: value to set the arg to.
"""
self.args[arg_key] = value
self._set_parent(arg_key, value)
def _set_parent(self, arg_key, value):
if isinstance(value, Expression):
value.parent = self
value.arg_key = arg_key
elif isinstance(value, list):
for v in value:
if isinstance(v, Expression):
v.parent = self
v.arg_key = arg_key
@property
def depth(self):
"""
Returns the depth of this tree.
"""
if self.parent:
return self.parent.depth + 1
return 0
def find(self, *expression_types, bfs=True):
"""
Returns the first node in this tree which matches at least one of
the specified types.
Args:
expression_types (type): the expression type(s) to match.
Returns:
the node which matches the criteria or None if no node matching
the criteria was found.
"""
return next(self.find_all(*expression_types, bfs=bfs), None)
def find_all(self, *expression_types, bfs=True):
"""
Returns a generator object which visits all nodes in this tree and only
yields those that match at least one of the specified expression types.
Args:
expression_types (type): the expression type(s) to match.
Returns:
the generator object.
"""
for expression, _, _ in self.walk(bfs=bfs):
if isinstance(expression, expression_types):
yield expression
def find_ancestor(self, *expression_types):
"""
Returns a nearest parent matching expression_types.
Args:
expression_types (type): the expression type(s) to match.
Returns:
the parent node
"""
ancestor = self.parent
while ancestor and not isinstance(ancestor, expression_types):
ancestor = ancestor.parent
return ancestor
@property
def parent_select(self):
"""
Returns the parent select statement.
"""
return self.find_ancestor(Select)
def walk(self, bfs=True, prune=None):
"""
Returns a generator object which visits all nodes in this tree.
Args:
bfs (bool): if set to True the BFS traversal order will be applied,
otherwise the DFS traversal will be used instead.
prune ((node, parent, arg_key) -> bool): callable that returns True if
the generator should stop traversing this branch of the tree.
Returns:
the generator object.
"""
if bfs:
yield from self.bfs(prune=prune)
else:
yield from self.dfs(prune=prune)
def dfs(self, parent=None, key=None, prune=None):
"""
Returns a generator object which visits all nodes in this tree in
the DFS (Depth-first) order.
Returns:
the generator object.
"""
parent = parent or self.parent
yield self, parent, key
if prune and prune(self, parent, key):
return
for k, v in self.args.items():
nodes = ensure_list(v)
for node in nodes:
if isinstance(node, Expression):
yield from node.dfs(self, k, prune)
def bfs(self, prune=None):
"""
Returns a generator object which visits all nodes in this tree in
the BFS (Breadth-first) order.
Returns:
the generator object.
"""
queue = deque([(self, self.parent, None)])
while queue:
item, parent, key = queue.popleft()
yield item, parent, key
if prune and prune(item, parent, key):
continue
if isinstance(item, Expression):
for k, v in item.args.items():
nodes = ensure_list(v)
for node in nodes:
if isinstance(node, Expression):
queue.append((node, item, k))
def unnest(self):
"""
Returns the first non parenthesis child or self.
"""
expression = self
while isinstance(expression, Paren):
expression = expression.this
return expression
def unalias(self):
"""
Returns the inner expression if this is an Alias.
"""
if isinstance(self, Alias):
return self.this
return self
def unnest_operands(self):
"""
Returns unnested operands as a tuple.
"""
return tuple(arg.unnest() for arg in self.args.values() if arg)
def flatten(self, unnest=True):
"""
Returns a generator which yields child nodes who's parents are the same class.
A AND B AND C -> [A, B, C]
"""
for node, _, _ in self.dfs(prune=lambda n, p, *_: p and not isinstance(n, self.__class__)):
if not isinstance(node, self.__class__):
yield node.unnest() if unnest else node
def __str__(self):
return self.sql()
def __repr__(self):
return self.to_s()
def sql(self, dialect=None, **opts):
"""
Returns SQL string representation of this tree.
Args
dialect (str): the dialect of the output SQL string
(eg. "spark", "hive", "presto", "mysql").
opts (dict): other :class:`~sqlglot.generator.Generator` options.
Returns
the SQL string.
"""
from sqlglot.dialects import Dialect
return Dialect.get_or_raise(dialect)().generate(self, **opts)
def to_s(self, hide_missing=True, level=0):
indent = "" if not level else "\n"
indent += "".join([" "] * level)
left = f"({self.key.upper()} "
args = {
k: ", ".join(
v.to_s(hide_missing=hide_missing, level=level + 1) if hasattr(v, "to_s") else str(v)
for v in ensure_list(vs)
if v is not None
)
for k, vs in self.args.items()
}
args = {k: v for k, v in args.items() if v or not hide_missing}
right = ", ".join(f"{k}: {v}" for k, v in args.items())
right += ")"
return indent + left + right
def transform(self, fun, *args, copy=True, **kwargs):
"""
Recursively visits all tree nodes (excluding already transformed ones)
and applies the given transformation function to each node.
Args:
fun (function): a function which takes a node as an argument and returns a
new transformed node or the same node without modifications. If the function
returns None, then the corresponding node will be removed from the syntax tree.
copy (bool): if set to True a new tree instance is constructed, otherwise the tree is
modified in place.
Returns:
the transformed tree.
"""
node = self.copy() if copy else self
new_node = fun(node, *args, **kwargs)
if new_node is None or not isinstance(new_node, Expression):
return new_node
if new_node is not node:
new_node.parent = node.parent
return new_node
replace_children(new_node, lambda child: child.transform(fun, *args, copy=False, **kwargs))
return new_node
def replace(self, expression):
"""
Swap out this expression with a new expression.
For example::
>>> tree = Select().select("x").from_("tbl")
>>> tree.find(Column).replace(Column(this="y"))
(COLUMN this: y)
>>> tree.sql()
'SELECT y FROM tbl'
Args:
expression (Expression|None): new node
Returns :
the new expression or expressions
"""
if not self.parent:
return expression
parent = self.parent
self.parent = None
replace_children(parent, lambda child: expression if child is self else child)
return expression
def pop(self):
"""
Remove this expression from its AST.
"""
self.replace(None)
def assert_is(self, type_):
"""
Assert that this `Expression` is an instance of `type_`.
If it is NOT an instance of `type_`, this raises an assertion error.
Otherwise, this returns this expression.
Examples:
This is useful for type security in chained expressions:
>>> import sqlglot
>>> sqlglot.parse_one("SELECT x from y").assert_is(Select).select("z").sql()
'SELECT x, z FROM y'
"""
assert isinstance(self, type_)
return self
class Condition(Expression):
def and_(self, *expressions, dialect=None, **opts):
"""
AND this condition with one or multiple expressions.
Example:
>>> condition("x=1").and_("y=1").sql()
'x = 1 AND y = 1'
Args:
*expressions (str or Expression): the SQL code strings to parse.
If an `Expression` instance is passed, it will be used as-is.
dialect (str): the dialect used to parse the input expression.
opts (kwargs): other options to use to parse the input expressions.
Returns:
And: the new condition.
"""
return and_(self, *expressions, dialect=dialect, **opts)
def or_(self, *expressions, dialect=None, **opts):
"""
OR this condition with one or multiple expressions.
Example:
>>> condition("x=1").or_("y=1").sql()
'x = 1 OR y = 1'
Args:
*expressions (str or Expression): the SQL code strings to parse.
If an `Expression` instance is passed, it will be used as-is.
dialect (str): the dialect used to parse the input expression.
opts (kwargs): other options to use to parse the input expressions.
Returns:
Or: the new condition.
"""
return or_(self, *expressions, dialect=dialect, **opts)
def not_(self):
"""
Wrap this condition with NOT.
Example:
>>> condition("x=1").not_().sql()
'NOT x = 1'
Returns:
Not: the new condition.
"""
return not_(self)
class Predicate(Condition):
"""Relationships like x = y, x > 1, x >= y."""
class DerivedTable(Expression):
@property
def alias_column_names(self):
table_alias = self.args.get("alias")
if not table_alias:
return []
column_list = table_alias.assert_is(TableAlias).args.get("columns") or []
return [c.name for c in column_list]
@property
def selects(self):
alias = self.args.get("alias")
if alias:
return alias.columns
return []
@property
def named_selects(self):
return [select.alias_or_name for select in self.selects]
class UDTF(DerivedTable):
pass
class Annotation(Expression):
arg_types = {
"this": True,
"expression": True,
}
class Cache(Expression):
arg_types = {
"with": False,
"this": True,
"lazy": False,
"options": False,
"expression": False,
}
class Uncache(Expression):
arg_types = {"this": True, "exists": False}
class Create(Expression):
arg_types = {
"with": False,
"this": True,
"kind": True,
"expression": False,
"exists": False,
"properties": False,
"temporary": False,
"replace": False,
"unique": False,
"materialized": False,
}
class UserDefinedFunction(Expression):
arg_types = {"this": True, "expressions": False}
class UserDefinedFunctionKwarg(Expression):
arg_types = {"this": True, "kind": True, "default": False}
class CharacterSet(Expression):
arg_types = {"this": True, "default": False}
class With(Expression):
arg_types = {"expressions": True, "recursive": False}
class WithinGroup(Expression):
arg_types = {"this": True, "expression": False}
class CTE(DerivedTable):
arg_types = {"this": True, "alias": True}
class TableAlias(Expression):
arg_types = {"this": False, "columns": False}
@property
def columns(self):
return self.args.get("columns") or []
class BitString(Condition):
pass
class HexString(Condition):
pass
class Column(Condition):
arg_types = {"this": True, "table": False}
@property
def table(self):
return self.text("table")
class ColumnDef(Expression):
arg_types = {
"this": True,
"kind": True,
"constraints": False,
}
class ColumnConstraint(Expression):
arg_types = {"this": False, "kind": True}
class ColumnConstraintKind(Expression):
pass
class AutoIncrementColumnConstraint(ColumnConstraintKind):
pass
class CheckColumnConstraint(ColumnConstraintKind):
pass
class CollateColumnConstraint(ColumnConstraintKind):
pass
class CommentColumnConstraint(ColumnConstraintKind):
pass
class DefaultColumnConstraint(ColumnConstraintKind):
pass
class GeneratedAsIdentityColumnConstraint(ColumnConstraintKind):
# this: True -> ALWAYS, this: False -> BY DEFAULT
arg_types = {"this": True, "expression": False}
class NotNullColumnConstraint(ColumnConstraintKind):
pass
class PrimaryKeyColumnConstraint(ColumnConstraintKind):
pass
class UniqueColumnConstraint(ColumnConstraintKind):
pass
class Constraint(Expression):
arg_types = {"this": True, "expressions": True}
class Delete(Expression):
arg_types = {"with": False, "this": True, "where": False}
class Drop(Expression):
arg_types = {
"this": False,
"kind": False,
"exists": False,
"temporary": False,
"materialized": False,
}
class Filter(Expression):
arg_types = {"this": True, "expression": True}
class Check(Expression):
pass
class ForeignKey(Expression):
arg_types = {
"expressions": True,
"reference": False,
"delete": False,
"update": False,
}
class Unique(Expression):
arg_types = {"expressions": True}
class From(Expression):
arg_types = {"expressions": True}
class Having(Expression):
pass
class Hint(Expression):
arg_types = {"expressions": True}
class Identifier(Expression):
arg_types = {"this": True, "quoted": False}
@property
def quoted(self):
return bool(self.args.get("quoted"))
def __eq__(self, other):
return isinstance(other, self.__class__) and _norm_arg(self.this) == _norm_arg(other.this)
def __hash__(self):
return hash((self.key, self.this.lower()))
class Index(Expression):
arg_types = {"this": False, "table": False, "where": False, "columns": False}
class Insert(Expression):
arg_types = {
"with": False,
"this": True,
"expression": True,
"overwrite": False,
"exists": False,
"partition": False,
}
# https://dev.mysql.com/doc/refman/8.0/en/charset-introducer.html
class Introducer(Expression):
arg_types = {"this": True, "expression": True}
class Partition(Expression):
pass
class Fetch(Expression):
arg_types = {"direction": False, "count": True}
class Group(Expression):
arg_types = {
"expressions": False,
"grouping_sets": False,
"cube": False,
"rollup": False,
}
class Lambda(Expression):
arg_types = {"this": True, "expressions": True}
class Limit(Expression):
arg_types = {"this": False, "expression": True}
class Literal(Condition):
arg_types = {"this": True, "is_string": True}
def __eq__(self, other):
return (
isinstance(other, Literal) and self.this == other.this and self.args["is_string"] == other.args["is_string"]
)
def __hash__(self):
return hash((self.key, self.this, self.args["is_string"]))
@classmethod
def number(cls, number):
return cls(this=str(number), is_string=False)
@classmethod
def string(cls, string):
return cls(this=str(string), is_string=True)
class Join(Expression):
arg_types = {
"this": True,
"on": False,
"side": False,
"kind": False,
"using": False,
"natural": False,
}
@property
def kind(self):
return self.text("kind").upper()
@property
def side(self):
return self.text("side").upper()
@property
def alias_or_name(self):
return self.this.alias_or_name
def on(self, *expressions, append=True, dialect=None, copy=True, **opts):
"""
Append to or set the ON expressions.
Example:
>>> import sqlglot
>>> sqlglot.parse_one("JOIN x", into=Join).on("y = 1").sql()
'JOIN x ON y = 1'
Args:
*expressions (str or Expression): the SQL code strings to parse.
If an `Expression` instance is passed, it will be used as-is.
Multiple expressions are combined with an AND operator.
append (bool): if `True`, AND the new expressions to any existing expression.
Otherwise, this resets the expression.
dialect (str): the dialect used to parse the input expressions.
copy (bool): if `False`, modify this expression instance in-place.
opts (kwargs): other options to use to parse the input expressions.
Returns:
Join: the modified join expression.
"""
join = _apply_conjunction_builder(
*expressions,
instance=self,
arg="on",
append=append,
dialect=dialect,
copy=copy,
**opts,
)
if join.kind == "CROSS":
join.set("kind", None)
return join
class Lateral(UDTF):
arg_types = {"this": True, "outer": False, "alias": False}
# Clickhouse FROM FINAL modifier
# https://clickhouse.com/docs/en/sql-reference/statements/select/from/#final-modifier
class Final(Expression):
pass
class Offset(Expression):
arg_types = {"this": False, "expression": True}
class Order(Expression):
arg_types = {"this": False, "expressions": True}
# hive specific sorts
# https://cwiki.apache.org/confluence/display/Hive/LanguageManual+SortBy
class Cluster(Order):
pass
class Distribute(Order):
pass
class Sort(Order):
pass
class Ordered(Expression):
arg_types = {"this": True, "desc": True, "nulls_first": True}
class Property(Expression):
arg_types = {"this": True, "value": True}
class TableFormatProperty(Property):
pass
class PartitionedByProperty(Property):
pass
class FileFormatProperty(Property):
pass
class LocationProperty(Property):
pass
class EngineProperty(Property):
pass
class AutoIncrementProperty(Property):
pass
class CharacterSetProperty(Property):
arg_types = {"this": True, "value": True, "default": True}
class CollateProperty(Property):
pass
class SchemaCommentProperty(Property):
pass
class AnonymousProperty(Property):
pass
class ReturnsProperty(Property):
arg_types = {"this": True, "value": True, "is_table": False}
class LanguageProperty(Property):
pass
class ExecuteAsProperty(Property):
pass
class VolatilityProperty(Property):
arg_types = {"this": True}
class Properties(Expression):
arg_types = {"expressions": True}
PROPERTY_KEY_MAPPING = {
"AUTO_INCREMENT": AutoIncrementProperty,
"CHARACTER_SET": CharacterSetProperty,
"COLLATE": CollateProperty,
"COMMENT": SchemaCommentProperty,
"ENGINE": EngineProperty,
"FORMAT": FileFormatProperty,
"LOCATION": LocationProperty,
"PARTITIONED_BY": PartitionedByProperty,
"TABLE_FORMAT": TableFormatProperty,
}
@classmethod
def from_dict(cls, properties_dict):
expressions = []
for key, value in properties_dict.items():
property_cls = cls.PROPERTY_KEY_MAPPING.get(key.upper(), AnonymousProperty)
expressions.append(property_cls(this=Literal.string(key), value=convert(value)))
return cls(expressions=expressions)
class Qualify(Expression):
pass
class Reference(Expression):
arg_types = {"this": True, "expressions": True}
class Tuple(Expression):
arg_types = {"expressions": False}
class Subqueryable:
def subquery(self, alias=None, copy=True):
"""
Convert this expression to an aliased expression that can be used as a Subquery.
Example:
>>> subquery = Select().select("x").from_("tbl").subquery()
>>> Select().select("x").from_(subquery).sql()
'SELECT x FROM (SELECT x FROM tbl)'
Args:
alias (str or Identifier): an optional alias for the subquery
copy (bool): if `False`, modify this expression instance in-place.
Returns:
Alias: the subquery
"""
instance = _maybe_copy(self, copy)
return Subquery(
this=instance,
alias=TableAlias(this=to_identifier(alias)),
)
@property
def ctes(self):
with_ = self.args.get("with")
if not with_:
return []
return with_.expressions
def with_(
self,
alias,
as_,
recursive=None,
append=True,
dialect=None,
copy=True,
**opts,
):
"""
Append to or set the common table expressions.
Example:
>>> Select().with_("tbl2", as_="SELECT * FROM tbl").select("x").from_("tbl2").sql()
'WITH tbl2 AS (SELECT * FROM tbl) SELECT x FROM tbl2'
Args:
alias (str or Expression): the SQL code string to parse as the table name.
If an `Expression` instance is passed, this is used as-is.
as_ (str or Expression): the SQL code string to parse as the table expression.
If an `Expression` instance is passed, it will be used as-is.
recursive (bool): set the RECURSIVE part of the expression. Defaults to `False`.
append (bool): if `True`, add to any existing expressions.
Otherwise, this resets the expressions.
dialect (str): the dialect used to parse the input expression.
copy (bool): if `False`, modify this expression instance in-place.
opts (kwargs): other options to use to parse the input expressions.
Returns:
Select: the modified expression.
"""
alias_expression = maybe_parse(
alias,
dialect=dialect,
into=TableAlias,
**opts,
)
as_expression = maybe_parse(
as_,
dialect=dialect,
**opts,
)
cte = CTE(
this=as_expression,
alias=alias_expression,
)
return _apply_child_list_builder(
cte,
instance=self,
arg="with",
append=append,
copy=copy,
into=With,
properties={"recursive": recursive or False},
)
QUERY_MODIFIERS = {
"laterals": False,
"joins": False,
"pivots": False,
"where": False,
"group": False,
"having": False,
"qualify": False,
"window": False,
"distribute": False,
"sort": False,
"cluster": False,
"order": False,
"limit": False,
"offset": False,
}
class Table(Expression):
arg_types = {
"this": True,
"db": False,
"catalog": False,
"laterals": False,
"joins": False,
"pivots": False,
}
class Union(Subqueryable, Expression):
arg_types = {
"with": False,
"this": True,
"expression": True,
"distinct": False,
**QUERY_MODIFIERS,
}
@property
def named_selects(self):
return self.args["this"].unnest().named_selects
@property
def left(self):
return self.this
@property
def right(self):
return self.expression
class Except(Union):
pass
class Intersect(Union):
pass
class Unnest(UDTF):
arg_types = {
"expressions": True,
"ordinality": False,
"alias": False,
}
class Update(Expression):
arg_types = {
"with": False,
"this": True,
"expressions": True,
"from": False,
"where": False,
}
class Values(UDTF):
arg_types = {
"expressions": True,
"ordinality": False,
"alias": False,
}
class Var(Expression):
pass
class Schema(Expression):
arg_types = {"this": False, "expressions": True}
class Select(Subqueryable, Expression):
arg_types = {
"with": False,
"expressions": False,
"hint": False,
"distinct": False,
"from": False,
**QUERY_MODIFIERS,
}
def from_(self, *expressions, append=True, dialect=None, copy=True, **opts):
"""
Set the FROM expression.
Example:
>>> Select().from_("tbl").select("x").sql()
'SELECT x FROM tbl'
Args:
*expressions (str or Expression): the SQL code strings to parse.
If a `From` instance is passed, this is used as-is.
If another `Expression` instance is passed, it will be wrapped in a `From`.
append (bool): if `True`, add to any existing expressions.
Otherwise, this flattens all the `From` expression into a single expression.
dialect (str): the dialect used to parse the input expression.
copy (bool): if `False`, modify this expression instance in-place.
opts (kwargs): other options to use to parse the input expressions.
Returns:
Select: the modified expression.
"""
return _apply_child_list_builder(
*expressions,
instance=self,
arg="from",
append=append,
copy=copy,
prefix="FROM",
into=From,
dialect=dialect,
**opts,
)
def group_by(self, *expressions, append=True, dialect=None, copy=True, **opts):
"""
Set the GROUP BY expression.
Example:
>>> Select().from_("tbl").select("x", "COUNT(1)").group_by("x").sql()
'SELECT x, COUNT(1) FROM tbl GROUP BY x'
Args:
*expressions (str or Expression): the SQL code strings to parse.
If a `Group` instance is passed, this is used as-is.
If another `Expression` instance is passed, it will be wrapped in a `Group`.
append (bool): if `True`, add to any existing expressions.
Otherwise, this flattens all the `Group` expression into a single expression.
dialect (str): the dialect used to parse the input expression.
copy (bool): if `False`, modify this expression instance in-place.
opts (kwargs): other options to use to parse the input expressions.
Returns:
Select: the modified expression.
"""
return _apply_child_list_builder(
*expressions,
instance=self,
arg="group",
append=append,
copy=copy,
prefix="GROUP BY",
into=Group,
dialect=dialect,
**opts,
)
def order_by(self, *expressions, append=True, dialect=None, copy=True, **opts):
"""
Set the ORDER BY expression.
Example:
>>> Select().from_("tbl").select("x").order_by("x DESC").sql()
'SELECT x FROM tbl ORDER BY x DESC'
Args:
*expressions (str or Expression): the SQL code strings to parse.
If a `Group` instance is passed, this is used as-is.
If another `Expression` instance is passed, it will be wrapped in a `Order`.
append (bool): if `True`, add to any existing expressions.
Otherwise, this flattens all the `Order` expression into a single expression.
dialect (str): the dialect used to parse the input expression.
copy (bool): if `False`, modify this expression instance in-place.
opts (kwargs): other options to use to parse the input expressions.
Returns:
Select: the modified expression.
"""
return _apply_child_list_builder(
*expressions,
instance=self,
arg="order",
append=append,
copy=copy,
prefix="ORDER BY",
into=Order,
dialect=dialect,
**opts,
)
def sort_by(self, *expressions, append=True, dialect=None, copy=True, **opts):
"""
Set the SORT BY expression.
Example:
>>> Select().from_("tbl").select("x").sort_by("x DESC").sql()
'SELECT x FROM tbl SORT BY x DESC'
Args:
*expressions (str or Expression): the SQL code strings to parse.
If a `Group` instance is passed, this is used as-is.
If another `Expression` instance is passed, it will be wrapped in a `SORT`.
append (bool): if `True`, add to any existing expressions.
Otherwise, this flattens all the `Order` expression into a single expression.
dialect (str): the dialect used to parse the input expression.
copy (bool): if `False`, modify this expression instance in-place.
opts (kwargs): other options to use to parse the input expressions.
Returns:
Select: the modified expression.
"""
return _apply_child_list_builder(
*expressions,
instance=self,
arg="sort",
append=append,
copy=copy,
prefix="SORT BY",
into=Sort,
dialect=dialect,
**opts,
)
def cluster_by(self, *expressions, append=True, dialect=None, copy=True, **opts):
"""
Set the CLUSTER BY expression.
Example:
>>> Select().from_("tbl").select("x").cluster_by("x DESC").sql()
'SELECT x FROM tbl CLUSTER BY x DESC'
Args:
*expressions (str or Expression): the SQL code strings to parse.
If a `Group` instance is passed, this is used as-is.
If another `Expression` instance is passed, it will be wrapped in a `Cluster`.
append (bool): if `True`, add to any existing expressions.
Otherwise, this flattens all the `Order` expression into a single expression.
dialect (str): the dialect used to parse the input expression.
copy (bool): if `False`, modify this expression instance in-place.
opts (kwargs): other options to use to parse the input expressions.
Returns:
Select: the modified expression.
"""
return _apply_child_list_builder(
*expressions,
instance=self,
arg="cluster",
append=append,
copy=copy,
prefix="CLUSTER BY",
into=Cluster,
dialect=dialect,
**opts,
)
def limit(self, expression, dialect=None, copy=True, **opts):
"""
Set the LIMIT expression.
Example:
>>> Select().from_("tbl").select("x").limit(10).sql()
'SELECT x FROM tbl LIMIT 10'
Args:
expression (str or int or Expression): the SQL code string to parse.
This can also be an integer.
If a `Limit` instance is passed, this is used as-is.
If another `Expression` instance is passed, it will be wrapped in a `Limit`.
dialect (str): the dialect used to parse the input expression.
copy (bool): if `False`, modify this expression instance in-place.
opts (kwargs): other options to use to parse the input expressions.
Returns:
Select: the modified expression.
"""
return _apply_builder(
expression=expression,
instance=self,
arg="limit",
into=Limit,
prefix="LIMIT",
dialect=dialect,
copy=copy,
**opts,
)
def offset(self, expression, dialect=None, copy=True, **opts):
"""
Set the OFFSET expression.
Example:
>>> Select().from_("tbl").select("x").offset(10).sql()
'SELECT x FROM tbl OFFSET 10'
Args:
expression (str or int or Expression): the SQL code string to parse.
This can also be an integer.
If a `Offset` instance is passed, this is used as-is.
If another `Expression` instance is passed, it will be wrapped in a `Offset`.
dialect (str): the dialect used to parse the input expression.
copy (bool): if `False`, modify this expression instance in-place.
opts (kwargs): other options to use to parse the input expressions.
Returns:
Select: the modified expression.
"""
return _apply_builder(
expression=expression,
instance=self,
arg="offset",
into=Offset,
prefix="OFFSET",
dialect=dialect,
copy=copy,
**opts,
)
def select(self, *expressions, append=True, dialect=None, copy=True, **opts):
"""
Append to or set the SELECT expressions.
Example:
>>> Select().select("x", "y").sql()
'SELECT x, y'
Args:
*expressions (str or Expression): the SQL code strings to parse.
If an `Expression` instance is passed, it will be used as-is.
append (bool): if `True`, add to any existing expressions.
Otherwise, this resets the expressions.
dialect (str): the dialect used to parse the input expressions.
copy (bool): if `False`, modify this expression instance in-place.
opts (kwargs): other options to use to parse the input expressions.
Returns:
Select: the modified expression.
"""
return _apply_list_builder(
*expressions,
instance=self,
arg="expressions",
append=append,
dialect=dialect,
copy=copy,
**opts,
)
def lateral(self, *expressions, append=True, dialect=None, copy=True, **opts):
"""
Append to or set the LATERAL expressions.
Example:
>>> Select().select("x").lateral("OUTER explode(y) tbl2 AS z").from_("tbl").sql()
'SELECT x FROM tbl LATERAL VIEW OUTER EXPLODE(y) tbl2 AS z'
Args:
*expressions (str or Expression): the SQL code strings to parse.
If an `Expression` instance is passed, it will be used as-is.
append (bool): if `True`, add to any existing expressions.
Otherwise, this resets the expressions.
dialect (str): the dialect used to parse the input expressions.
copy (bool): if `False`, modify this expression instance in-place.
opts (kwargs): other options to use to parse the input expressions.
Returns:
Select: the modified expression.
"""
return _apply_list_builder(
*expressions,
instance=self,
arg="laterals",
append=append,
into=Lateral,
prefix="LATERAL VIEW",
dialect=dialect,
copy=copy,
**opts,
)
def join(
self,
expression,
on=None,
append=True,
join_type=None,
join_alias=None,
dialect=None,
copy=True,
**opts,
):
"""
Append to or set the JOIN expressions.
Example:
>>> Select().select("*").from_("tbl").join("tbl2", on="tbl1.y = tbl2.y").sql()
'SELECT * FROM tbl JOIN tbl2 ON tbl1.y = tbl2.y'
Use `join_type` to change the type of join:
>>> Select().select("*").from_("tbl").join("tbl2", on="tbl1.y = tbl2.y", join_type="left outer").sql()
'SELECT * FROM tbl LEFT OUTER JOIN tbl2 ON tbl1.y = tbl2.y'
Args:
expression (str or Expression): the SQL code string to parse.
If an `Expression` instance is passed, it will be used as-is.
on (str or Expression): optionally specify the join criteria as a SQL string.
If an `Expression` instance is passed, it will be used as-is.
append (bool): if `True`, add to any existing expressions.
Otherwise, this resets the expressions.
join_type (str): If set, alter the parsed join type
dialect (str): the dialect used to parse the input expressions.
copy (bool): if `False`, modify this expression instance in-place.
opts (kwargs): other options to use to parse the input expressions.
Returns:
Select: the modified expression.
"""
parse_args = {"dialect": dialect, **opts}
try:
expression = maybe_parse(expression, into=Join, prefix="JOIN", **parse_args)
except ParseError:
expression = maybe_parse(expression, into=(Join, Expression), **parse_args)
join = expression if isinstance(expression, Join) else Join(this=expression)
if isinstance(join.this, Select):
join.this.replace(join.this.subquery())
if join_type:
natural, side, kind = maybe_parse(join_type, into="JOIN_TYPE", **parse_args)
if natural:
join.set("natural", True)
if side:
join.set("side", side.text)
if kind:
join.set("kind", kind.text)
if on:
on = and_(*ensure_list(on), dialect=dialect, **opts)
join.set("on", on)
if join_alias:
join.set("this", alias_(join.args["this"], join_alias, table=True))
return _apply_list_builder(
join,
instance=self,
arg="joins",
append=append,
copy=copy,
**opts,
)
def where(self, *expressions, append=True, dialect=None, copy=True, **opts):
"""
Append to or set the WHERE expressions.
Example:
>>> Select().select("x").from_("tbl").where("x = 'a' OR x < 'b'").sql()
"SELECT x FROM tbl WHERE x = 'a' OR x < 'b'"
Args:
*expressions (str or Expression): the SQL code strings to parse.
If an `Expression` instance is passed, it will be used as-is.
Multiple expressions are combined with an AND operator.
append (bool): if `True`, AND the new expressions to any existing expression.
Otherwise, this resets the expression.
dialect (str): the dialect used to parse the input expressions.
copy (bool): if `False`, modify this expression instance in-place.
opts (kwargs): other options to use to parse the input expressions.
Returns:
Select: the modified expression.
"""
return _apply_conjunction_builder(
*expressions,
instance=self,
arg="where",
append=append,
into=Where,
dialect=dialect,
copy=copy,
**opts,
)
def having(self, *expressions, append=True, dialect=None, copy=True, **opts):
"""
Append to or set the HAVING expressions.
Example:
>>> Select().select("x", "COUNT(y)").from_("tbl").group_by("x").having("COUNT(y) > 3").sql()
'SELECT x, COUNT(y) FROM tbl GROUP BY x HAVING COUNT(y) > 3'
Args:
*expressions (str or Expression): the SQL code strings to parse.
If an `Expression` instance is passed, it will be used as-is.
Multiple expressions are combined with an AND operator.
append (bool): if `True`, AND the new expressions to any existing expression.
Otherwise, this resets the expression.
dialect (str): the dialect used to parse the input expressions.
copy (bool): if `False`, modify this expression instance in-place.
opts (kwargs): other options to use to parse the input expressions.
Returns:
Select: the modified expression.
"""
return _apply_conjunction_builder(
*expressions,
instance=self,
arg="having",
append=append,
into=Having,
dialect=dialect,
copy=copy,
**opts,
)
def distinct(self, distinct=True, copy=True):
"""
Set the OFFSET expression.
Example:
>>> Select().from_("tbl").select("x").distinct().sql()
'SELECT DISTINCT x FROM tbl'
Args:
distinct (bool): whether the Select should be distinct
copy (bool): if `False`, modify this expression instance in-place.
Returns:
Select: the modified expression.
"""
instance = _maybe_copy(self, copy)
instance.set("distinct", Distinct() if distinct else None)
return instance
def ctas(self, table, properties=None, dialect=None, copy=True, **opts):
"""
Convert this expression to a CREATE TABLE AS statement.
Example:
>>> Select().select("*").from_("tbl").ctas("x").sql()
'CREATE TABLE x AS SELECT * FROM tbl'
Args:
table (str or Expression): the SQL code string to parse as the table name.
If another `Expression` instance is passed, it will be used as-is.
properties (dict): an optional mapping of table properties
dialect (str): the dialect used to parse the input table.
copy (bool): if `False`, modify this expression instance in-place.
opts (kwargs): other options to use to parse the input table.
Returns:
Create: the CREATE TABLE AS expression
"""
instance = _maybe_copy(self, copy)
table_expression = maybe_parse(
table,
into=Table,
dialect=dialect,
**opts,
)
properties_expression = None
if properties:
properties_expression = Properties.from_dict(properties)
return Create(
this=table_expression,
kind="table",
expression=instance,
properties=properties_expression,
)
@property
def named_selects(self):
return [e.alias_or_name for e in self.expressions if e.alias_or_name]
@property
def selects(self):
return self.expressions
class Subquery(DerivedTable):
arg_types = {
"this": True,
"alias": False,
**QUERY_MODIFIERS,
}
def unnest(self):
"""
Returns the first non subquery.
"""
expression = self
while isinstance(expression, Subquery):
expression = expression.this
return expression
class TableSample(Expression):
arg_types = {
"this": False,
"method": False,
"bucket_numerator": False,
"bucket_denominator": False,
"bucket_field": False,
"percent": False,
"rows": False,
"size": False,
"seed": False,
}
class Pivot(Expression):
arg_types = {
"this": False,
"expressions": True,
"field": True,
"unpivot": True,
}
class Window(Expression):
arg_types = {
"this": True,
"partition_by": False,
"order": False,
"spec": False,
"alias": False,
}
class WindowSpec(Expression):
arg_types = {
"kind": False,
"start": False,
"start_side": False,
"end": False,
"end_side": False,
}
class Where(Expression):
pass
class Star(Expression):
arg_types = {"except": False, "replace": False}
@property
def name(self):
return "*"
class Parameter(Expression):
pass
class Placeholder(Expression):
arg_types = {}
class Null(Condition):
arg_types = {}
class Boolean(Condition):
pass
class DataType(Expression):
arg_types = {
"this": True,
"expressions": False,
"nested": False,
}
class Type(AutoName):
CHAR = auto()
NCHAR = auto()
VARCHAR = auto()
NVARCHAR = auto()
TEXT = auto()
BINARY = auto()
INT = auto()
TINYINT = auto()
SMALLINT = auto()
BIGINT = auto()
FLOAT = auto()
DOUBLE = auto()
DECIMAL = auto()
BOOLEAN = auto()
JSON = auto()
INTERVAL = auto()
TIMESTAMP = auto()
TIMESTAMPTZ = auto()
TIMESTAMPLTZ = auto()
DATE = auto()
DATETIME = auto()
ARRAY = auto()
MAP = auto()
UUID = auto()
GEOGRAPHY = auto()
GEOMETRY = auto()
STRUCT = auto()
NULLABLE = auto()
HLLSKETCH = auto()
SUPER = auto()
SERIAL = auto()
SMALLSERIAL = auto()
BIGSERIAL = auto()
XML = auto()
UNIQUEIDENTIFIER = auto()
MONEY = auto()
SMALLMONEY = auto()
ROWVERSION = auto()
IMAGE = auto()
VARIANT = auto()
OBJECT = auto()
@classmethod
def build(cls, dtype, **kwargs):
return DataType(
this=dtype if isinstance(dtype, DataType.Type) else DataType.Type[dtype.upper()],
**kwargs,
)
class StructKwarg(Expression):
arg_types = {"this": True, "expression": True}
# WHERE x <OP> EXISTS|ALL|ANY|SOME(SELECT ...)
class SubqueryPredicate(Predicate):
pass
class All(SubqueryPredicate):
pass
class Any(SubqueryPredicate):
pass
class Exists(SubqueryPredicate):
pass
# Commands to interact with the databases or engines
# These expressions don't truly parse the expression and consume
# whatever exists as a string until the end or a semicolon
class Command(Expression):
arg_types = {"this": True, "expression": False}
# Binary Expressions
# (ADD a b)
# (FROM table selects)
class Binary(Expression):
arg_types = {"this": True, "expression": True}
@property
def left(self):
return self.this
@property
def right(self):
return self.expression
class Add(Binary):
pass
class Connector(Binary, Condition):
pass
class And(Connector):
pass
class Or(Connector):
pass
class BitwiseAnd(Binary):
pass
class BitwiseLeftShift(Binary):
pass
class BitwiseOr(Binary):
pass
class BitwiseRightShift(Binary):
pass
class BitwiseXor(Binary):
pass
class Div(Binary):
pass
class Dot(Binary):
pass
class DPipe(Binary):
pass
class EQ(Binary, Predicate):
pass
class Escape(Binary):
pass
class GT(Binary, Predicate):
pass
class GTE(Binary, Predicate):
pass
class ILike(Binary, Predicate):
pass
class IntDiv(Binary):
pass
class Is(Binary, Predicate):
pass
class Like(Binary, Predicate):
pass
class SimilarTo(Binary, Predicate):
pass
class Distance(Binary):
pass
class LT(Binary, Predicate):
pass
class LTE(Binary, Predicate):
pass
class Mod(Binary):
pass
class Mul(Binary):
pass
class NEQ(Binary, Predicate):
pass
class Sub(Binary):
pass
# Unary Expressions
# (NOT a)
class Unary(Expression):
pass
class BitwiseNot(Unary):
pass
class Not(Unary, Condition):
pass
class Paren(Unary, Condition):
pass
class Neg(Unary):
pass
# Special Functions
class Alias(Expression):
arg_types = {"this": True, "alias": False}
class Aliases(Expression):
arg_types = {"this": True, "expressions": True}
@property
def aliases(self):
return self.expressions
class AtTimeZone(Expression):
arg_types = {"this": True, "zone": True}
class Between(Predicate):
arg_types = {"this": True, "low": True, "high": True}
class Bracket(Condition):
arg_types = {"this": True, "expressions": True}
class Distinct(Expression):
arg_types = {"this": False, "on": False}
class In(Predicate):
arg_types = {"this": True, "expressions": False, "query": False, "unnest": False}
class TimeUnit(Expression):
"""Automatically converts unit arg into a var."""
arg_types = {"unit": False}
def __init__(self, **args):
unit = args.get("unit")
if isinstance(unit, Column):
args["unit"] = Var(this=unit.name)
elif isinstance(unit, Week):
unit.set("this", Var(this=unit.this.name))
super().__init__(**args)
class Interval(TimeUnit):
arg_types = {"this": True, "unit": False}
class IgnoreNulls(Expression):
pass
class RespectNulls(Expression):
pass
# Functions
class Func(Condition):
"""
The base class for all function expressions.
Attributes
is_var_len_args (bool): if set to True the last argument defined in
arg_types will be treated as a variable length argument and the
argument's value will be stored as a list.
_sql_names (list): determines the SQL name (1st item in the list) and
aliases (subsequent items) for this function expression. These
values are used to map this node to a name during parsing as well
as to provide the function's name during SQL string generation. By
default the SQL name is set to the expression's class name transformed
to snake case.
"""
is_var_len_args = False
@classmethod
def from_arg_list(cls, args):
if cls.is_var_len_args:
all_arg_keys = list(cls.arg_types)
# If this function supports variable length argument treat the last argument as such.
non_var_len_arg_keys = all_arg_keys[:-1] if cls.is_var_len_args else all_arg_keys
num_non_var = len(non_var_len_arg_keys)
args_dict = {arg_key: arg for arg, arg_key in zip(args, non_var_len_arg_keys)}
args_dict[all_arg_keys[-1]] = args[num_non_var:]
else:
args_dict = {arg_key: arg for arg, arg_key in zip(args, cls.arg_types)}
return cls(**args_dict)
@classmethod
def sql_names(cls):
if cls is Func:
raise NotImplementedError("SQL name is only supported by concrete function implementations")
if not hasattr(cls, "_sql_names"):
cls._sql_names = [camel_to_snake_case(cls.__name__)]
return cls._sql_names
@classmethod
def sql_name(cls):
return cls.sql_names()[0]
@classmethod
def default_parser_mappings(cls):
return {name: cls.from_arg_list for name in cls.sql_names()}
class AggFunc(Func):
pass
class Abs(Func):
pass
class Anonymous(Func):
arg_types = {"this": True, "expressions": False}
is_var_len_args = True
class ApproxDistinct(AggFunc):
arg_types = {"this": True, "accuracy": False}
class Array(Func):
arg_types = {"expressions": False}
is_var_len_args = True
class ArrayAgg(AggFunc):
pass
class ArrayAll(Func):
arg_types = {"this": True, "expression": True}
class ArrayAny(Func):
arg_types = {"this": True, "expression": True}
class ArrayContains(Func):
arg_types = {"this": True, "expression": True}
class ArrayFilter(Func):
arg_types = {"this": True, "expression": True}
_sql_names = ["FILTER", "ARRAY_FILTER"]
class ArraySize(Func):
pass
class ArraySort(Func):
arg_types = {"this": True, "expression": False}
class ArraySum(Func):
pass
class ArrayUnionAgg(AggFunc):
pass
class Avg(AggFunc):
pass
class AnyValue(AggFunc):
pass
class Case(Func):
arg_types = {"this": False, "ifs": True, "default": False}
class Cast(Func):
arg_types = {"this": True, "to": True}
class TryCast(Cast):
pass
class Ceil(Func):
arg_types = {"this": True, "decimals": False}
_sql_names = ["CEIL", "CEILING"]
class Coalesce(Func):
arg_types = {"this": True, "expressions": False}
is_var_len_args = True
class ConcatWs(Func):
arg_types = {"expressions": False}
is_var_len_args = True
class Count(AggFunc):
pass
class CurrentDate(Func):
arg_types = {"this": False}
class CurrentDatetime(Func):
arg_types = {"this": False}
class CurrentTime(Func):
arg_types = {"this": False}
class CurrentTimestamp(Func):
arg_types = {"this": False}
class DateAdd(Func, TimeUnit):
arg_types = {"this": True, "expression": True, "unit": False}
class DateSub(Func, TimeUnit):
arg_types = {"this": True, "expression": True, "unit": False}
class DateDiff(Func, TimeUnit):
arg_types = {"this": True, "expression": True, "unit": False}
class DateTrunc(Func, TimeUnit):
arg_types = {"this": True, "unit": True, "zone": False}
class DatetimeAdd(Func, TimeUnit):
arg_types = {"this": True, "expression": True, "unit": False}
class DatetimeSub(Func, TimeUnit):
arg_types = {"this": True, "expression": True, "unit": False}
class DatetimeDiff(Func, TimeUnit):
arg_types = {"this": True, "expression": True, "unit": False}
class DatetimeTrunc(Func, TimeUnit):
arg_types = {"this": True, "unit": True, "zone": False}
class Extract(Func):
arg_types = {"this": True, "expression": True}
class TimestampAdd(Func, TimeUnit):
arg_types = {"this": True, "expression": True, "unit": False}
class TimestampSub(Func, TimeUnit):
arg_types = {"this": True, "expression": True, "unit": False}
class TimestampDiff(Func, TimeUnit):
arg_types = {"this": True, "expression": True, "unit": False}
class TimestampTrunc(Func, TimeUnit):
arg_types = {"this": True, "unit": True, "zone": False}
class TimeAdd(Func, TimeUnit):
arg_types = {"this": True, "expression": True, "unit": False}
class TimeSub(Func, TimeUnit):
arg_types = {"this": True, "expression": True, "unit": False}
class TimeDiff(Func, TimeUnit):
arg_types = {"this": True, "expression": True, "unit": False}
class TimeTrunc(Func, TimeUnit):
arg_types = {"this": True, "unit": True, "zone": False}
class DateStrToDate(Func):
pass
class DateToDateStr(Func):
pass
class DateToDi(Func):
pass
class Day(Func):
pass
class DiToDate(Func):
pass
class Exp(Func):
pass
class Explode(Func):
pass
class Floor(Func):
arg_types = {"this": True, "decimals": False}
class Greatest(Func):
arg_types = {"this": True, "expressions": True}
is_var_len_args = True
class GroupConcat(Func):
arg_types = {"this": True, "separator": False}
class If(Func):
arg_types = {"this": True, "true": True, "false": False}
class IfNull(Func):
arg_types = {"this": True, "expression": False}
_sql_names = ["IFNULL", "NVL"]
class Initcap(Func):
pass
class JSONExtract(Func):
arg_types = {"this": True, "path": True}
_sql_names = ["JSON_EXTRACT"]
class JSONExtractScalar(JSONExtract):
_sql_names = ["JSON_EXTRACT_SCALAR"]
class JSONBExtract(JSONExtract):
_sql_names = ["JSONB_EXTRACT"]
class JSONBExtractScalar(JSONExtract):
_sql_names = ["JSONB_EXTRACT_SCALAR"]
class Least(Func):
arg_types = {"this": True, "expressions": True}
is_var_len_args = True
class Length(Func):
pass
class Levenshtein(Func):
arg_types = {"this": True, "expression": False}
class Ln(Func):
pass
class Log(Func):
arg_types = {"this": True, "expression": False}
class Log2(Func):
pass
class Log10(Func):
pass
class Lower(Func):
pass
class Map(Func):
arg_types = {"keys": True, "values": True}
class Max(AggFunc):
pass
class Min(AggFunc):
pass
class Month(Func):
pass
class Nvl2(Func):
arg_types = {"this": True, "true": True, "false": False}
class Posexplode(Func):
pass
class Pow(Func):
arg_types = {"this": True, "power": True}
_sql_names = ["POWER", "POW"]
class Quantile(AggFunc):
arg_types = {"this": True, "quantile": True}
class ApproxQuantile(Quantile):
pass
class Reduce(Func):
arg_types = {"this": True, "initial": True, "merge": True, "finish": True}
class RegexpLike(Func):
arg_types = {"this": True, "expression": True, "flag": False}
class RegexpSplit(Func):
arg_types = {"this": True, "expression": True}
class Round(Func):
arg_types = {"this": True, "decimals": False}
class SafeDivide(Func):
arg_types = {"this": True, "expression": True}
class SetAgg(AggFunc):
pass
class SortArray(Func):
arg_types = {"this": True, "asc": False}
class Split(Func):
arg_types = {"this": True, "expression": True}
# Start may be omitted in the case of postgres
# https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6
class Substring(Func):
arg_types = {"this": True, "start": False, "length": False}
class StrPosition(Func):
arg_types = {"this": True, "substr": True, "position": False}
class StrToDate(Func):
arg_types = {"this": True, "format": True}
class StrToTime(Func):
arg_types = {"this": True, "format": True}
class StrToUnix(Func):
arg_types = {"this": True, "format": True}
class Struct(Func):
arg_types = {"expressions": True}
is_var_len_args = True
class StructExtract(Func):
arg_types = {"this": True, "expression": True}
class Sum(AggFunc):
pass
class Sqrt(Func):
pass
class Stddev(AggFunc):
pass
class StddevPop(AggFunc):
pass
class StddevSamp(AggFunc):
pass
class TimeToStr(Func):
arg_types = {"this": True, "format": True}
class TimeToTimeStr(Func):
pass
class TimeToUnix(Func):
pass
class TimeStrToDate(Func):
pass
class TimeStrToTime(Func):
pass
class TimeStrToUnix(Func):
pass
class Trim(Func):
arg_types = {
"this": True,
"position": False,
"expression": False,
"collation": False,
}
class TsOrDsAdd(Func, TimeUnit):
arg_types = {"this": True, "expression": True, "unit": False}
class TsOrDsToDateStr(Func):
pass
class TsOrDsToDate(Func):
arg_types = {"this": True, "format": False}
class TsOrDiToDi(Func):
pass
class UnixToStr(Func):
arg_types = {"this": True, "format": True}
class UnixToTime(Func):
arg_types = {"this": True, "scale": False}
SECONDS = Literal.string("seconds")
MILLIS = Literal.string("millis")
MICROS = Literal.string("micros")
class UnixToTimeStr(Func):
pass
class Upper(Func):
pass
class Variance(AggFunc):
_sql_names = ["VARIANCE", "VARIANCE_SAMP", "VAR_SAMP"]
class VariancePop(AggFunc):
_sql_names = ["VARIANCE_POP", "VAR_POP"]
class Week(Func):
arg_types = {"this": True, "mode": False}
class Year(Func):
pass
def _norm_args(expression):
args = {}
for k, arg in expression.args.items():
if isinstance(arg, list):
arg = [_norm_arg(a) for a in arg]
if not arg:
arg = None
else:
arg = _norm_arg(arg)
if arg is not None:
args[k] = arg
return args
def _norm_arg(arg):
return arg.lower() if isinstance(arg, str) else arg
ALL_FUNCTIONS = subclasses(__name__, Func, (AggFunc, Anonymous, Func))
def maybe_parse(
sql_or_expression,
*,
into=None,
dialect=None,
prefix=None,
**opts,
):
"""Gracefully handle a possible string or expression.
Example:
>>> maybe_parse("1")
(LITERAL this: 1, is_string: False)
>>> maybe_parse(to_identifier("x"))
(IDENTIFIER this: x, quoted: False)
Args:
sql_or_expression (str or Expression): the SQL code string or an expression
into (Expression): the SQLGlot Expression to parse into
dialect (str): the dialect used to parse the input expressions (in the case that an
input expression is a SQL string).
prefix (str): a string to prefix the sql with before it gets parsed
(automatically includes a space)
**opts: other options to use to parse the input expressions (again, in the case
that an input expression is a SQL string).
Returns:
Expression: the parsed or given expression.
"""
if isinstance(sql_or_expression, Expression):
return sql_or_expression
import sqlglot
sql = str(sql_or_expression)
if prefix:
sql = f"{prefix} {sql}"
return sqlglot.parse_one(sql, read=dialect, into=into, **opts)
def _maybe_copy(instance, copy=True):
return instance.copy() if copy else instance
def _is_wrong_expression(expression, into):
return isinstance(expression, Expression) and not isinstance(expression, into)
def _apply_builder(
expression,
instance,
arg,
copy=True,
prefix=None,
into=None,
dialect=None,
**opts,
):
if _is_wrong_expression(expression, into):
expression = into(this=expression)
instance = _maybe_copy(instance, copy)
expression = maybe_parse(
sql_or_expression=expression,
prefix=prefix,
into=into,
dialect=dialect,
**opts,
)
instance.set(arg, expression)
return instance
def _apply_child_list_builder(
*expressions,
instance,
arg,
append=True,
copy=True,
prefix=None,
into=None,
dialect=None,
properties=None,
**opts,
):
instance = _maybe_copy(instance, copy)
parsed = []
for expression in expressions:
if _is_wrong_expression(expression, into):
expression = into(expressions=[expression])
expression = maybe_parse(
expression,
into=into,
dialect=dialect,
prefix=prefix,
**opts,
)
parsed.extend(expression.expressions)
existing = instance.args.get(arg)
if append and existing:
parsed = existing.expressions + parsed
child = into(expressions=parsed)
for k, v in (properties or {}).items():
child.set(k, v)
instance.set(arg, child)
return instance
def _apply_list_builder(
*expressions,
instance,
arg,
append=True,
copy=True,
prefix=None,
into=None,
dialect=None,
**opts,
):
inst = _maybe_copy(instance, copy)
expressions = [
maybe_parse(
sql_or_expression=expression,
into=into,
prefix=prefix,
dialect=dialect,
**opts,
)
for expression in expressions
]
existing_expressions = inst.args.get(arg)
if append and existing_expressions:
expressions = existing_expressions + expressions
inst.set(arg, expressions)
return inst
def _apply_conjunction_builder(
*expressions,
instance,
arg,
into=None,
append=True,
copy=True,
dialect=None,
**opts,
):
expressions = [exp for exp in expressions if exp is not None and exp != ""]
if not expressions:
return instance
inst = _maybe_copy(instance, copy)
existing = inst.args.get(arg)
if append and existing is not None:
expressions = [existing.this if into else existing] + list(expressions)
node = and_(*expressions, dialect=dialect, **opts)
inst.set(arg, into(this=node) if into else node)
return inst
def _combine(expressions, operator, dialect=None, **opts):
expressions = [condition(expression, dialect=dialect, **opts) for expression in expressions]
this = expressions[0]
if expressions[1:]:
this = _wrap_operator(this)
for expression in expressions[1:]:
this = operator(this=this, expression=_wrap_operator(expression))
return this
def _wrap_operator(expression):
if isinstance(expression, (And, Or, Not)):
expression = Paren(this=expression)
return expression
def select(*expressions, dialect=None, **opts):
"""
Initializes a syntax tree from one or multiple SELECT expressions.
Example:
>>> select("col1", "col2").from_("tbl").sql()
'SELECT col1, col2 FROM tbl'
Args:
*expressions (str or Expression): the SQL code string to parse as the expressions of a
SELECT statement. If an Expression instance is passed, this is used as-is.
dialect (str): the dialect used to parse the input expressions (in the case that an
input expression is a SQL string).
**opts: other options to use to parse the input expressions (again, in the case
that an input expression is a SQL string).
Returns:
Select: the syntax tree for the SELECT statement.
"""
return Select().select(*expressions, dialect=dialect, **opts)
def from_(*expressions, dialect=None, **opts):
"""
Initializes a syntax tree from a FROM expression.
Example:
>>> from_("tbl").select("col1", "col2").sql()
'SELECT col1, col2 FROM tbl'
Args:
*expressions (str or Expression): the SQL code string to parse as the FROM expressions of a
SELECT statement. If an Expression instance is passed, this is used as-is.
dialect (str): the dialect used to parse the input expression (in the case that the
input expression is a SQL string).
**opts: other options to use to parse the input expressions (again, in the case
that the input expression is a SQL string).
Returns:
Select: the syntax tree for the SELECT statement.
"""
return Select().from_(*expressions, dialect=dialect, **opts)
def update(table, properties, where=None, from_=None, dialect=None, **opts):
"""
Creates an update statement.
Example:
>>> update("my_table", {"x": 1, "y": "2", "z": None}, from_="baz", where="id > 1").sql()
"UPDATE my_table SET x = 1, y = '2', z = NULL FROM baz WHERE id > 1"
Args:
*properties (Dict[str, Any]): dictionary of properties to set which are
auto converted to sql objects eg None -> NULL
where (str): sql conditional parsed into a WHERE statement
from_ (str): sql statement parsed into a FROM statement
dialect (str): the dialect used to parse the input expressions.
**opts: other options to use to parse the input expressions.
Returns:
Update: the syntax tree for the UPDATE statement.
"""
update = Update(this=maybe_parse(table, into=Table, dialect=dialect))
update.set(
"expressions",
[EQ(this=maybe_parse(k, dialect=dialect, **opts), expression=convert(v)) for k, v in properties.items()],
)
if from_:
update.set("from", maybe_parse(from_, into=From, dialect=dialect, prefix="FROM", **opts))
if where:
update.set("where", maybe_parse(where, into=Where, dialect=dialect, prefix="WHERE", **opts))
return update
def condition(expression, dialect=None, **opts):
"""
Initialize a logical condition expression.
Example:
>>> condition("x=1").sql()
'x = 1'
This is helpful for composing larger logical syntax trees:
>>> where = condition("x=1")
>>> where = where.and_("y=1")
>>> Select().from_("tbl").select("*").where(where).sql()
'SELECT * FROM tbl WHERE x = 1 AND y = 1'
Args:
*expression (str or Expression): the SQL code string to parse.
If an Expression instance is passed, this is used as-is.
dialect (str): the dialect used to parse the input expression (in the case that the
input expression is a SQL string).
**opts: other options to use to parse the input expressions (again, in the case
that the input expression is a SQL string).
Returns:
Condition: the expression
"""
return maybe_parse(
expression,
into=Condition,
dialect=dialect,
**opts,
)
def and_(*expressions, dialect=None, **opts):
"""
Combine multiple conditions with an AND logical operator.
Example:
>>> and_("x=1", and_("y=1", "z=1")).sql()
'x = 1 AND (y = 1 AND z = 1)'
Args:
*expressions (str or Expression): the SQL code strings to parse.
If an Expression instance is passed, this is used as-is.
dialect (str): the dialect used to parse the input expression.
**opts: other options to use to parse the input expressions.
Returns:
And: the new condition
"""
return _combine(expressions, And, dialect, **opts)
def or_(*expressions, dialect=None, **opts):
"""
Combine multiple conditions with an OR logical operator.
Example:
>>> or_("x=1", or_("y=1", "z=1")).sql()
'x = 1 OR (y = 1 OR z = 1)'
Args:
*expressions (str or Expression): the SQL code strings to parse.
If an Expression instance is passed, this is used as-is.
dialect (str): the dialect used to parse the input expression.
**opts: other options to use to parse the input expressions.
Returns:
Or: the new condition
"""
return _combine(expressions, Or, dialect, **opts)
def not_(expression, dialect=None, **opts):
"""
Wrap a condition with a NOT operator.
Example:
>>> not_("this_suit='black'").sql()
"NOT this_suit = 'black'"
Args:
expression (str or Expression): the SQL code strings to parse.
If an Expression instance is passed, this is used as-is.
dialect (str): the dialect used to parse the input expression.
**opts: other options to use to parse the input expressions.
Returns:
Not: the new condition
"""
this = condition(
expression,
dialect=dialect,
**opts,
)
return Not(this=_wrap_operator(this))
def paren(expression):
return Paren(this=expression)
SAFE_IDENTIFIER_RE = re.compile(r"^[a-zA-Z][\w]*$")
def to_identifier(alias, quoted=None):
if alias is None:
return None
if isinstance(alias, Identifier):
identifier = alias
elif isinstance(alias, str):
if quoted is None:
quoted = not re.match(SAFE_IDENTIFIER_RE, alias)
identifier = Identifier(this=alias, quoted=quoted)
else:
raise ValueError(f"Alias needs to be a string or an Identifier, got: {alias.__class__}")
return identifier
def alias_(expression, alias, table=False, dialect=None, quoted=None, **opts):
"""
Create an Alias expression.
Expample:
>>> alias_('foo', 'bar').sql()
'foo AS bar'
Args:
expression (str or Expression): the SQL code strings to parse.
If an Expression instance is passed, this is used as-is.
alias (str or Identifier): the alias name to use. If the name has
special characters it is quoted.
table (boolean): create a table alias, default false
dialect (str): the dialect used to parse the input expression.
**opts: other options to use to parse the input expressions.
Returns:
Alias: the aliased expression
"""
exp = maybe_parse(expression, dialect=dialect, **opts)
alias = to_identifier(alias, quoted=quoted)
alias = TableAlias(this=alias) if table else alias
if "alias" in exp.arg_types:
exp = exp.copy()
exp.set("alias", alias)
return exp
return Alias(this=exp, alias=alias)
def subquery(expression, alias=None, dialect=None, **opts):
"""
Build a subquery expression.
Expample:
>>> subquery('select x from tbl', 'bar').select('x').sql()
'SELECT x FROM (SELECT x FROM tbl) AS bar'
Args:
expression (str or Expression): the SQL code strings to parse.
If an Expression instance is passed, this is used as-is.
alias (str or Expression): the alias name to use.
dialect (str): the dialect used to parse the input expression.
**opts: other options to use to parse the input expressions.
Returns:
Select: a new select with the subquery expression included
"""
expression = maybe_parse(expression, dialect=dialect, **opts).subquery(alias)
return Select().from_(expression, dialect=dialect, **opts)
def column(col, table=None, quoted=None):
"""
Build a Column.
Args:
col (str or Expression): column name
table (str or Expression): table name
Returns:
Column: column instance
"""
return Column(
this=to_identifier(col, quoted=quoted),
table=to_identifier(table, quoted=quoted),
)
def table_(table, db=None, catalog=None, quoted=None):
"""Build a Table.
Args:
table (str or Expression): column name
db (str or Expression): db name
catalog (str or Expression): catalog name
Returns:
Table: table instance
"""
return Table(
this=to_identifier(table, quoted=quoted),
db=to_identifier(db, quoted=quoted),
catalog=to_identifier(catalog, quoted=quoted),
)
def convert(value):
"""Convert a python value into an expression object.
Raises an error if a conversion is not possible.
Args:
value (Any): a python object
Returns:
Expression: the equivalent expression object
"""
if isinstance(value, Expression):
return value
if value is None:
return NULL
if isinstance(value, bool):
return Boolean(this=value)
if isinstance(value, str):
return Literal.string(value)
if isinstance(value, numbers.Number):
return Literal.number(value)
if isinstance(value, tuple):
return Tuple(expressions=[convert(v) for v in value])
if isinstance(value, list):
return Array(expressions=[convert(v) for v in value])
if isinstance(value, dict):
return Map(
keys=[convert(k) for k in value.keys()],
values=[convert(v) for v in value.values()],
)
raise ValueError(f"Cannot convert {value}")
def replace_children(expression, fun):
"""
Replace children of an expression with the result of a lambda fun(child) -> exp.
"""
for k, v in expression.args.items():
is_list_arg = isinstance(v, list)
child_nodes = v if is_list_arg else [v]
new_child_nodes = []
for cn in child_nodes:
if isinstance(cn, Expression):
cns = ensure_list(fun(cn))
for child_node in cns:
new_child_nodes.append(child_node)
child_node.parent = expression
child_node.arg_key = k
else:
new_child_nodes.append(cn)
expression.args[k] = new_child_nodes if is_list_arg else list_get(new_child_nodes, 0)
def column_table_names(expression):
"""
Return all table names referenced through columns in an expression.
Example:
>>> import sqlglot
>>> column_table_names(sqlglot.parse_one("a.b AND c.d AND c.e"))
['c', 'a']
Args:
expression (sqlglot.Expression): expression to find table names
Returns:
list: A list of unique names
"""
return list(dict.fromkeys(column.table for column in expression.find_all(Column)))
TRUE = Boolean(this=True)
FALSE = Boolean(this=False)
NULL = Null()