""" ## Expressions Every AST node in SQLGlot is represented by a subclass of `Expression`. This module contains the implementation of all supported `Expression` types. Additionally, it exposes a number of helper functions, which are mainly used to programmatically build SQL expressions, such as `sqlglot.expressions.select`. ---- """ from __future__ import annotations import datetime import math import numbers import re import textwrap import typing as t from collections import deque from copy import deepcopy from decimal import Decimal from enum import auto from functools import reduce from sqlglot.errors import ErrorLevel, ParseError from sqlglot.helper import ( AutoName, camel_to_snake_case, ensure_collection, ensure_list, seq_get, subclasses, to_bool, ) from sqlglot.tokens import Token, TokenError if t.TYPE_CHECKING: from typing_extensions import Self from sqlglot._typing import E, Lit from sqlglot.dialects.dialect import DialectType Q = t.TypeVar("Q", bound="Query") S = t.TypeVar("S", bound="SetOperation") class _Expression(type): def __new__(cls, clsname, bases, attrs): klass = super().__new__(cls, clsname, bases, attrs) # When an Expression class is created, its key is automatically set to be # the lowercase version of the class' name. klass.key = clsname.lower() # This is so that docstrings are not inherited in pdoc klass.__doc__ = klass.__doc__ or "" return klass SQLGLOT_META = "sqlglot.meta" SQLGLOT_ANONYMOUS = "sqlglot.anonymous" TABLE_PARTS = ("this", "db", "catalog") COLUMN_PARTS = ("this", "table", "db", "catalog") class Expression(metaclass=_Expression): """ The base class for all expressions in a syntax tree. Each Expression encapsulates any necessary context, such as its child expressions, their names (arg keys), and whether a given child expression is optional or not. Attributes: key: a unique key for each class in the Expression hierarchy. This is useful for hashing and representing expressions as strings. 
arg_types: determines the arguments (child nodes) supported by an expression. It maps arg keys to booleans that indicate whether the corresponding args are optional. parent: a reference to the parent expression (or None, in case of root expressions). arg_key: the arg key an expression is associated with, i.e. the name its parent expression uses to refer to it. index: the index of an expression if it is inside of a list argument in its parent. comments: a list of comments that are associated with a given expression. This is used in order to preserve comments when transpiling SQL code. type: the `sqlglot.expressions.DataType` type of an expression. This is inferred by the optimizer, in order to enable some transformations that require type information. meta: a dictionary that can be used to store useful metadata for a given expression. Example: >>> class Foo(Expression): ... arg_types = {"this": True, "expression": False} The above definition informs us that Foo is an Expression that requires an argument called "this" and may also optionally receive an argument called "expression". Args: args: a mapping used for retrieving the arguments of an expression, given their arg keys. 
""" key = "expression" arg_types = {"this": True} __slots__ = ("args", "parent", "arg_key", "index", "comments", "_type", "_meta", "_hash") def __init__(self, **args: t.Any): self.args: t.Dict[str, t.Any] = args self.parent: t.Optional[Expression] = None self.arg_key: t.Optional[str] = None self.index: t.Optional[int] = None self.comments: t.Optional[t.List[str]] = None self._type: t.Optional[DataType] = None self._meta: t.Optional[t.Dict[str, t.Any]] = None self._hash: t.Optional[int] = None for arg_key, value in self.args.items(): self._set_parent(arg_key, value) def __eq__(self, other) -> bool: return type(self) is type(other) and hash(self) == hash(other) @property def hashable_args(self) -> t.Any: return frozenset( (k, tuple(_norm_arg(a) for a in v) if type(v) is list else _norm_arg(v)) for k, v in self.args.items() if not (v is None or v is False or (type(v) is list and not v)) ) def __hash__(self) -> int: if self._hash is not None: return self._hash return hash((self.__class__, self.hashable_args)) @property def this(self) -> t.Any: """ Retrieves the argument with key "this". """ return self.args.get("this") @property def expression(self) -> t.Any: """ Retrieves the argument with key "expression". """ return self.args.get("expression") @property def expressions(self) -> t.List[t.Any]: """ Retrieves the argument with key "expressions". """ return self.args.get("expressions") or [] def text(self, key) -> str: """ Returns a textual representation of the argument corresponding to "key". This can only be used for args that are strings or leaf Expression instances, such as identifiers and literals. """ field = self.args.get(key) if isinstance(field, str): return field if isinstance(field, (Identifier, Literal, Var)): return field.this if isinstance(field, (Star, Null)): return field.name return "" @property def is_string(self) -> bool: """ Checks whether a Literal expression is a string. 
""" return isinstance(self, Literal) and self.args["is_string"] @property def is_number(self) -> bool: """ Checks whether a Literal expression is a number. """ return (isinstance(self, Literal) and not self.args["is_string"]) or ( isinstance(self, Neg) and self.this.is_number ) def to_py(self) -> t.Any: """ Returns a Python object equivalent of the SQL node. """ raise ValueError(f"{self} cannot be converted to a Python object.") @property def is_int(self) -> bool: """ Checks whether an expression is an integer. """ return self.is_number and isinstance(self.to_py(), int) @property def is_star(self) -> bool: """Checks whether an expression is a star.""" return isinstance(self, Star) or (isinstance(self, Column) and isinstance(self.this, Star)) @property def alias(self) -> str: """ Returns the alias of the expression, or an empty string if it's not aliased. """ if isinstance(self.args.get("alias"), TableAlias): return self.args["alias"].name return self.text("alias") @property def alias_column_names(self) -> t.List[str]: table_alias = self.args.get("alias") if not table_alias: return [] return [c.name for c in table_alias.args.get("columns") or []] @property def name(self) -> str: return self.text("this") @property def alias_or_name(self) -> str: return self.alias or self.name @property def output_name(self) -> str: """ Name of the output column if this expression is a selection. If the Expression has no output name, an empty string is returned. 
Example: >>> from sqlglot import parse_one >>> parse_one("SELECT a").expressions[0].output_name 'a' >>> parse_one("SELECT b AS c").expressions[0].output_name 'c' >>> parse_one("SELECT 1 + 2").expressions[0].output_name '' """ return "" @property def type(self) -> t.Optional[DataType]: return self._type @type.setter def type(self, dtype: t.Optional[DataType | DataType.Type | str]) -> None: if dtype and not isinstance(dtype, DataType): dtype = DataType.build(dtype) self._type = dtype # type: ignore def is_type(self, *dtypes) -> bool: return self.type is not None and self.type.is_type(*dtypes) def is_leaf(self) -> bool: return not any(isinstance(v, (Expression, list)) for v in self.args.values()) @property def meta(self) -> t.Dict[str, t.Any]: if self._meta is None: self._meta = {} return self._meta def __deepcopy__(self, memo): root = self.__class__() stack = [(self, root)] while stack: node, copy = stack.pop() if node.comments is not None: copy.comments = deepcopy(node.comments) if node._type is not None: copy._type = deepcopy(node._type) if node._meta is not None: copy._meta = deepcopy(node._meta) if node._hash is not None: copy._hash = node._hash for k, vs in node.args.items(): if hasattr(vs, "parent"): stack.append((vs, vs.__class__())) copy.set(k, stack[-1][-1]) elif type(vs) is list: copy.args[k] = [] for v in vs: if hasattr(v, "parent"): stack.append((v, v.__class__())) copy.append(k, stack[-1][-1]) else: copy.append(k, v) else: copy.args[k] = vs return root def copy(self) -> Self: """ Returns a deep copy of the expression. 
""" return deepcopy(self) def add_comments(self, comments: t.Optional[t.List[str]] = None, prepend: bool = False) -> None: if self.comments is None: self.comments = [] if comments: for comment in comments: _, *meta = comment.split(SQLGLOT_META) if meta: for kv in "".join(meta).split(","): k, *v = kv.split("=") value = v[0].strip() if v else True self.meta[k.strip()] = to_bool(value) if not prepend: self.comments.append(comment) if prepend: self.comments = comments + self.comments def pop_comments(self) -> t.List[str]: comments = self.comments or [] self.comments = None return comments def append(self, arg_key: str, value: t.Any) -> None: """ Appends value to arg_key if it's a list or sets it as a new list. Args: arg_key (str): name of the list expression arg value (Any): value to append to the list """ if type(self.args.get(arg_key)) is not list: self.args[arg_key] = [] self._set_parent(arg_key, value) values = self.args[arg_key] if hasattr(value, "parent"): value.index = len(values) values.append(value) def set( self, arg_key: str, value: t.Any, index: t.Optional[int] = None, overwrite: bool = True, ) -> None: """ Sets arg_key to value. Args: arg_key: name of the expression arg. value: value to set the arg to. index: if the arg is a list, this specifies what position to add the value in it. overwrite: assuming an index is given, this determines whether to overwrite the list entry instead of only inserting a new value (i.e., like list.insert). 
""" if index is not None: expressions = self.args.get(arg_key) or [] if seq_get(expressions, index) is None: return if value is None: expressions.pop(index) for v in expressions[index:]: v.index = v.index - 1 return if isinstance(value, list): expressions.pop(index) expressions[index:index] = value elif overwrite: expressions[index] = value else: expressions.insert(index, value) value = expressions elif value is None: self.args.pop(arg_key, None) return self.args[arg_key] = value self._set_parent(arg_key, value, index) def _set_parent(self, arg_key: str, value: t.Any, index: t.Optional[int] = None) -> None: if hasattr(value, "parent"): value.parent = self value.arg_key = arg_key value.index = index elif type(value) is list: for index, v in enumerate(value): if hasattr(v, "parent"): v.parent = self v.arg_key = arg_key v.index = index @property def depth(self) -> int: """ Returns the depth of this tree. """ if self.parent: return self.parent.depth + 1 return 0 def iter_expressions(self, reverse: bool = False) -> t.Iterator[Expression]: """Yields the key and expression for all arguments, exploding list args.""" for vs in reversed(self.args.values()) if reverse else self.args.values(): # type: ignore if type(vs) is list: for v in reversed(vs) if reverse else vs: # type: ignore if hasattr(v, "parent"): yield v else: if hasattr(vs, "parent"): yield vs def find(self, *expression_types: t.Type[E], bfs: bool = True) -> t.Optional[E]: """ Returns the first node in this tree which matches at least one of the specified types. Args: expression_types: the expression type(s) to match. bfs: whether to search the AST using the BFS algorithm (DFS is used if false). Returns: The node which matches the criteria or None if no such node was found. 
""" return next(self.find_all(*expression_types, bfs=bfs), None) def find_all(self, *expression_types: t.Type[E], bfs: bool = True) -> t.Iterator[E]: """ Returns a generator object which visits all nodes in this tree and only yields those that match at least one of the specified expression types. Args: expression_types: the expression type(s) to match. bfs: whether to search the AST using the BFS algorithm (DFS is used if false). Returns: The generator object. """ for expression in self.walk(bfs=bfs): if isinstance(expression, expression_types): yield expression def find_ancestor(self, *expression_types: t.Type[E]) -> t.Optional[E]: """ Returns a nearest parent matching expression_types. Args: expression_types: the expression type(s) to match. Returns: The parent node. """ ancestor = self.parent while ancestor and not isinstance(ancestor, expression_types): ancestor = ancestor.parent return ancestor # type: ignore @property def parent_select(self) -> t.Optional[Select]: """ Returns the parent select statement. """ return self.find_ancestor(Select) @property def same_parent(self) -> bool: """Returns if the parent is the same class as itself.""" return type(self.parent) is self.__class__ def root(self) -> Expression: """ Returns the root expression of this tree. """ expression = self while expression.parent: expression = expression.parent return expression def walk( self, bfs: bool = True, prune: t.Optional[t.Callable[[Expression], bool]] = None ) -> t.Iterator[Expression]: """ Returns a generator object which visits all nodes in this tree. Args: bfs: if set to True the BFS traversal order will be applied, otherwise the DFS traversal will be used instead. prune: callable that returns True if the generator should stop traversing this branch of the tree. Returns: the generator object. 
""" if bfs: yield from self.bfs(prune=prune) else: yield from self.dfs(prune=prune) def dfs( self, prune: t.Optional[t.Callable[[Expression], bool]] = None ) -> t.Iterator[Expression]: """ Returns a generator object which visits all nodes in this tree in the DFS (Depth-first) order. Returns: The generator object. """ stack = [self] while stack: node = stack.pop() yield node if prune and prune(node): continue for v in node.iter_expressions(reverse=True): stack.append(v) def bfs( self, prune: t.Optional[t.Callable[[Expression], bool]] = None ) -> t.Iterator[Expression]: """ Returns a generator object which visits all nodes in this tree in the BFS (Breadth-first) order. Returns: The generator object. """ queue = deque([self]) while queue: node = queue.popleft() yield node if prune and prune(node): continue for v in node.iter_expressions(): queue.append(v) def unnest(self): """ Returns the first non parenthesis child or self. """ expression = self while type(expression) is Paren: expression = expression.this return expression def unalias(self): """ Returns the inner expression if this is an Alias. """ if isinstance(self, Alias): return self.this return self def unnest_operands(self): """ Returns unnested operands as a tuple. """ return tuple(arg.unnest() for arg in self.iter_expressions()) def flatten(self, unnest=True): """ Returns a generator which yields child nodes whose parents are the same class. A AND B AND C -> [A, B, C] """ for node in self.dfs(prune=lambda n: n.parent and type(n) is not self.__class__): if type(node) is not self.__class__: yield node.unnest() if unnest and not isinstance(node, Subquery) else node def __str__(self) -> str: return self.sql() def __repr__(self) -> str: return _to_s(self) def to_s(self) -> str: """ Same as __repr__, but includes additional information which can be useful for debugging, like empty or missing args and the AST nodes' object IDs. 
""" return _to_s(self, verbose=True) def sql(self, dialect: DialectType = None, **opts) -> str: """ Returns SQL string representation of this tree. Args: dialect: the dialect of the output SQL string (eg. "spark", "hive", "presto", "mysql"). opts: other `sqlglot.generator.Generator` options. Returns: The SQL string. """ from sqlglot.dialects import Dialect return Dialect.get_or_raise(dialect).generate(self, **opts) def transform(self, fun: t.Callable, *args: t.Any, copy: bool = True, **kwargs) -> Expression: """ Visits all tree nodes (excluding already transformed ones) and applies the given transformation function to each node. Args: fun: a function which takes a node as an argument and returns a new transformed node or the same node without modifications. If the function returns None, then the corresponding node will be removed from the syntax tree. copy: if set to True a new tree instance is constructed, otherwise the tree is modified in place. Returns: The transformed tree. """ root = None new_node = None for node in (self.copy() if copy else self).dfs(prune=lambda n: n is not new_node): parent, arg_key, index = node.parent, node.arg_key, node.index new_node = fun(node, *args, **kwargs) if not root: root = new_node elif parent and arg_key and new_node is not node: parent.set(arg_key, new_node, index) assert root return root.assert_is(Expression) @t.overload def replace(self, expression: E) -> E: ... @t.overload def replace(self, expression: None) -> None: ... def replace(self, expression): """ Swap out this expression with a new expression. For example:: >>> tree = Select().select("x").from_("tbl") >>> tree.find(Column).replace(column("y")) Column( this=Identifier(this=y, quoted=False)) >>> tree.sql() 'SELECT y FROM tbl' Args: expression: new node Returns: The new expression or expressions. 
""" parent = self.parent if not parent or parent is expression: return expression key = self.arg_key value = parent.args.get(key) if type(expression) is list and isinstance(value, Expression): # We are trying to replace an Expression with a list, so it's assumed that # the intention was to really replace the parent of this expression. value.parent.replace(expression) else: parent.set(key, expression, self.index) if expression is not self: self.parent = None self.arg_key = None self.index = None return expression def pop(self: E) -> E: """ Remove this expression from its AST. Returns: The popped expression. """ self.replace(None) return self def assert_is(self, type_: t.Type[E]) -> E: """ Assert that this `Expression` is an instance of `type_`. If it is NOT an instance of `type_`, this raises an assertion error. Otherwise, this returns this expression. Examples: This is useful for type security in chained expressions: >>> import sqlglot >>> sqlglot.parse_one("SELECT x from y").assert_is(Select).select("z").sql() 'SELECT x, z FROM y' """ if not isinstance(self, type_): raise AssertionError(f"{self} is not {type_}.") return self def error_messages(self, args: t.Optional[t.Sequence] = None) -> t.List[str]: """ Checks if this expression is valid (e.g. all mandatory args are set). Args: args: a sequence of values that were used to instantiate a Func expression. This is used to check that the provided arguments don't exceed the function argument limit. Returns: A list of error messages for all possible errors that were found. 
""" errors: t.List[str] = [] for k in self.args: if k not in self.arg_types: errors.append(f"Unexpected keyword: '{k}' for {self.__class__}") for k, mandatory in self.arg_types.items(): v = self.args.get(k) if mandatory and (v is None or (isinstance(v, list) and not v)): errors.append(f"Required keyword: '{k}' missing for {self.__class__}") if ( args and isinstance(self, Func) and len(args) > len(self.arg_types) and not self.is_var_len_args ): errors.append( f"The number of provided arguments ({len(args)}) is greater than " f"the maximum number of supported arguments ({len(self.arg_types)})" ) return errors def dump(self): """ Dump this Expression to a JSON-serializable dict. """ from sqlglot.serde import dump return dump(self) @classmethod def load(cls, obj): """ Load a dict (as returned by `Expression.dump`) into an Expression instance. """ from sqlglot.serde import load return load(obj) def and_( self, *expressions: t.Optional[ExpOrStr], dialect: DialectType = None, copy: bool = True, wrap: bool = True, **opts, ) -> Condition: """ AND this condition with one or multiple expressions. Example: >>> condition("x=1").and_("y=1").sql() 'x = 1 AND y = 1' Args: *expressions: the SQL code strings to parse. If an `Expression` instance is passed, it will be used as-is. dialect: the dialect used to parse the input expression. copy: whether to copy the involved expressions (only applies to Expressions). wrap: whether to wrap the operands in `Paren`s. This is true by default to avoid precedence issues, but can be turned off when the produced AST is too deep and causes recursion-related issues. opts: other options to use to parse the input expressions. Returns: The new And condition. """ return and_(self, *expressions, dialect=dialect, copy=copy, wrap=wrap, **opts) def or_( self, *expressions: t.Optional[ExpOrStr], dialect: DialectType = None, copy: bool = True, wrap: bool = True, **opts, ) -> Condition: """ OR this condition with one or multiple expressions. 
Example: >>> condition("x=1").or_("y=1").sql() 'x = 1 OR y = 1' Args: *expressions: the SQL code strings to parse. If an `Expression` instance is passed, it will be used as-is. dialect: the dialect used to parse the input expression. copy: whether to copy the involved expressions (only applies to Expressions). wrap: whether to wrap the operands in `Paren`s. This is true by default to avoid precedence issues, but can be turned off when the produced AST is too deep and causes recursion-related issues. opts: other options to use to parse the input expressions. Returns: The new Or condition. """ return or_(self, *expressions, dialect=dialect, copy=copy, wrap=wrap, **opts) def not_(self, copy: bool = True): """ Wrap this condition with NOT. Example: >>> condition("x=1").not_().sql() 'NOT x = 1' Args: copy: whether to copy this object. Returns: The new Not instance. """ return not_(self, copy=copy) def as_( self, alias: str | Identifier, quoted: t.Optional[bool] = None, dialect: DialectType = None, copy: bool = True, **opts, ) -> Alias: return alias_(self, alias, quoted=quoted, dialect=dialect, copy=copy, **opts) def _binop(self, klass: t.Type[E], other: t.Any, reverse: bool = False) -> E: this = self.copy() other = convert(other, copy=True) if not isinstance(this, klass) and not isinstance(other, klass): this = _wrap(this, Binary) other = _wrap(other, Binary) if reverse: return klass(this=other, expression=this) return klass(this=this, expression=other) def __getitem__(self, other: ExpOrStr | t.Tuple[ExpOrStr]) -> Bracket: return Bracket( this=self.copy(), expressions=[convert(e, copy=True) for e in ensure_list(other)] ) def __iter__(self) -> t.Iterator: if "expressions" in self.arg_types: return iter(self.args.get("expressions") or []) # We define this because __getitem__ converts Expression into an iterable, which is # problematic because one can hit infinite loops if they do "for x in some_expr: ..." 
# See: https://peps.python.org/pep-0234/
        raise TypeError(f"'{self.__class__.__name__}' object is not iterable")

    def isin(
        self,
        *expressions: t.Any,
        query: t.Optional[ExpOrStr] = None,
        unnest: t.Optional[ExpOrStr] | t.Collection[ExpOrStr] = None,
        copy: bool = True,
        **opts,
    ) -> In:
        """
        Builds an `In` node with this expression as its "this" arg.

        Args:
            *expressions: the candidate values; each one is passed through `convert`.
            query: an optional query; it is passed through `maybe_parse` and, unless the result
                already is a `Subquery`, wrapped by calling its `subquery(copy=False)`.
            unnest: optional expression(s) that are parsed and wrapped in an `Unnest` node.
            copy: forwarded to `maybe_copy`, `convert` and `maybe_parse`.
            opts: other options forwarded to `maybe_parse`.

        Returns:
            The new In expression.
        """
        subquery = maybe_parse(query, copy=copy, **opts) if query else None
        if subquery and not isinstance(subquery, Subquery):
            subquery = subquery.subquery(copy=False)

        return In(
            this=maybe_copy(self, copy),
            expressions=[convert(e, copy=copy) for e in expressions],
            query=subquery,
            unnest=(
                Unnest(
                    expressions=[
                        maybe_parse(t.cast(ExpOrStr, e), copy=copy, **opts)
                        for e in ensure_list(unnest)
                    ]
                )
                if unnest
                else None
            ),
        )

    def between(self, low: t.Any, high: t.Any, copy: bool = True, **opts) -> Between:
        """
        Builds a `Between` node with this expression as "this" and the converted `low` and
        `high` values as its bounds.
        """
        return Between(
            this=maybe_copy(self, copy),
            low=convert(low, copy=copy, **opts),
            high=convert(high, copy=copy, **opts),
        )

    # The builders below all delegate to `_binop`, i.e. they operate on a copy of this
    # expression and on the converted `other` operand.

    def is_(self, other: ExpOrStr) -> Is:
        return self._binop(Is, other)

    def like(self, other: ExpOrStr) -> Like:
        return self._binop(Like, other)

    def ilike(self, other: ExpOrStr) -> ILike:
        return self._binop(ILike, other)

    def eq(self, other: t.Any) -> EQ:
        return self._binop(EQ, other)

    def neq(self, other: t.Any) -> NEQ:
        return self._binop(NEQ, other)

    def rlike(self, other: ExpOrStr) -> RegexpLike:
        return self._binop(RegexpLike, other)

    def div(self, other: ExpOrStr, typed: bool = False, safe: bool = False) -> Div:
        """Builds a `Div` node and stores `typed` and `safe` directly in its args."""
        div = self._binop(Div, other)
        div.args["typed"] = typed
        div.args["safe"] = safe
        return div

    def asc(self, nulls_first: bool = True) -> Ordered:
        """Wraps a copy of this expression in an `Ordered` node (no `desc` arg is set)."""
        return Ordered(this=self.copy(), nulls_first=nulls_first)

    def desc(self, nulls_first: bool = False) -> Ordered:
        """Wraps a copy of this expression in an `Ordered` node with `desc=True`."""
        return Ordered(this=self.copy(), desc=True, nulls_first=nulls_first)

    # Python operator overloads. Each one builds the corresponding expression node through
    # `_binop` instead of evaluating anything, so e.g. `a < b` returns an `LT` node rather than
    # a bool. The reflected variants (`__radd__`, ...) pass `reverse=True`, which makes this
    # expression the right-hand operand.

    def __lt__(self, other: t.Any) -> LT:
        return self._binop(LT, other)

    def __le__(self, other: t.Any) -> LTE:
        return self._binop(LTE, other)

    def __gt__(self, other: t.Any) -> GT:
        return self._binop(GT, other)

    def __ge__(self, other: t.Any) -> GTE:
        return self._binop(GTE, other)

    def __add__(self, other: t.Any) -> Add:
        return self._binop(Add, other)

    def __radd__(self, other: t.Any) -> Add:
        return self._binop(Add, other, reverse=True)

    def __sub__(self, other: t.Any) -> Sub:
        return self._binop(Sub, other)

    def __rsub__(self, other: t.Any) -> Sub:
        return self._binop(Sub, other, reverse=True)

    def __mul__(self, other: t.Any) -> Mul:
        return self._binop(Mul, other)

    def __rmul__(self, other: t.Any) -> Mul:
        return self._binop(Mul, other, reverse=True)

    def __truediv__(self, other: t.Any) -> Div:
        return self._binop(Div, other)

    def __rtruediv__(self, other: t.Any) -> Div:
        return self._binop(Div, other, reverse=True)

    def __floordiv__(self, other: t.Any) -> IntDiv:
        return self._binop(IntDiv, other)

    def __rfloordiv__(self, other: t.Any) -> IntDiv:
        return self._binop(IntDiv, other, reverse=True)

    def __mod__(self, other: t.Any) -> Mod:
        return self._binop(Mod, other)

    def __rmod__(self, other: t.Any) -> Mod:
        return self._binop(Mod, other, reverse=True)

    def __pow__(self, other: t.Any) -> Pow:
        return self._binop(Pow, other)

    def __rpow__(self, other: t.Any) -> Pow:
        return self._binop(Pow, other, reverse=True)

    def __and__(self, other: t.Any) -> And:
        return self._binop(And, other)

    def __rand__(self, other: t.Any) -> And:
        return self._binop(And, other, reverse=True)

    def __or__(self, other: t.Any) -> Or:
        return self._binop(Or, other)

    def __ror__(self, other: t.Any) -> Or:
        return self._binop(Or, other, reverse=True)

    def __neg__(self) -> Neg:
        return Neg(this=_wrap(self.copy(), Binary))

    def __invert__(self) -> Not:
        return not_(self.copy())


# Accepted forms for an "into" target: a string, an Expression class, or a collection of either
IntoType = t.Union[
    str,
    t.Type[Expression],
    t.Collection[t.Union[str, t.Type[Expression]]],
]
# Either raw SQL text or an already built Expression
ExpOrStr = t.Union[str, Expression]


class Condition(Expression):
    """Logical conditions like x AND y, or simply x"""


class Predicate(Condition):
    """Relationships like x = y, x > 1, x >= y."""


class DerivedTable(Expression):
    @property
    def selects(self) -> t.List[Expression]:
        """Returns the projections of the wrapped `Query`, or an empty list if "this" is not one."""
        return self.this.selects if isinstance(self.this, Query) else []

    @property
    def named_selects(self) -> t.List[str]:
        """Returns the `output_name` of each expression in `selects`."""
        return [select.output_name for select in self.selects]


class Query(Expression):
    def subquery(self, alias: t.Optional[ExpOrStr] = None, copy: bool = True) -> Subquery:
        """
        Returns a `Subquery` that wraps around this query.

        Example:
            >>> subquery = Select().select("x").from_("tbl").subquery()
            >>> Select().select("x").from_(subquery).sql()
            'SELECT x FROM (SELECT x FROM tbl)'

        Args:
            alias: an optional alias for the subquery.
            copy: if `False`, modify this expression instance in-place.
        """
        instance = maybe_copy(self, copy)
        # A non-Expression alias (e.g. a string) is turned into a TableAlias; a falsy one is dropped
        if not isinstance(alias, Expression):
            alias = TableAlias(this=to_identifier(alias)) if alias else None

        return Subquery(this=instance, alias=alias)

    def limit(
        self: Q, expression: ExpOrStr | int, dialect: DialectType = None, copy: bool = True, **opts
    ) -> Q:
        """
        Adds a LIMIT clause to this query.

        Example:
            >>> select("1").union(select("1")).limit(1).sql()
            'SELECT 1 UNION SELECT 1 LIMIT 1'

        Args:
            expression: the SQL code string to parse.
                This can also be an integer.
                If a `Limit` instance is passed, it will be used as-is.
                If another `Expression` instance is passed, it will be wrapped in a `Limit`.
            dialect: the dialect used to parse the input expression.
            copy: if `False`, modify this expression instance in-place.
            opts: other options to use to parse the input expressions.

        Returns:
            The limited query.
        """
        return _apply_builder(
            expression=expression,
            instance=self,
            arg="limit",
            into=Limit,
            prefix="LIMIT",
            dialect=dialect,
            copy=copy,
            into_arg="expression",
            **opts,
        )

    def offset(
        self: Q, expression: ExpOrStr | int, dialect: DialectType = None, copy: bool = True, **opts
    ) -> Q:
        """
        Set the OFFSET expression.

        Example:
            >>> Select().from_("tbl").select("x").offset(10).sql()
            'SELECT x FROM tbl OFFSET 10'

        Args:
            expression: the SQL code string to parse.
                This can also be an integer.
                If a `Offset` instance is passed, this is used as-is.
                If another `Expression` instance is passed, it will be wrapped in a `Offset`.
            dialect: the dialect used to parse the input expression.
            copy: if `False`, modify this expression instance in-place.
            opts: other options to use to parse the input expressions.

        Returns:
            The modified query.
        """
        return _apply_builder(
            expression=expression,
            instance=self,
            arg="offset",
            into=Offset,
            prefix="OFFSET",
            dialect=dialect,
            copy=copy,
            into_arg="expression",
            **opts,
        )

    def order_by(
        self: Q,
        *expressions: t.Optional[ExpOrStr],
        append: bool = True,
        dialect: DialectType = None,
        copy: bool = True,
        **opts,
    ) -> Q:
        """
        Set the ORDER BY expression.

        Example:
            >>> Select().from_("tbl").select("x").order_by("x DESC").sql()
            'SELECT x FROM tbl ORDER BY x DESC'

        Args:
            *expressions: the SQL code strings to parse.
                If an `Order` instance is passed, this is used as-is.
                If another `Expression` instance is passed, it will be wrapped in a `Order`.
            append: if `True`, add to any existing expressions.
                Otherwise, this flattens all the `Order` expression into a single expression.
            dialect: the dialect used to parse the input expression.
            copy: if `False`, modify this expression instance in-place.
            opts: other options to use to parse the input expressions.

        Returns:
            The modified Select expression.
""" return _apply_child_list_builder( *expressions, instance=self, arg="order", append=append, copy=copy, prefix="ORDER BY", into=Order, dialect=dialect, **opts, ) @property def ctes(self) -> t.List[CTE]: """Returns a list of all the CTEs attached to this query.""" with_ = self.args.get("with") return with_.expressions if with_ else [] @property def selects(self) -> t.List[Expression]: """Returns the query's projections.""" raise NotImplementedError("Query objects must implement `selects`") @property def named_selects(self) -> t.List[str]: """Returns the output names of the query's projections.""" raise NotImplementedError("Query objects must implement `named_selects`") def select( self: Q, *expressions: t.Optional[ExpOrStr], append: bool = True, dialect: DialectType = None, copy: bool = True, **opts, ) -> Q: """ Append to or set the SELECT expressions. Example: >>> Select().select("x", "y").sql() 'SELECT x, y' Args: *expressions: the SQL code strings to parse. If an `Expression` instance is passed, it will be used as-is. append: if `True`, add to any existing expressions. Otherwise, this resets the expressions. dialect: the dialect used to parse the input expressions. copy: if `False`, modify this expression instance in-place. opts: other options to use to parse the input expressions. Returns: The modified Query expression. """ raise NotImplementedError("Query objects must implement `select`") def with_( self: Q, alias: ExpOrStr, as_: ExpOrStr, recursive: t.Optional[bool] = None, materialized: t.Optional[bool] = None, append: bool = True, dialect: DialectType = None, copy: bool = True, scalar: bool = False, **opts, ) -> Q: """ Append to or set the common table expressions. Example: >>> Select().with_("tbl2", as_="SELECT * FROM tbl").select("x").from_("tbl2").sql() 'WITH tbl2 AS (SELECT * FROM tbl) SELECT x FROM tbl2' Args: alias: the SQL code string to parse as the table name. If an `Expression` instance is passed, this is used as-is. 
as_: the SQL code string to parse as the table expression. If an `Expression` instance is passed, it will be used as-is. recursive: set the RECURSIVE part of the expression. Defaults to `False`. materialized: set the MATERIALIZED part of the expression. append: if `True`, add to any existing expressions. Otherwise, this resets the expressions. dialect: the dialect used to parse the input expression. copy: if `False`, modify this expression instance in-place. scalar: if `True`, this is a scalar common table expression. opts: other options to use to parse the input expressions. Returns: The modified expression. """ return _apply_cte_builder( self, alias, as_, recursive=recursive, materialized=materialized, append=append, dialect=dialect, copy=copy, scalar=scalar, **opts, ) def union( self, *expressions: ExpOrStr, distinct: bool = True, dialect: DialectType = None, **opts ) -> Union: """ Builds a UNION expression. Example: >>> import sqlglot >>> sqlglot.parse_one("SELECT * FROM foo").union("SELECT * FROM bla").sql() 'SELECT * FROM foo UNION SELECT * FROM bla' Args: expressions: the SQL code strings. If `Expression` instances are passed, they will be used as-is. distinct: set the DISTINCT flag if and only if this is true. dialect: the dialect used to parse the input expression. opts: other options to use to parse the input expressions. Returns: The new Union expression. """ return union(self, *expressions, distinct=distinct, dialect=dialect, **opts) def intersect( self, *expressions: ExpOrStr, distinct: bool = True, dialect: DialectType = None, **opts ) -> Intersect: """ Builds an INTERSECT expression. Example: >>> import sqlglot >>> sqlglot.parse_one("SELECT * FROM foo").intersect("SELECT * FROM bla").sql() 'SELECT * FROM foo INTERSECT SELECT * FROM bla' Args: expressions: the SQL code strings. If `Expression` instances are passed, they will be used as-is. distinct: set the DISTINCT flag if and only if this is true. 
dialect: the dialect used to parse the input expression. opts: other options to use to parse the input expressions. Returns: The new Intersect expression. """ return intersect(self, *expressions, distinct=distinct, dialect=dialect, **opts) def except_( self, *expressions: ExpOrStr, distinct: bool = True, dialect: DialectType = None, **opts ) -> Except: """ Builds an EXCEPT expression. Example: >>> import sqlglot >>> sqlglot.parse_one("SELECT * FROM foo").except_("SELECT * FROM bla").sql() 'SELECT * FROM foo EXCEPT SELECT * FROM bla' Args: expressions: the SQL code strings. If `Expression` instance are passed, they will be used as-is. distinct: set the DISTINCT flag if and only if this is true. dialect: the dialect used to parse the input expression. opts: other options to use to parse the input expressions. Returns: The new Except expression. """ return except_(self, *expressions, distinct=distinct, dialect=dialect, **opts) class UDTF(DerivedTable): @property def selects(self) -> t.List[Expression]: alias = self.args.get("alias") return alias.columns if alias else [] class Cache(Expression): arg_types = { "this": True, "lazy": False, "options": False, "expression": False, } class Uncache(Expression): arg_types = {"this": True, "exists": False} class Refresh(Expression): pass class DDL(Expression): @property def ctes(self) -> t.List[CTE]: """Returns a list of all the CTEs attached to this statement.""" with_ = self.args.get("with") return with_.expressions if with_ else [] @property def selects(self) -> t.List[Expression]: """If this statement contains a query (e.g. a CTAS), this returns the query's projections.""" return self.expression.selects if isinstance(self.expression, Query) else [] @property def named_selects(self) -> t.List[str]: """ If this statement contains a query (e.g. a CTAS), this returns the output names of the query's projections. 
""" return self.expression.named_selects if isinstance(self.expression, Query) else [] class DML(Expression): def returning( self, expression: ExpOrStr, dialect: DialectType = None, copy: bool = True, **opts, ) -> "Self": """ Set the RETURNING expression. Not supported by all dialects. Example: >>> delete("tbl").returning("*", dialect="postgres").sql() 'DELETE FROM tbl RETURNING *' Args: expression: the SQL code strings to parse. If an `Expression` instance is passed, it will be used as-is. dialect: the dialect used to parse the input expressions. copy: if `False`, modify this expression instance in-place. opts: other options to use to parse the input expressions. Returns: Delete: the modified expression. """ return _apply_builder( expression=expression, instance=self, arg="returning", prefix="RETURNING", dialect=dialect, copy=copy, into=Returning, **opts, ) class Create(DDL): arg_types = { "with": False, "this": True, "kind": True, "expression": False, "exists": False, "properties": False, "replace": False, "refresh": False, "unique": False, "indexes": False, "no_schema_binding": False, "begin": False, "end": False, "clone": False, "concurrently": False, "clustered": False, } @property def kind(self) -> t.Optional[str]: kind = self.args.get("kind") return kind and kind.upper() class SequenceProperties(Expression): arg_types = { "increment": False, "minvalue": False, "maxvalue": False, "cache": False, "start": False, "owned": False, "options": False, } class TruncateTable(Expression): arg_types = { "expressions": True, "is_database": False, "exists": False, "only": False, "cluster": False, "identity": False, "option": False, "partition": False, } # https://docs.snowflake.com/en/sql-reference/sql/create-clone # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_table_clone_statement # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_table_copy class Clone(Expression): arg_types = 
{"this": True, "shallow": False, "copy": False} class Describe(Expression): arg_types = { "this": True, "style": False, "kind": False, "expressions": False, "partition": False, "format": False, } # https://duckdb.org/docs/sql/statements/attach.html#attach class Attach(Expression): arg_types = {"this": True, "exists": False, "expressions": False} # https://duckdb.org/docs/sql/statements/attach.html#detach class Detach(Expression): arg_types = {"this": True, "exists": False} # https://duckdb.org/docs/guides/meta/summarize.html class Summarize(Expression): arg_types = {"this": True, "table": False} class Kill(Expression): arg_types = {"this": True, "kind": False} class Pragma(Expression): pass class Declare(Expression): arg_types = {"expressions": True} class DeclareItem(Expression): arg_types = {"this": True, "kind": True, "default": False} class Set(Expression): arg_types = {"expressions": False, "unset": False, "tag": False} class Heredoc(Expression): arg_types = {"this": True, "tag": False} class SetItem(Expression): arg_types = { "this": False, "expressions": False, "kind": False, "collate": False, # MySQL SET NAMES statement "global": False, } class Show(Expression): arg_types = { "this": True, "history": False, "terse": False, "target": False, "offset": False, "starts_with": False, "limit": False, "from": False, "like": False, "where": False, "db": False, "scope": False, "scope_kind": False, "full": False, "mutex": False, "query": False, "channel": False, "global": False, "log": False, "position": False, "types": False, "privileges": False, } class UserDefinedFunction(Expression): arg_types = {"this": True, "expressions": False, "wrapped": False} class CharacterSet(Expression): arg_types = {"this": True, "default": False} class RecursiveWithSearch(Expression): arg_types = {"kind": True, "this": True, "expression": True, "using": False} class With(Expression): arg_types = {"expressions": True, "recursive": False, "search": False} @property def recursive(self) -> 
bool: return bool(self.args.get("recursive")) class WithinGroup(Expression): arg_types = {"this": True, "expression": False} # clickhouse supports scalar ctes # https://clickhouse.com/docs/en/sql-reference/statements/select/with class CTE(DerivedTable): arg_types = { "this": True, "alias": True, "scalar": False, "materialized": False, } class ProjectionDef(Expression): arg_types = {"this": True, "expression": True} class TableAlias(Expression): arg_types = {"this": False, "columns": False} @property def columns(self): return self.args.get("columns") or [] class BitString(Condition): pass class HexString(Condition): arg_types = {"this": True, "is_integer": False} class ByteString(Condition): pass class RawString(Condition): pass class UnicodeString(Condition): arg_types = {"this": True, "escape": False} class Column(Condition): arg_types = {"this": True, "table": False, "db": False, "catalog": False, "join_mark": False} @property def table(self) -> str: return self.text("table") @property def db(self) -> str: return self.text("db") @property def catalog(self) -> str: return self.text("catalog") @property def output_name(self) -> str: return self.name @property def parts(self) -> t.List[Identifier]: """Return the parts of a column in order catalog, db, table, name.""" return [ t.cast(Identifier, self.args[part]) for part in ("catalog", "db", "table", "this") if self.args.get(part) ] def to_dot(self) -> Dot | Identifier: """Converts the column into a dot expression.""" parts = self.parts parent = self.parent while parent: if isinstance(parent, Dot): parts.append(parent.expression) parent = parent.parent return Dot.build(deepcopy(parts)) if len(parts) > 1 else parts[0] class ColumnPosition(Expression): arg_types = {"this": False, "position": True} class ColumnDef(Expression): arg_types = { "this": True, "kind": False, "constraints": False, "exists": False, "position": False, "default": False, "output": False, } @property def constraints(self) -> 
t.List[ColumnConstraint]: return self.args.get("constraints") or [] @property def kind(self) -> t.Optional[DataType]: return self.args.get("kind") class AlterColumn(Expression): arg_types = { "this": True, "dtype": False, "collate": False, "using": False, "default": False, "drop": False, "comment": False, "allow_null": False, "visible": False, } # https://dev.mysql.com/doc/refman/8.0/en/invisible-indexes.html class AlterIndex(Expression): arg_types = {"this": True, "visible": True} # https://docs.aws.amazon.com/redshift/latest/dg/r_ALTER_TABLE.html class AlterDistStyle(Expression): pass class AlterSortKey(Expression): arg_types = {"this": False, "expressions": False, "compound": False} class AlterSet(Expression): arg_types = { "expressions": False, "option": False, "tablespace": False, "access_method": False, "file_format": False, "copy_options": False, "tag": False, "location": False, "serde": False, } class RenameColumn(Expression): arg_types = {"this": True, "to": True, "exists": False} class AlterRename(Expression): pass class SwapTable(Expression): pass class Comment(Expression): arg_types = { "this": True, "kind": True, "expression": True, "exists": False, "materialized": False, } class Comprehension(Expression): arg_types = {"this": True, "expression": True, "iterator": True, "condition": False} # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl class MergeTreeTTLAction(Expression): arg_types = { "this": True, "delete": False, "recompress": False, "to_disk": False, "to_volume": False, } # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl class MergeTreeTTL(Expression): arg_types = { "expressions": True, "where": False, "group": False, "aggregates": False, } # https://dev.mysql.com/doc/refman/8.0/en/create-table.html class IndexConstraintOption(Expression): arg_types = { "key_block_size": False, "using": False, "parser": False, "comment": False, "visible": 
False,
        "engine_attr": False,
        "secondary_engine_attr": False,
    }


class ColumnConstraint(Expression):
    arg_types = {"this": False, "kind": True}

    @property
    def kind(self) -> ColumnConstraintKind:
        """Returns the `kind` arg; a missing `kind` raises `KeyError` (direct indexing)."""
        return self.args["kind"]


class ColumnConstraintKind(Expression):
    pass


class AutoIncrementColumnConstraint(ColumnConstraintKind):
    pass


class PeriodForSystemTimeConstraint(ColumnConstraintKind):
    arg_types = {"this": True, "expression": True}


class CaseSpecificColumnConstraint(ColumnConstraintKind):
    arg_types = {"not_": True}


class CharacterSetColumnConstraint(ColumnConstraintKind):
    arg_types = {"this": True}


class CheckColumnConstraint(ColumnConstraintKind):
    arg_types = {"this": True, "enforced": False}


class ClusteredColumnConstraint(ColumnConstraintKind):
    pass


class CollateColumnConstraint(ColumnConstraintKind):
    pass


class CommentColumnConstraint(ColumnConstraintKind):
    pass


class CompressColumnConstraint(ColumnConstraintKind):
    arg_types = {"this": False}


class DateFormatColumnConstraint(ColumnConstraintKind):
    arg_types = {"this": True}


class DefaultColumnConstraint(ColumnConstraintKind):
    pass


class EncodeColumnConstraint(ColumnConstraintKind):
    pass


# https://www.postgresql.org/docs/current/sql-createtable.html#SQL-CREATETABLE-EXCLUDE
class ExcludeColumnConstraint(ColumnConstraintKind):
    pass


class EphemeralColumnConstraint(ColumnConstraintKind):
    arg_types = {"this": False}


class WithOperator(Expression):
    arg_types = {"this": True, "op": True}


class GeneratedAsIdentityColumnConstraint(ColumnConstraintKind):
    # this: True -> ALWAYS, this: False -> BY DEFAULT
    arg_types = {
        "this": False,
        "expression": False,
        "on_null": False,
        "start": False,
        "increment": False,
        "minvalue": False,
        "maxvalue": False,
        "cycle": False,
    }


class GeneratedAsRowColumnConstraint(ColumnConstraintKind):
    arg_types = {"start": False, "hidden": False}


# https://dev.mysql.com/doc/refman/8.0/en/create-table.html
# https://github.com/ClickHouse/ClickHouse/blob/master/src/Parsers/ParserCreateQuery.h#L646
class IndexColumnConstraint(ColumnConstraintKind):
    arg_types = {
        "this": False,
        "expressions": False,
        "kind": False,
        "index_type": False,
        "options": False,
        "expression": False,  # Clickhouse
        "granularity": False,
    }


class InlineLengthColumnConstraint(ColumnConstraintKind):
    pass


class NonClusteredColumnConstraint(ColumnConstraintKind):
    pass


class NotForReplicationColumnConstraint(ColumnConstraintKind):
    arg_types = {}


# https://docs.snowflake.com/en/sql-reference/sql/create-table
class MaskingPolicyColumnConstraint(ColumnConstraintKind):
    arg_types = {"this": True, "expressions": False}


class NotNullColumnConstraint(ColumnConstraintKind):
    arg_types = {"allow_null": False}


# https://dev.mysql.com/doc/refman/5.7/en/timestamp-initialization.html
class OnUpdateColumnConstraint(ColumnConstraintKind):
    pass


# https://docs.snowflake.com/en/sql-reference/sql/create-external-table#optional-parameters
class TransformColumnConstraint(ColumnConstraintKind):
    pass


class PrimaryKeyColumnConstraint(ColumnConstraintKind):
    arg_types = {"desc": False}


class TitleColumnConstraint(ColumnConstraintKind):
    pass


class UniqueColumnConstraint(ColumnConstraintKind):
    arg_types = {"this": False, "index_type": False, "on_conflict": False, "nulls": False}


class UppercaseColumnConstraint(ColumnConstraintKind):
    arg_types: t.Dict[str, t.Any] = {}


# https://docs.risingwave.com/processing/watermarks#syntax
# NOTE(review): unlike its neighbours this derives from Expression rather than
# ColumnConstraintKind -- confirm that is intentional.
class WatermarkColumnConstraint(Expression):
    arg_types = {"this": True, "expression": True}


class PathColumnConstraint(ColumnConstraintKind):
    pass


# https://docs.snowflake.com/en/sql-reference/sql/create-table
class ProjectionPolicyColumnConstraint(ColumnConstraintKind):
    pass


# computed column expression
# https://learn.microsoft.com/en-us/sql/t-sql/statements/create-table-transact-sql?view=sql-server-ver16
class ComputedColumnConstraint(ColumnConstraintKind):
    arg_types = {"this": True, "persisted": False, "not_null": False}


class Constraint(Expression):
    arg_types = {"this": True, "expressions": True}


class Delete(DML):
    arg_types = {
        "with": False,
        "this": False,
        "using": False,
        "where": False,
        "returning": False,
        "limit": False,
        "tables": False,  # Multiple-Table Syntax (MySQL)
        "cluster": False,  # Clickhouse
    }

    def delete(
        self,
        table: ExpOrStr,
        dialect: DialectType = None,
        copy: bool = True,
        **opts,
    ) -> Delete:
        """
        Create a DELETE expression or replace the table on an existing DELETE expression.

        Example:
            >>> delete("tbl").sql()
            'DELETE FROM tbl'

        Args:
            table: the table from which to delete.
            dialect: the dialect used to parse the input expression.
            copy: if `False`, modify this expression instance in-place.
            opts: other options to use to parse the input expressions.

        Returns:
            Delete: the modified expression.
        """
        # The target table is stored under the "this" arg and parsed into a Table.
        return _apply_builder(
            expression=table,
            instance=self,
            arg="this",
            dialect=dialect,
            into=Table,
            copy=copy,
            **opts,
        )

    def where(
        self,
        *expressions: t.Optional[ExpOrStr],
        append: bool = True,
        dialect: DialectType = None,
        copy: bool = True,
        **opts,
    ) -> Delete:
        """
        Append to or set the WHERE expressions.

        Example:
            >>> delete("tbl").where("x = 'a' OR x < 'b'").sql()
            "DELETE FROM tbl WHERE x = 'a' OR x < 'b'"

        Args:
            *expressions: the SQL code strings to parse.
                If an `Expression` instance is passed, it will be used as-is.
                Multiple expressions are combined with an AND operator.
            append: if `True`, AND the new expressions to any existing expression.
                Otherwise, this resets the expression.
            dialect: the dialect used to parse the input expressions.
            copy: if `False`, modify this expression instance in-place.
            opts: other options to use to parse the input expressions.

        Returns:
            Delete: the modified expression.
        """
        return _apply_conjunction_builder(
            *expressions,
            instance=self,
            arg="where",
            append=append,
            into=Where,
            dialect=dialect,
            copy=copy,
            **opts,
        )


class Drop(Expression):
    arg_types = {
        "this": False,
        "kind": False,
        "expressions": False,
        "exists": False,
        "temporary": False,
        "materialized": False,
        "cascade": False,
        "constraints": False,
        "purge": False,
        "cluster": False,
        "concurrently": False,
    }

    @property
    def kind(self) -> t.Optional[str]:
        """Returns the `kind` arg upper-cased; a falsy kind is returned unchanged."""
        kind = self.args.get("kind")
        return kind and kind.upper()


# https://cloud.google.com/bigquery/docs/reference/standard-sql/export-statements
class Export(Expression):
    arg_types = {"this": True, "connection": False, "options": True}


class Filter(Expression):
    arg_types = {"this": True, "expression": True}


class Check(Expression):
    pass


class Changes(Expression):
    arg_types = {"information": True, "at_before": False, "end": False}


# https://docs.snowflake.com/en/sql-reference/constructs/connect-by
class Connect(Expression):
    arg_types = {"start": False, "connect": True, "nocycle": False}


class CopyParameter(Expression):
    arg_types = {"this": True, "expression": False, "expressions": False}


class Copy(DML):
    arg_types = {
        "this": True,
        "kind": True,
        "files": True,
        "credentials": False,
        "format": False,
        "params": False,
    }


class Credentials(Expression):
    arg_types = {
        "credentials": False,
        "encryption": False,
        "storage": False,
        "iam_role": False,
        "region": False,
    }


class Prior(Expression):
    pass


class Directory(Expression):
    # https://spark.apache.org/docs/3.0.0-preview/sql-ref-syntax-dml-insert-overwrite-directory-hive.html
    arg_types = {"this": True, "local": False, "row_format": False}


class ForeignKey(Expression):
    arg_types = {
        "expressions": False,
        "reference": False,
        "delete": False,
        "update": False,
    }


class ColumnPrefix(Expression):
    arg_types = {"this": True, "expression": True}


class PrimaryKey(Expression):
    arg_types = {"expressions": True, "options": False}


# https://www.postgresql.org/docs/9.1/sql-selectinto.html
# https://docs.aws.amazon.com/redshift/latest/dg/r_SELECT_INTO.html#r_SELECT_INTO-examples
class Into(Expression):
    arg_types = {
        "this": False,
        "temporary": False,
        "unlogged": False,
        "bulk_collect": False,
        "expressions": False,
    }


class From(Expression):
    @property
    def name(self) -> str:
        """Returns the name of the wrapped `this` expression."""
        return self.this.name

    @property
    def alias_or_name(self) -> str:
        """Returns `alias_or_name` of the wrapped `this` expression."""
        return self.this.alias_or_name


class Having(Expression):
    pass


class Hint(Expression):
    arg_types = {"expressions": True}


class JoinHint(Expression):
    arg_types = {"this": True, "expressions": True}


class Identifier(Expression):
    arg_types = {"this": True, "quoted": False, "global": False, "temporary": False}

    @property
    def quoted(self) -> bool:
        """Returns the `quoted` arg coerced to a bool (`False` when unset)."""
        return bool(self.args.get("quoted"))

    @property
    def hashable_args(self) -> t.Any:
        # Only the `this` value and the quoted flag are included here; "global" and
        # "temporary" are left out.
        return (self.this, self.quoted)

    @property
    def output_name(self) -> str:
        return self.name


# https://www.postgresql.org/docs/current/indexes-opclass.html
class Opclass(Expression):
    arg_types = {"this": True, "expression": True}


class Index(Expression):
    arg_types = {
        "this": False,
        "table": False,
        "unique": False,
        "primary": False,
        "amp": False,  # teradata
        "params": False,
    }


class IndexParameters(Expression):
    arg_types = {
        "using": False,
        "include": False,
        "columns": False,
        "with_storage": False,
        "partition_by": False,
        "tablespace": False,
        "where": False,
        "on": False,
    }


class Insert(DDL, DML):
    arg_types = {
        "hint": False,
        "with": False,
        "is_function": False,
        "this": False,
        "expression": False,
        "conflict": False,
        "returning": False,
        "overwrite": False,
        "exists": False,
        "alternative": False,
        "where": False,
        "ignore": False,
        "by_name": False,
        "stored": False,
        "partition": False,
        "settings": False,
        "source": False,
    }

    def with_(
        self,
        alias: ExpOrStr,
        as_: ExpOrStr,
        recursive: t.Optional[bool] = None,
        materialized: t.Optional[bool] = None,
        append: bool = True,
        dialect: DialectType = None,
        copy: bool = True,
        **opts,
    ) -> Insert:
        """
        Append to or set the common table expressions.

Example:
            >>> insert("SELECT x FROM cte", "t").with_("cte", as_="SELECT * FROM tbl").sql()
            'WITH cte AS (SELECT * FROM tbl) INSERT INTO t SELECT x FROM cte'

        Args:
            alias: the SQL code string to parse as the table name.
                If an `Expression` instance is passed, this is used as-is.
            as_: the SQL code string to parse as the table expression.
                If an `Expression` instance is passed, it will be used as-is.
            recursive: set the RECURSIVE part of the expression. Defaults to `None`.
            materialized: set the MATERIALIZED part of the expression. Defaults to `None`.
            append: if `True`, add to any existing expressions.
                Otherwise, this resets the expressions.
            dialect: the dialect used to parse the input expression.
            copy: if `False`, modify this expression instance in-place.
            opts: other options to use to parse the input expressions.

        Returns:
            The modified expression.
        """
        return _apply_cte_builder(
            self,
            alias,
            as_,
            recursive=recursive,
            materialized=materialized,
            append=append,
            dialect=dialect,
            copy=copy,
            **opts,
        )


class ConditionalInsert(Expression):
    arg_types = {"this": True, "expression": False, "else_": False}


class MultitableInserts(Expression):
    arg_types = {"expressions": True, "kind": True, "source": True}


class OnConflict(Expression):
    arg_types = {
        "duplicate": False,
        "expressions": False,
        "action": False,
        "conflict_keys": False,
        "constraint": False,
        "where": False,
    }


class OnCondition(Expression):
    arg_types = {"error": False, "empty": False, "null": False}


class Returning(Expression):
    arg_types = {"expressions": True, "into": False}


# https://dev.mysql.com/doc/refman/8.0/en/charset-introducer.html
class Introducer(Expression):
    arg_types = {"this": True, "expression": True}


# national char, like n'utf8'
class National(Expression):
    pass


class LoadData(Expression):
    arg_types = {
        "this": True,
        "local": False,
        "overwrite": False,
        "inpath": True,
        "partition": False,
        "input_format": False,
        "serde": False,
    }


class Partition(Expression):
    arg_types = {"expressions": True, "subpartition": False}


class PartitionRange(Expression):
    arg_types = {"this": True, "expression": True}


# https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression
class PartitionId(Expression):
    pass


class Fetch(Expression):
    arg_types = {
        "direction": False,
        "count": False,
        "limit_options": False,
    }


class Grant(Expression):
    arg_types = {
        "privileges": True,
        "kind": False,
        "securable": True,
        "principals": True,
        "grant_option": False,
    }


class Group(Expression):
    arg_types = {
        "expressions": False,
        "grouping_sets": False,
        "cube": False,
        "rollup": False,
        "totals": False,
        "all": False,
    }


class Cube(Expression):
    arg_types = {"expressions": False}


class Rollup(Expression):
    arg_types = {"expressions": False}


class GroupingSets(Expression):
    arg_types = {"expressions": True}


class Lambda(Expression):
    arg_types = {"this": True, "expressions": True}


class Limit(Expression):
    arg_types = {
        "this": False,
        "expression": True,
        "offset": False,
        "limit_options": False,
        "expressions": False,
    }


class LimitOptions(Expression):
    arg_types = {
        "percent": False,
        "rows": False,
        "with_ties": False,
    }


class Literal(Condition):
    arg_types = {"this": True, "is_string": True}

    @property
    def hashable_args(self) -> t.Any:
        return (self.this, self.args.get("is_string"))

    @classmethod
    def number(cls, number) -> Literal:
        """Builds a non-string literal whose `this` is `str(number)`."""
        return cls(this=str(number), is_string=False)

    @classmethod
    def string(cls, string) -> Literal:
        """Builds a string literal whose `this` is `str(string)`."""
        return cls(this=str(string), is_string=True)

    @property
    def output_name(self) -> str:
        return self.name

    def to_py(self) -> int | str | Decimal:
        """
        Converts the literal to a Python value.

        Returns:
            For numeric literals, an `int` when the text parses as one and a `Decimal`
            otherwise; for any other literal, the raw `this` value.
        """
        if self.is_number:
            try:
                return int(self.this)
            except ValueError:
                # Not an integer representation; fall back to Decimal.
                return Decimal(self.this)
        return self.this


class Join(Expression):
    arg_types = {
        "this": True,
        "on": False,
        "side": False,
        "kind": False,
        "using": False,
        "method": False,
        "global": False,
        "hint": False,
        "match_condition": False,  # Snowflake
        "expressions": False,
    }

    # The four properties below return the upper-cased text of the matching arg.

    @property
    def method(self) -> str:
        return self.text("method").upper()

    @property
    def kind(self) -> str:
        return self.text("kind").upper()

    @property
    def side(self) -> str:
        return self.text("side").upper()

    @property
    def hint(self) -> str:
        return self.text("hint").upper()

    @property
    def alias_or_name(self) -> str:
        return self.this.alias_or_name

    @property
    def is_semi_or_anti_join(self) -> bool:
        """Whether the (upper-cased) join kind is "SEMI" or "ANTI"."""
        return self.kind in ("SEMI", "ANTI")

    def on(
        self,
        *expressions: t.Optional[ExpOrStr],
        append: bool = True,
        dialect: DialectType = None,
        copy: bool = True,
        **opts,
    ) -> Join:
        """
        Append to or set the ON expressions.

        Example:
            >>> import sqlglot
            >>> sqlglot.parse_one("JOIN x", into=Join).on("y = 1").sql()
            'JOIN x ON y = 1'

        Args:
            *expressions: the SQL code strings to parse.
                If an `Expression` instance is passed, it will be used as-is.
                Multiple expressions are combined with an AND operator.
            append: if `True`, AND the new expressions to any existing expression.
                Otherwise, this resets the expression.
            dialect: the dialect used to parse the input expressions.
            copy: if `False`, modify this expression instance in-place.
            opts: other options to use to parse the input expressions.

        Returns:
            The modified Join expression.
        """
        join = _apply_conjunction_builder(
            *expressions,
            instance=self,
            arg="on",
            append=append,
            dialect=dialect,
            copy=copy,
            **opts,
        )

        # A CROSS join that has just been given an ON condition has its kind cleared.
        if join.kind == "CROSS":
            join.set("kind", None)

        return join

    def using(
        self,
        *expressions: t.Optional[ExpOrStr],
        append: bool = True,
        dialect: DialectType = None,
        copy: bool = True,
        **opts,
    ) -> Join:
        """
        Append to or set the USING expressions.

        Example:
            >>> import sqlglot
            >>> sqlglot.parse_one("JOIN x", into=Join).using("foo", "bla").sql()
            'JOIN x USING (foo, bla)'

        Args:
            *expressions: the SQL code strings to parse.
                If an `Expression` instance is passed, it will be used as-is.
            append: if `True`, concatenate the new expressions to the existing "using" list.
                Otherwise, this resets the expression.
            dialect: the dialect used to parse the input expressions.
            copy: if `False`, modify this expression instance in-place.
opts: other options to use to parse the input expressions. Returns: The modified Join expression. """ join = _apply_list_builder( *expressions, instance=self, arg="using", append=append, dialect=dialect, copy=copy, **opts, ) if join.kind == "CROSS": join.set("kind", None) return join class Lateral(UDTF): arg_types = { "this": True, "view": False, "outer": False, "alias": False, "cross_apply": False, # True -> CROSS APPLY, False -> OUTER APPLY } # https://docs.snowflake.com/sql-reference/literals-table # https://docs.snowflake.com/en/sql-reference/functions-table#using-a-table-function class TableFromRows(UDTF): arg_types = { "this": True, "alias": False, "joins": False, "pivots": False, "sample": False, } class MatchRecognizeMeasure(Expression): arg_types = { "this": True, "window_frame": False, } class MatchRecognize(Expression): arg_types = { "partition_by": False, "order": False, "measures": False, "rows": False, "after": False, "pattern": False, "define": False, "alias": False, } # Clickhouse FROM FINAL modifier # https://clickhouse.com/docs/en/sql-reference/statements/select/from/#final-modifier class Final(Expression): pass class Offset(Expression): arg_types = {"this": False, "expression": True, "expressions": False} class Order(Expression): arg_types = {"this": False, "expressions": True, "siblings": False} # https://clickhouse.com/docs/en/sql-reference/statements/select/order-by#order-by-expr-with-fill-modifier class WithFill(Expression): arg_types = { "from": False, "to": False, "step": False, "interpolate": False, } # hive specific sorts # https://cwiki.apache.org/confluence/display/Hive/LanguageManual+SortBy class Cluster(Order): pass class Distribute(Order): pass class Sort(Order): pass class Ordered(Expression): arg_types = {"this": True, "desc": False, "nulls_first": True, "with_fill": False} class Property(Expression): arg_types = {"this": True, "value": True} class GrantPrivilege(Expression): arg_types = {"this": True, "expressions": False} class 
GrantPrincipal(Expression): arg_types = {"this": True, "kind": False} class AllowedValuesProperty(Expression): arg_types = {"expressions": True} class AlgorithmProperty(Property): arg_types = {"this": True} class AutoIncrementProperty(Property): arg_types = {"this": True} # https://docs.aws.amazon.com/prescriptive-guidance/latest/materialized-views-redshift/refreshing-materialized-views.html class AutoRefreshProperty(Property): arg_types = {"this": True} class BackupProperty(Property): arg_types = {"this": True} class BlockCompressionProperty(Property): arg_types = { "autotemp": False, "always": False, "default": False, "manual": False, "never": False, } class CharacterSetProperty(Property): arg_types = {"this": True, "default": True} class ChecksumProperty(Property): arg_types = {"on": False, "default": False} class CollateProperty(Property): arg_types = {"this": True, "default": False} class CopyGrantsProperty(Property): arg_types = {} class DataBlocksizeProperty(Property): arg_types = { "size": False, "units": False, "minimum": False, "maximum": False, "default": False, } class DataDeletionProperty(Property): arg_types = {"on": True, "filter_col": False, "retention_period": False} class DefinerProperty(Property): arg_types = {"this": True} class DistKeyProperty(Property): arg_types = {"this": True} # https://docs.starrocks.io/docs/sql-reference/sql-statements/data-definition/CREATE_TABLE/#distribution_desc # https://doris.apache.org/docs/sql-manual/sql-statements/Data-Definition-Statements/Create/CREATE-TABLE?_highlight=create&_highlight=table#distribution_desc class DistributedByProperty(Property): arg_types = {"expressions": False, "kind": True, "buckets": False, "order": False} class DistStyleProperty(Property): arg_types = {"this": True} class DuplicateKeyProperty(Property): arg_types = {"expressions": True} class EngineProperty(Property): arg_types = {"this": True} class HeapProperty(Property): arg_types = {} class ToTableProperty(Property): arg_types = 
{"this": True} class ExecuteAsProperty(Property): arg_types = {"this": True} class ExternalProperty(Property): arg_types = {"this": False} class FallbackProperty(Property): arg_types = {"no": True, "protection": False} class FileFormatProperty(Property): arg_types = {"this": True} class FreespaceProperty(Property): arg_types = {"this": True, "percent": False} class GlobalProperty(Property): arg_types = {} class IcebergProperty(Property): arg_types = {} class InheritsProperty(Property): arg_types = {"expressions": True} class InputModelProperty(Property): arg_types = {"this": True} class OutputModelProperty(Property): arg_types = {"this": True} class IsolatedLoadingProperty(Property): arg_types = {"no": False, "concurrent": False, "target": False} class JournalProperty(Property): arg_types = { "no": False, "dual": False, "before": False, "local": False, "after": False, } class LanguageProperty(Property): arg_types = {"this": True} # spark ddl class ClusteredByProperty(Property): arg_types = {"expressions": True, "sorted_by": False, "buckets": True} class DictProperty(Property): arg_types = {"this": True, "kind": True, "settings": False} class DictSubProperty(Property): pass class DictRange(Property): arg_types = {"this": True, "min": True, "max": True} class DynamicProperty(Property): arg_types = {} # Clickhouse CREATE ... 
ON CLUSTER modifier # https://clickhouse.com/docs/en/sql-reference/distributed-ddl class OnCluster(Property): arg_types = {"this": True} # Clickhouse EMPTY table "property" class EmptyProperty(Property): arg_types = {} class LikeProperty(Property): arg_types = {"this": True, "expressions": False} class LocationProperty(Property): arg_types = {"this": True} class LockProperty(Property): arg_types = {"this": True} class LockingProperty(Property): arg_types = { "this": False, "kind": True, "for_or_in": False, "lock_type": True, "override": False, } class LogProperty(Property): arg_types = {"no": True} class MaterializedProperty(Property): arg_types = {"this": False} class MergeBlockRatioProperty(Property): arg_types = {"this": False, "no": False, "default": False, "percent": False} class NoPrimaryIndexProperty(Property): arg_types = {} class OnProperty(Property): arg_types = {"this": True} class OnCommitProperty(Property): arg_types = {"delete": False} class PartitionedByProperty(Property): arg_types = {"this": True} # https://docs.starrocks.io/docs/sql-reference/sql-statements/table_bucket_part_index/CREATE_TABLE/ class PartitionByRangeProperty(Property): arg_types = {"partition_expressions": True, "create_expressions": True} # https://docs.starrocks.io/docs/table_design/data_distribution/#range-partitioning class PartitionByRangePropertyDynamic(Expression): arg_types = {"this": False, "start": True, "end": True, "every": True} # https://docs.starrocks.io/docs/sql-reference/sql-statements/table_bucket_part_index/CREATE_TABLE/ class UniqueKeyProperty(Property): arg_types = {"expressions": True} # https://www.postgresql.org/docs/current/sql-createtable.html class PartitionBoundSpec(Expression): # this -> IN / MODULUS, expression -> REMAINDER, from_expressions -> FROM (...), to_expressions -> TO (...) 
class RowFormatDelimitedProperty(Property):
    # https://cwiki.apache.org/confluence/display/hive/languagemanual+dml
    # Every arg key is optional, so each one maps to False.
    arg_types = {
        clause: False
        for clause in (
            "fields",
            "escaped",
            "collection_items",
            "map_keys",
            "lines",
            "null",
            "serde",
        )
    }
class Properties(Expression):
    """Container node holding a required list of property expressions."""

    arg_types = {"expressions": True}

    # Maps an upper-case property name to the Property subclass that models it.
    NAME_TO_PROPERTY = {
        "ALGORITHM": AlgorithmProperty,
        "AUTO_INCREMENT": AutoIncrementProperty,
        "CHARACTER SET": CharacterSetProperty,
        "CLUSTERED_BY": ClusteredByProperty,
        "COLLATE": CollateProperty,
        "COMMENT": SchemaCommentProperty,
        "DEFINER": DefinerProperty,
        "DISTKEY": DistKeyProperty,
        "DISTRIBUTED_BY": DistributedByProperty,
        "DISTSTYLE": DistStyleProperty,
        "ENGINE": EngineProperty,
        "EXECUTE AS": ExecuteAsProperty,
        "FORMAT": FileFormatProperty,
        "LANGUAGE": LanguageProperty,
        "LOCATION": LocationProperty,
        "LOCK": LockProperty,
        "PARTITIONED_BY": PartitionedByProperty,
        "RETURNS": ReturnsProperty,
        "ROW_FORMAT": RowFormatProperty,
        "SORTKEY": SortKeyProperty,
        "ENCODE": EncodeProperty,
        "INCLUDE": IncludeProperty,
    }

    # Reverse lookup: Property subclass -> property name.
    PROPERTY_TO_NAME = {prop_cls: prop_name for prop_name, prop_cls in NAME_TO_PROPERTY.items()}

    # CREATE property locations
    # Form: schema specified
    #   create [POST_CREATE]
    #     table a [POST_NAME]
    #     (b int) [POST_SCHEMA]
    #     with ([POST_WITH])
    #     index (b) [POST_INDEX]
    #
    # Form: alias selection
    #   create [POST_CREATE]
    #     table a [POST_NAME]
    #     as [POST_ALIAS] (select * from b) [POST_EXPRESSION]
    #     index (c) [POST_INDEX]
    class Location(AutoName):
        POST_CREATE = auto()
        POST_NAME = auto()
        POST_SCHEMA = auto()
        POST_WITH = auto()
        POST_ALIAS = auto()
        POST_EXPRESSION = auto()
        POST_INDEX = auto()
        UNSUPPORTED = auto()

    @classmethod
    def from_dict(cls, properties_dict: t.Dict) -> Properties:
        """
        Build a `Properties` node from a plain mapping.

        Args:
            properties_dict: maps property names to values. Each name is upper-cased and
                looked up in `NAME_TO_PROPERTY`; each value is passed through `convert`.

        Returns:
            A new instance of `cls` with one expression per mapping entry, in iteration order.
        """

        def _as_property(name, raw_value):
            known_cls = cls.NAME_TO_PROPERTY.get(name.upper())
            if not known_cls:
                # Unknown names fall back to a generic Property keyed by a string literal.
                return Property(this=Literal.string(name), value=convert(raw_value))
            return known_cls(this=convert(raw_value))

        return cls(
            expressions=[_as_property(name, raw) for name, raw in properties_dict.items()]
        )
class Table(Expression):
    """Table reference node; every arg is optional."""

    arg_types = dict.fromkeys(
        (
            "this",
            "alias",
            "db",
            "catalog",
            "laterals",
            "joins",
            "pivots",
            "hints",
            "system_time",
            "version",
            "format",
            "pattern",
            "ordinality",
            "when",
            "only",
            "partition",
            "changes",
            "rows_from",
            "sample",
        ),
        False,
    )

    @property
    def name(self) -> str:
        """Name of `this`, or "" when `this` is missing/falsy or is a `Func`."""
        this = self.this
        if not this or isinstance(this, Func):
            return ""
        return this.name

    @property
    def db(self) -> str:
        """Text of the `db` arg."""
        return self.text("db")

    @property
    def catalog(self) -> str:
        """Text of the `catalog` arg."""
        return self.text("catalog")

    @property
    def selects(self) -> t.List[Expression]:
        """Always an empty list for a table."""
        return []

    @property
    def named_selects(self) -> t.List[str]:
        """Always an empty list for a table."""
        return []

    @property
    def parts(self) -> t.List[Expression]:
        """Return the parts of a table in order catalog, db, table."""
        collected: t.List[Expression] = []

        for key in ("catalog", "db", "this"):
            node = self.args.get(key)

            if isinstance(node, Dot):
                # A Dot contributes each of its flattened operands.
                collected.extend(node.flatten())
            elif isinstance(node, Expression):
                collected.append(node)

        return collected

    def to_column(self, copy: bool = True) -> Expression:
        """
        Convert this table reference into a column-like expression.

        Args:
            copy: forwarded to `column` and `alias_`.

        Returns:
            A column built from the table parts when the last part is an `Identifier`,
            otherwise the last part itself; wrapped in an alias if the table has one.
        """
        segments = self.parts
        tail = segments[-1]

        if not isinstance(tail, Identifier):
            # This branch will be reached if a function or array is wrapped in a `Table`
            result: Expression = tail
        else:
            result = column(*reversed(segments[0:4]), fields=segments[4:], copy=copy)  # type: ignore

        alias = self.args.get("alias")
        return alias_(result, alias.this, copy=copy) if alias else result
class Update(DML):
    """UPDATE statement node with builder methods for its table, SET, WHERE, FROM and WITH parts."""

    arg_types = {
        "with": False,
        "this": False,
        "expressions": True,
        "from": False,
        "where": False,
        "returning": False,
        "order": False,
        "limit": False,
    }

    def table(
        self, expression: ExpOrStr, dialect: DialectType = None, copy: bool = True, **opts
    ) -> Update:
        """
        Set the table to update.

        Example:
            >>> Update().table("my_table").set_("x = 1").sql()
            'UPDATE my_table SET x = 1'

        Args:
            expression : the SQL code strings to parse.
                If a `Table` instance is passed, this is used as-is.
                If another `Expression` instance is passed, it will be wrapped in a `Table`.
            dialect: the dialect used to parse the input expression.
            copy: if `False`, modify this expression instance in-place.
            opts: other options to use to parse the input expressions.

        Returns:
            The modified Update expression.
        """
        return _apply_builder(
            expression=expression,
            instance=self,
            arg="this",
            into=Table,
            prefix=None,
            dialect=dialect,
            copy=copy,
            **opts,
        )

    def set_(
        self,
        *expressions: ExpOrStr,
        append: bool = True,
        dialect: DialectType = None,
        copy: bool = True,
        **opts,
    ) -> Update:
        """
        Append to or set the SET expressions.

        Example:
            >>> Update().table("my_table").set_("x = 1").sql()
            'UPDATE my_table SET x = 1'

        Args:
            *expressions: the SQL code strings to parse.
                If `Expression` instance(s) are passed, they will be used as-is.
                Multiple expressions are combined with a comma.
            append: if `True`, add the new expressions to any existing SET expressions.
                Otherwise, this resets the expressions.
            dialect: the dialect used to parse the input expressions.
            copy: if `False`, modify this expression instance in-place.
            opts: other options to use to parse the input expressions.

        Returns:
            The modified Update expression.
        """
        return _apply_list_builder(
            *expressions,
            instance=self,
            arg="expressions",
            append=append,
            into=Expression,
            prefix=None,
            dialect=dialect,
            copy=copy,
            **opts,
        )

    def where(
        self,
        *expressions: t.Optional[ExpOrStr],
        append: bool = True,
        dialect: DialectType = None,
        copy: bool = True,
        **opts,
    ) -> Update:
        """
        Append to or set the WHERE expressions.

        Example:
            >>> Update().table("tbl").set_("x = 1").where("x = 'a' OR x < 'b'").sql()
            "UPDATE tbl SET x = 1 WHERE x = 'a' OR x < 'b'"

        Args:
            *expressions: the SQL code strings to parse.
                If an `Expression` instance is passed, it will be used as-is.
                Multiple expressions are combined with an AND operator.
            append: if `True`, AND the new expressions to any existing expression.
                Otherwise, this resets the expression.
            dialect: the dialect used to parse the input expressions.
            copy: if `False`, modify this expression instance in-place.
            opts: other options to use to parse the input expressions.

        Returns:
            The modified Update expression.
        """
        # The builder operates on `self`, so the result is an Update (not a Select).
        return _apply_conjunction_builder(
            *expressions,
            instance=self,
            arg="where",
            append=append,
            into=Where,
            dialect=dialect,
            copy=copy,
            **opts,
        )

    def from_(
        self,
        expression: t.Optional[ExpOrStr] = None,
        dialect: DialectType = None,
        copy: bool = True,
        **opts,
    ) -> Update:
        """
        Set the FROM expression.

        Example:
            >>> Update().table("my_table").set_("x = 1").from_("baz").sql()
            'UPDATE my_table SET x = 1 FROM baz'

        Args:
            expression : the SQL code strings to parse.
                If a `From` instance is passed, this is used as-is.
                If another `Expression` instance is passed, it will be wrapped in a `From`.
                If nothing is passed in then a from is not applied to the expression
            dialect: the dialect used to parse the input expression.
            copy: if `False`, modify this expression instance in-place.
            opts: other options to use to parse the input expressions.

        Returns:
            The modified Update expression.
        """
        if not expression:
            # Nothing to apply: still honour `copy` so callers get a consistent result.
            return maybe_copy(self, copy)

        return _apply_builder(
            expression=expression,
            instance=self,
            arg="from",
            into=From,
            prefix="FROM",
            dialect=dialect,
            copy=copy,
            **opts,
        )

    def with_(
        self,
        alias: ExpOrStr,
        as_: ExpOrStr,
        recursive: t.Optional[bool] = None,
        materialized: t.Optional[bool] = None,
        append: bool = True,
        dialect: DialectType = None,
        copy: bool = True,
        **opts,
    ) -> Update:
        """
        Append to or set the common table expressions.

        Example:
            >>> Update().table("my_table").set_("x = 1").from_("baz").with_("baz", "SELECT id FROM foo").sql()
            'WITH baz AS (SELECT id FROM foo) UPDATE my_table SET x = 1 FROM baz'

        Args:
            alias: the SQL code string to parse as the table name.
                If an `Expression` instance is passed, this is used as-is.
            as_: the SQL code string to parse as the table expression.
                If an `Expression` instance is passed, it will be used as-is.
            recursive: set the RECURSIVE part of the expression. Defaults to `False`.
            materialized: set the MATERIALIZED part of the expression.
            append: if `True`, add to any existing expressions.
                Otherwise, this resets the expressions.
            dialect: the dialect used to parse the input expression.
            copy: if `False`, modify this expression instance in-place.
            opts: other options to use to parse the input expressions.

        Returns:
            The modified expression.
        """
        return _apply_cte_builder(
            self,
            alias,
            as_,
            recursive=recursive,
            materialized=materialized,
            append=append,
            dialect=dialect,
            copy=copy,
            **opts,
        )
def group_by(
    self,
    *expressions: t.Optional[ExpOrStr],
    append: bool = True,
    dialect: DialectType = None,
    copy: bool = True,
    **opts,
) -> Select:
    """
    Set the GROUP BY expression.

    Example:
        >>> Select().from_("tbl").select("x", "COUNT(1)").group_by("x").sql()
        'SELECT x, COUNT(1) FROM tbl GROUP BY x'

    Args:
        *expressions: the SQL code strings to parse.
            If a `Group` instance is passed, this is used as-is.
            If another `Expression` instance is passed, it will be wrapped in a `Group`.
            If nothing is passed in then a group by is not applied to the expression
        append: if `True`, add to any existing expressions.
            Otherwise, this flattens all the `Group` expression into a single expression.
        dialect: the dialect used to parse the input expression.
        copy: if `False`, modify this expression instance in-place.
        opts: other options to use to parse the input expressions.

    Returns:
        The modified Select expression.
    """
    if expressions:
        return _apply_child_list_builder(
            *expressions,
            instance=self,
            arg="group",
            append=append,
            copy=copy,
            prefix="GROUP BY",
            into=Group,
            dialect=dialect,
            **opts,
        )

    # No grouping requested: hand back this node, copied when `copy` is set.
    return self.copy() if copy else self
def cluster_by(
    self,
    *expressions: t.Optional[ExpOrStr],
    append: bool = True,
    dialect: DialectType = None,
    copy: bool = True,
    **opts,
) -> Select:
    """
    Set the CLUSTER BY expression.

    Example:
        >>> Select().from_("tbl").select("x").cluster_by("x DESC").sql(dialect="hive")
        'SELECT x FROM tbl CLUSTER BY x DESC'

    Args:
        *expressions: the SQL code strings to parse.
            If a `Cluster` instance is passed, this is used as-is.
            If another `Expression` instance is passed, it will be wrapped in a `Cluster`.
        append: if `True`, add to any existing expressions.
            Otherwise, this flattens all the `Cluster` expression into a single expression.
        dialect: the dialect used to parse the input expression.
        copy: if `False`, modify this expression instance in-place.
        opts: other options to use to parse the input expressions.

    Returns:
        The modified Select expression.
    """
    # Fixed builder settings are kept separate from the caller's parse options.
    builder_settings = {
        "instance": self,
        "arg": "cluster",
        "append": append,
        "copy": copy,
        "prefix": "CLUSTER BY",
        "into": Cluster,
        "dialect": dialect,
    }
    return _apply_child_list_builder(*expressions, **builder_settings, **opts)
def join(
    self,
    expression: ExpOrStr,
    on: t.Optional[ExpOrStr] = None,
    using: t.Optional[ExpOrStr | t.Collection[ExpOrStr]] = None,
    append: bool = True,
    join_type: t.Optional[str] = None,
    join_alias: t.Optional[Identifier | str] = None,
    dialect: DialectType = None,
    copy: bool = True,
    **opts,
) -> Select:
    """
    Append to or set the JOIN expressions.

    Example:
        >>> Select().select("*").from_("tbl").join("tbl2", on="tbl1.y = tbl2.y").sql()
        'SELECT * FROM tbl JOIN tbl2 ON tbl1.y = tbl2.y'

        >>> Select().select("1").from_("a").join("b", using=["x", "y", "z"]).sql()
        'SELECT 1 FROM a JOIN b USING (x, y, z)'

        Use `join_type` to change the type of join:

        >>> Select().select("*").from_("tbl").join("tbl2", on="tbl1.y = tbl2.y", join_type="left outer").sql()
        'SELECT * FROM tbl LEFT OUTER JOIN tbl2 ON tbl1.y = tbl2.y'

    Args:
        expression: the SQL code string to parse.
            If an `Expression` instance is passed, it will be used as-is.
        on: optionally specify the join "on" criteria as a SQL string.
            If an `Expression` instance is passed, it will be used as-is.
        using: optionally specify the join "using" criteria as a SQL string.
            If an `Expression` instance is passed, it will be used as-is.
        append: if `True`, add to any existing expressions.
            Otherwise, this resets the expressions.
        join_type: if set, alter the parsed join type.
        join_alias: an optional alias for the joined source.
        dialect: the dialect used to parse the input expressions.
        copy: if `False`, modify this expression instance in-place.
        opts: other options to use to parse the input expressions.

    Returns:
        Select: the modified expression.
    """
    parse_args: t.Dict[str, t.Any] = {"dialect": dialect, **opts}

    # First try to read the input as a full JOIN clause; if that fails, parse it
    # as a Join or a bare expression and wrap it below.
    try:
        expression = maybe_parse(expression, into=Join, prefix="JOIN", **parse_args)
    except ParseError:
        expression = maybe_parse(expression, into=(Join, Expression), **parse_args)

    join = expression if isinstance(expression, Join) else Join(this=expression)

    # A bare SELECT used as the join source is replaced by its subquery form.
    if isinstance(join.this, Select):
        join.this.replace(join.this.subquery())

    if join_type:
        method: t.Optional[Token]
        side: t.Optional[Token]
        kind: t.Optional[Token]

        method, side, kind = maybe_parse(join_type, into="JOIN_TYPE", **parse_args)  # type: ignore

        if method:
            join.set("method", method.text)
        if side:
            join.set("side", side.text)
        if kind:
            join.set("kind", kind.text)

    if on:
        on = and_(*ensure_list(on), dialect=dialect, copy=copy, **opts)
        join.set("on", on)

    if using:
        # Forward `dialect` so USING identifiers are parsed with the same dialect
        # as the join source, join type and ON condition.
        join = _apply_list_builder(
            *ensure_list(using),
            instance=join,
            arg="using",
            append=append,
            copy=copy,
            into=Identifier,
            dialect=dialect,
            **opts,
        )

    if join_alias:
        join.set("this", alias_(join.this, join_alias, table=True))

    return _apply_list_builder(
        join,
        instance=self,
        arg="joins",
        append=append,
        copy=copy,
        **opts,
    )
def distinct(
    self, *ons: t.Optional[ExpOrStr], distinct: bool = True, copy: bool = True
) -> Select:
    """
    Set the DISTINCT expression.

    Example:
        >>> Select().from_("tbl").select("x").distinct().sql()
        'SELECT DISTINCT x FROM tbl'

    Args:
        ons: the expressions to distinct on
        distinct: whether the Select should be distinct
        copy: if `False`, modify this expression instance in-place.

    Returns:
        Select: the modified expression.
    """
    target = maybe_copy(self, copy)

    on_tuple = None
    if ons:
        # Falsy entries (e.g. None) are skipped; the rest are parsed into a Tuple.
        on_tuple = Tuple(
            expressions=[maybe_parse(candidate, copy=copy) for candidate in ons if candidate]
        )

    # Passing distinct=False clears any existing DISTINCT.
    target.set("distinct", Distinct(on=on_tuple) if distinct else None)
    return target
class Subquery(DerivedTable, Query):
    """Subquery node wrapping a required `this`, with optional alias, CTEs and query modifiers."""

    arg_types = {"this": True, "alias": False, "with": False, **QUERY_MODIFIERS}

    def unnest(self):
        """Returns the first non subquery."""
        node = self
        while isinstance(node, Subquery):
            node = node.this
        return node

    def unwrap(self) -> Subquery:
        """Climb through parents for as long as the current node is a same-parent wrapper."""
        current = self
        while True:
            if not (current.same_parent and current.is_wrapper):
                return current
            current = t.cast(Subquery, current.parent)

    def select(
        self,
        *expressions: t.Optional[ExpOrStr],
        append: bool = True,
        dialect: DialectType = None,
        copy: bool = True,
        **opts,
    ) -> Subquery:
        """
        Apply `select` to the innermost non-subquery expression.

        Args:
            *expressions: forwarded to the inner expression's `select`.
            append: forwarded to the inner expression's `select`.
            dialect: forwarded to the inner expression's `select`.
            copy: if `False`, modify this expression instance in-place.
            opts: other options forwarded to the inner expression's `select`.

        Returns:
            This subquery (or its copy) with the inner selection updated.
        """
        result = maybe_copy(self, copy)
        # The inner node is modified in place because `result` is already the copy.
        inner = result.unnest()
        inner.select(*expressions, append=append, dialect=dialect, copy=False, **opts)
        return result

    @property
    def is_wrapper(self) -> bool:
        """
        Whether this Subquery acts as a simple wrapper around another expression.

        SELECT * FROM (((SELECT * FROM t)))
                      ^
                      This corresponds to a "wrapper" Subquery node
        """
        # A wrapper has no arg set other than "this".
        return not any(
            value is not None for key, value in self.args.items() if key != "this"
        )

    @property
    def is_star(self) -> bool:
        """Delegates to the wrapped expression's `is_star`."""
        return self.this.is_star

    @property
    def output_name(self) -> str:
        """The subquery's alias."""
        return self.alias
class Placeholder(Condition):
    """Placeholder node; both `this` and `kind` are optional."""

    arg_types = dict(this=False, kind=False)

    @property
    def name(self) -> str:
        """Return `this` when it is truthy, otherwise the literal "?"."""
        label = self.this
        return label if label else "?"
class DataType(Expression):
    arg_types = {
        "this": True,
        "expressions": False,
        "nested": False,
        "values": False,
        "prefix": False,
        "kind": False,
        "nullable": False,
    }

    # Enumeration of the supported type names. Members use `auto()`, except for
    # USERDEFINED, whose value is spelled out explicitly.
    class Type(AutoName):
        ARRAY = auto()
        AGGREGATEFUNCTION = auto()
        SIMPLEAGGREGATEFUNCTION = auto()
        BIGDECIMAL = auto()
        BIGINT = auto()
        BIGSERIAL = auto()
        BINARY = auto()
        BIT = auto()
        BLOB = auto()
        BOOLEAN = auto()
        BPCHAR = auto()
        CHAR = auto()
        DATE = auto()
        DATE32 = auto()
        DATEMULTIRANGE = auto()
        DATERANGE = auto()
        DATETIME = auto()
        DATETIME2 = auto()
        DATETIME64 = auto()
        DECIMAL = auto()
        DECIMAL32 = auto()
        DECIMAL64 = auto()
        DECIMAL128 = auto()
        DECIMAL256 = auto()
        DOUBLE = auto()
        DYNAMIC = auto()
        ENUM = auto()
        ENUM8 = auto()
        ENUM16 = auto()
        FIXEDSTRING = auto()
        FLOAT = auto()
        GEOGRAPHY = auto()
        GEOMETRY = auto()
        POINT = auto()
        RING = auto()
        LINESTRING = auto()
        MULTILINESTRING = auto()
        POLYGON = auto()
        MULTIPOLYGON = auto()
        HLLSKETCH = auto()
        HSTORE = auto()
        IMAGE = auto()
        INET = auto()
        INT = auto()
        INT128 = auto()
        INT256 = auto()
        INT4MULTIRANGE = auto()
        INT4RANGE = auto()
        INT8MULTIRANGE = auto()
        INT8RANGE = auto()
        INTERVAL = auto()
        IPADDRESS = auto()
        IPPREFIX = auto()
        IPV4 = auto()
        IPV6 = auto()
        JSON = auto()
        JSONB = auto()
        LIST = auto()
        LONGBLOB = auto()
        LONGTEXT = auto()
        LOWCARDINALITY = auto()
        MAP = auto()
        MEDIUMBLOB = auto()
        MEDIUMINT = auto()
        MEDIUMTEXT = auto()
        MONEY = auto()
        NAME = auto()
        NCHAR = auto()
        NESTED = auto()
        NULL = auto()
        NUMMULTIRANGE = auto()
        NUMRANGE = auto()
        NVARCHAR = auto()
        OBJECT = auto()
        RANGE = auto()
        ROWVERSION = auto()
        SERIAL = auto()
        SET = auto()
        SMALLDATETIME = auto()
        SMALLINT = auto()
        SMALLMONEY = auto()
        SMALLSERIAL = auto()
        STRUCT = auto()
        SUPER = auto()
        TEXT = auto()
        TINYBLOB = auto()
        TINYTEXT = auto()
        TIME = auto()
        TIMETZ = auto()
        TIMESTAMP = auto()
        TIMESTAMPNTZ = auto()
        TIMESTAMPLTZ = auto()
        TIMESTAMPTZ = auto()
        TIMESTAMP_S = auto()
        TIMESTAMP_MS = auto()
        TIMESTAMP_NS = auto()
        TINYINT = auto()
        TSMULTIRANGE = auto()
        TSRANGE = auto()
        TSTZMULTIRANGE = auto()
        TSTZRANGE = auto()
        UBIGINT = auto()
        UINT = auto()
        UINT128 = auto()
        UINT256 = auto()
        UMEDIUMINT = auto()
        UDECIMAL = auto()
        UDOUBLE = auto()
        UNION = auto()
        UNKNOWN = auto()  # Sentinel value, useful for type annotation
        USERDEFINED = "USER-DEFINED"
        USMALLINT = auto()
        UTINYINT = auto()
        UUID = auto()
        VARBINARY = auto()
        VARCHAR = auto()
        VARIANT = auto()
        VECTOR = auto()
        XML = auto()
        YEAR = auto()
        TDIGEST = auto()

    # Groupings of `Type` members by category. Composite groups are built by
    # unpacking the narrower ones, so the narrower sets must be defined first.
    STRUCT_TYPES = {
        Type.NESTED,
        Type.OBJECT,
        Type.STRUCT,
        Type.UNION,
    }

    ARRAY_TYPES = {
        Type.ARRAY,
        Type.LIST,
    }

    NESTED_TYPES = {
        *STRUCT_TYPES,
        *ARRAY_TYPES,
        Type.MAP,
    }

    TEXT_TYPES = {
        Type.CHAR,
        Type.NCHAR,
        Type.NVARCHAR,
        Type.TEXT,
        Type.VARCHAR,
        Type.NAME,
    }

    SIGNED_INTEGER_TYPES = {
        Type.BIGINT,
        Type.INT,
        Type.INT128,
        Type.INT256,
        Type.MEDIUMINT,
        Type.SMALLINT,
        Type.TINYINT,
    }

    UNSIGNED_INTEGER_TYPES = {
        Type.UBIGINT,
        Type.UINT,
        Type.UINT128,
        Type.UINT256,
        Type.UMEDIUMINT,
        Type.USMALLINT,
        Type.UTINYINT,
    }

    INTEGER_TYPES = {
        *SIGNED_INTEGER_TYPES,
        *UNSIGNED_INTEGER_TYPES,
        Type.BIT,
    }

    FLOAT_TYPES = {
        Type.DOUBLE,
        Type.FLOAT,
    }

    REAL_TYPES = {
        *FLOAT_TYPES,
        Type.BIGDECIMAL,
        Type.DECIMAL,
        Type.DECIMAL32,
        Type.DECIMAL64,
        Type.DECIMAL128,
        Type.DECIMAL256,
        Type.MONEY,
        Type.SMALLMONEY,
        Type.UDECIMAL,
        Type.UDOUBLE,
    }

    NUMERIC_TYPES = {
        *INTEGER_TYPES,
        *REAL_TYPES,
    }

    TEMPORAL_TYPES = {
        Type.DATE,
        Type.DATE32,
        Type.DATETIME,
        Type.DATETIME2,
        Type.DATETIME64,
        Type.SMALLDATETIME,
        Type.TIME,
        Type.TIMESTAMP,
        Type.TIMESTAMPNTZ,
        Type.TIMESTAMPLTZ,
        Type.TIMESTAMPTZ,
        Type.TIMESTAMP_MS,
        Type.TIMESTAMP_NS,
        Type.TIMESTAMP_S,
        Type.TIMETZ,
    }

    @classmethod
    def build(
        cls,
        dtype: DATA_TYPE,
        dialect: DialectType = None,
        udt: bool = False,
        copy: bool = True,
        **kwargs,
    ) -> DataType:
        """
        Constructs a DataType object.

        Args:
            dtype: the data type of interest.
            dialect: the dialect to use for parsing `dtype`, in case it's a string.
            udt: when set to True, `dtype` will be used as-is if it can't be parsed into a
                DataType, thus creating a user-defined type.
            copy: whether to copy the data type.
            kwargs: additional arguments to pass in the constructor of DataType.

        Returns:
            The constructed DataType object. If `dtype` is already a DataType, it is passed
            through `maybe_copy` and returned directly; `kwargs` are not applied in that case.

        Raises:
            ParseError: if `dtype` is a string that can't be parsed and `udt` is False.
            ValueError: if `dtype` is not a str, a DataType or a DataType.Type.
        """
        # Imported locally rather than at module level.
        # NOTE(review): presumably to avoid a circular import -- confirm.
        from sqlglot import parse_one

        if isinstance(dtype, str):
            # The string "UNKNOWN" (case-insensitive) is mapped directly, without parsing.
            if dtype.upper() == "UNKNOWN":
                return DataType(this=DataType.Type.UNKNOWN, **kwargs)

            try:
                data_type_exp = parse_one(
                    dtype, read=dialect, into=DataType, error_level=ErrorLevel.IGNORE
                )
            except ParseError:
                if udt:
                    # Keep the unparsable text verbatim as the user-defined type's `kind`.
                    return DataType(this=DataType.Type.USERDEFINED, kind=dtype, **kwargs)
                raise
        elif isinstance(dtype, DataType.Type):
            data_type_exp = DataType(this=dtype)
        elif isinstance(dtype, DataType):
            return maybe_copy(dtype, copy)
        else:
            raise ValueError(f"Invalid data type: {type(dtype)}. Expected str or DataType.Type")

        # Explicit kwargs take precedence over the args of the parsed / constructed type.
        return DataType(**{**data_type_exp.args, **kwargs})

    def is_type(self, *dtypes: DATA_TYPE, check_nullable: bool = False) -> bool:
        """
        Checks whether this DataType matches one of the provided data types. Nested types or precision
        will be compared using "structural equivalence" semantics, so e.g. array<int> != array<float>.

        Args:
            dtypes: the data types to compare this DataType to.
            check_nullable: whether to take the NULLABLE type constructor into account for the comparison.
                If false, it means that NULLABLE<INT> is equivalent to INT.

        Returns:
            True, if and only if there is a type in `dtypes` which is equal to this DataType.
        """
        self_is_nullable = self.args.get("nullable")
        for dtype in dtypes:
            # `udt=True` makes unparsable strings compare as user-defined types instead of raising.
            other_type = DataType.build(dtype, copy=False, udt=True)
            other_is_nullable = other_type.args.get("nullable")
            if (
                other_type.expressions
                or (check_nullable and (self_is_nullable or other_is_nullable))
                or self.this == DataType.Type.USERDEFINED
                or other_type.this == DataType.Type.USERDEFINED
            ):
                # Parameterized, nullable-sensitive or user-defined types need full equality.
                matches = self == other_type
            else:
                # Otherwise only the base type (`this`) is compared.
                matches = self.this == other_type.this

            if matches:
                return True
        return False


# Anything accepted where a data type is expected: a type string, a DataType node or a Type member.
DATA_TYPE = t.Union[str, DataType, DataType.Type]


# https://www.postgresql.org/docs/15/datatype-pseudo.html
class PseudoType(DataType):
    arg_types = {"this": True}


# https://www.postgresql.org/docs/15/datatype-oid.html
class ObjectIdentifier(DataType):
    arg_types = {"this": True}


# WHERE x <OP> EXISTS|ALL|ANY|SOME(SELECT ...)
class SubqueryPredicate(Predicate):
    pass


class All(SubqueryPredicate):
    pass


class Any(SubqueryPredicate):
    pass


# Commands to interact with the databases or engines. For most of the command
# expressions we parse whatever comes after the command's name as a string.
class Command(Expression):
    arg_types = {"this": True, "expression": False}


class Transaction(Expression):
    arg_types = {"this": False, "modes": False, "mark": False}


class Commit(Expression):
    arg_types = {"chain": False, "this": False, "durability": False}


class Rollback(Expression):
    arg_types = {"savepoint": False, "this": False}


# `this`, `kind` and `actions` are required; the remaining args are optional.
class Alter(Expression):
    arg_types = {
        "this": True,
        "kind": True,
        "actions": True,
        "exists": False,
        "only": False,
        "options": False,
        "cluster": False,
        "not_valid": False,
    }

    @property
    def kind(self) -> t.Optional[str]:
        """Return the `kind` arg upper-cased, or the falsy value itself when it is unset."""
        kind = self.args.get("kind")
        return kind and kind.upper()

    @property
    def actions(self) -> t.List[Expression]:
        """Return the `actions` arg, falling back to an empty list when it is missing or falsy."""
        return self.args.get("actions") or []


class Analyze(Expression):
    arg_types = {
        "kind": False,
        "this": False,
        "options": False,
        "mode": False,
        "partition": False,
        "expression": False,
        "properties": False,
    }


class AnalyzeStatistics(Expression):
    arg_types = {
        "kind": True,
        "option": False,
        "this": False,
        "expressions": False,
    }


class AnalyzeHistogram(Expression):
    arg_types = {
        "this": True,
        "expressions": True,
        "expression": False,
        "update_options": False,
    }


class AnalyzeSample(Expression):
    arg_types = {"kind": True, "sample": True}


class AnalyzeListChainedRows(Expression):
    arg_types = {"expression": False}


class AnalyzeDelete(Expression):
    arg_types = {"kind": False}


class AnalyzeWith(Expression):
    arg_types = {"expressions": True}


class AnalyzeValidate(Expression):
    arg_types = {
        "kind": True,
        "this": False,
        "expression": False,
    }


class AnalyzeColumns(Expression):
    pass


class UsingData(Expression):
    pass


class AddConstraint(Expression):
    arg_types = {"expressions": True}


class AttachOption(Expression):
    arg_types = {"this": True, "expression": False}


class DropPartition(Expression):
    arg_types = {"expressions": True, "exists": False}


# https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#replace-partition
class ReplacePartition(Expression):
    arg_types = {"expression": True, "source": True}


# Binary
# expressions like (ADD a b)
class Binary(Condition):
    arg_types = {"this": True, "expression": True}

    @property
    def left(self) -> Expression:
        """Return the left operand, i.e. the `this` arg."""
        return self.this

    @property
    def right(self) -> Expression:
        """Return the right operand, i.e. the `expression` arg."""
        return self.expression


class Add(Binary):
    pass


class Connector(Binary):
    pass


class BitwiseAnd(Binary):
    pass


class BitwiseLeftShift(Binary):
    pass


class BitwiseOr(Binary):
    pass


class BitwiseRightShift(Binary):
    pass


class BitwiseXor(Binary):
    pass


class Div(Binary):
    arg_types = {"this": True, "expression": True, "typed": False, "safe": False}


class Overlaps(Binary):
    pass


class Dot(Binary):
    @property
    def is_star(self) -> bool:
        """Return whether the right-hand side (`expression`) is a star."""
        return self.expression.is_star

    @property
    def name(self) -> str:
        """Return the name of the right-hand side (`expression`)."""
        return self.expression.name

    @property
    def output_name(self) -> str:
        """Return the same value as `name`."""
        return self.name

    @classmethod
    def build(cls, expressions: t.Sequence[Expression]) -> Dot:
        """
        Build a Dot object with a sequence of expressions.

        The expressions are folded left to right, so `[a, b, c]` becomes `Dot(Dot(a, b), c)`.

        Args:
            expressions: the operands to chain; at least two are required.

        Returns:
            The left-nested Dot expression.

        Raises:
            ValueError: if fewer than two expressions are provided.
        """
        if len(expressions) < 2:
            raise ValueError("Dot requires >= 2 expressions.")

        return t.cast(Dot, reduce(lambda x, y: Dot(this=x, expression=y), expressions))

    @property
    def parts(self) -> t.List[Expression]:
        """Return the parts of a table / column in order catalog, db, table."""
        this, *parts = self.flatten()

        # Work on the reversed list so that the parts of the leading expression can be
        # appended in COLUMN_PARTS order and the whole list flipped back at the end.
        parts.reverse()

        for arg in COLUMN_PARTS:
            part = this.args.get(arg)

            if isinstance(part, Expression):
                parts.append(part)

        parts.reverse()
        return parts


class DPipe(Binary):
    arg_types = {"this": True, "expression": True, "safe": False}


class EQ(Binary, Predicate):
    pass


class NullSafeEQ(Binary, Predicate):
    pass


class NullSafeNEQ(Binary, Predicate):
    pass


# Represents e.g.
# := in DuckDB which is mostly used for setting parameters
class PropertyEQ(Binary):
    pass


class Distance(Binary):
    pass


class Escape(Binary):
    pass


class Glob(Binary, Predicate):
    pass


class GT(Binary, Predicate):
    pass


class GTE(Binary, Predicate):
    pass


class ILike(Binary, Predicate):
    pass


class ILikeAny(Binary, Predicate):
    pass


class IntDiv(Binary):
    pass


class Is(Binary, Predicate):
    pass


class Kwarg(Binary):
    """Kwarg in special functions like func(kwarg => y)."""


class Like(Binary, Predicate):
    pass


class LikeAny(Binary, Predicate):
    pass


class LT(Binary, Predicate):
    pass


class LTE(Binary, Predicate):
    pass


class Mod(Binary):
    pass


class Mul(Binary):
    pass


class NEQ(Binary, Predicate):
    pass


# https://www.postgresql.org/docs/current/ddl-schemas.html#DDL-SCHEMAS-PATH
class Operator(Binary):
    arg_types = {"this": True, "operator": True, "expression": True}


class SimilarTo(Binary, Predicate):
    pass


# Unlike other Binary nodes, both operands of a Slice are optional.
class Slice(Binary):
    arg_types = {"this": False, "expression": False}


class Sub(Binary):
    pass


# Unary Expressions
# (NOT a)
class Unary(Condition):
    pass


class BitwiseNot(Unary):
    pass


class Not(Unary):
    pass


class Paren(Unary):
    @property
    def output_name(self) -> str:
        """Return the name of the wrapped `this` expression."""
        return self.this.name


class Neg(Unary):
    def to_py(self) -> int | Decimal:
        """
        Return the Python value of this negation.

        When `is_number` holds, the operand's Python value is multiplied by -1; otherwise
        the call is delegated to the parent implementation.
        """
        if self.is_number:
            return self.this.to_py() * -1
        return super().to_py()


class Alias(Expression):
    arg_types = {"this": True, "alias": False}

    @property
    def output_name(self) -> str:
        """Return this node's `alias`."""
        return self.alias


# BigQuery requires the UNPIVOT column list aliases to be either strings or ints, but
# other dialects require identifiers. This enables us to transpile between them easily.
class PivotAlias(Alias):
    pass


# Represents Snowflake's ANY [ ORDER BY ...
# ] syntax
# https://docs.snowflake.com/en/sql-reference/constructs/pivot
class PivotAny(Expression):
    arg_types = {"this": False}


class Aliases(Expression):
    arg_types = {"this": True, "expressions": True}

    @property
    def aliases(self):
        """Return the `expressions` of this node."""
        return self.expressions


# https://docs.aws.amazon.com/redshift/latest/dg/query-super.html
class AtIndex(Expression):
    arg_types = {"this": True, "expression": True}


class AtTimeZone(Expression):
    arg_types = {"this": True, "zone": True}


class FromTimeZone(Expression):
    arg_types = {"this": True, "zone": True}


class Between(Predicate):
    arg_types = {"this": True, "low": True, "high": True}


class Bracket(Condition):
    # https://cloud.google.com/bigquery/docs/reference/standard-sql/operators#array_subscript_operator
    arg_types = {
        "this": True,
        "expressions": True,
        "offset": False,
        "safe": False,
        "returns_list_for_maps": False,
    }

    @property
    def output_name(self) -> str:
        """Return the single subscript's output name, else defer to the parent implementation."""
        if len(self.expressions) == 1:
            return self.expressions[0].output_name

        return super().output_name


class Distinct(Expression):
    arg_types = {"expressions": False, "on": False}


class In(Predicate):
    arg_types = {
        "this": True,
        "expressions": False,
        "query": False,
        "unnest": False,
        "field": False,
        "is_global": False,
    }


# https://cloud.google.com/bigquery/docs/reference/standard-sql/procedural-language#for-in
class ForIn(Expression):
    arg_types = {"this": True, "expression": True}


class TimeUnit(Expression):
    """Automatically converts unit arg into a var."""

    arg_types = {"unit": False}

    # Maps abbreviated unit names to their full spelling. Keys are upper-case.
    UNABBREVIATED_UNIT_NAME = {
        "D": "DAY",
        "H": "HOUR",
        "M": "MINUTE",
        "MS": "MILLISECOND",
        "NS": "NANOSECOND",
        "Q": "QUARTER",
        "S": "SECOND",
        "US": "MICROSECOND",
        "W": "WEEK",
        "Y": "YEAR",
    }

    # Node types whose `name` is treated as a unit name.
    VAR_LIKE = (Column, Literal, Var)

    def __init__(self, **args):
        unit = args.get("unit")
        if isinstance(unit, self.VAR_LIKE):
            # NOTE: the abbreviation lookup uses `unit.name` as-is and upper-casing happens
            # afterwards, so only upper-case abbreviations are expanded.
            args["unit"] = Var(
                this=(self.UNABBREVIATED_UNIT_NAME.get(unit.name) or unit.name).upper()
            )
        elif isinstance(unit, Week):
            # A Week unit is kept as a node; only its `this` is normalized in place.
            unit.set("this", Var(this=unit.this.name.upper()))

        super().__init__(**args)

    @property
    def unit(self) -> t.Optional[Var | IntervalSpan]:
        """Return the `unit` arg, or None when it is absent."""
        return self.args.get("unit")


class IntervalOp(TimeUnit):
    arg_types = {"unit": False, "expression": True}

    def interval(self):
        """Build an Interval from copies of this node's `expression` and (if set) `unit`."""
        return Interval(
            this=self.expression.copy(),
            unit=self.unit.copy() if self.unit else None,
        )


# https://www.oracletutorial.com/oracle-basics/oracle-interval/
# https://trino.io/docs/current/language/types.html#interval-day-to-second
# https://docs.databricks.com/en/sql/language-manual/data-types/interval-type.html
class IntervalSpan(DataType):
    arg_types = {"this": True, "expression": True}


class Interval(TimeUnit):
    arg_types = {"this": False, "unit": False}


class IgnoreNulls(Expression):
    pass


class RespectNulls(Expression):
    pass


# https://cloud.google.com/bigquery/docs/reference/standard-sql/aggregate-function-calls#max_min_clause
class HavingMax(Expression):
    arg_types = {"this": True, "expression": True, "max": True}


# Functions
class Func(Condition):
    """
    The base class for all function expressions.

    Attributes:
        is_var_len_args (bool): if set to True the last argument defined in arg_types will be
            treated as a variable length argument and the argument's value will be stored as a list.
        _sql_names (list): the SQL name (1st item in the list) and aliases (subsequent items) for this
            function expression. These values are used to map this node to a name during parsing as
            well as to provide the function's name during SQL string generation. By default the SQL
            name is set to the expression's class name transformed to snake case.
    """

    is_var_len_args = False

    @classmethod
    def from_arg_list(cls, args):
        """
        Instantiate this function from a positional argument list.

        Arguments are matched to the keys of `arg_types` in definition order. Extra
        positional arguments beyond the available keys are dropped by `zip`, unless the
        function is variable-length, in which case they are collected into the last key.

        Args:
            args: the positional arguments of the function call.

        Returns:
            A new instance of `cls`.
        """
        if cls.is_var_len_args:
            all_arg_keys = list(cls.arg_types)
            # If this function supports variable length argument treat the last argument as such.
            non_var_len_arg_keys = all_arg_keys[:-1]
            num_non_var = len(non_var_len_arg_keys)

            args_dict = {arg_key: arg for arg, arg_key in zip(args, non_var_len_arg_keys)}
            args_dict[all_arg_keys[-1]] = args[num_non_var:]
        else:
            args_dict = {arg_key: arg for arg, arg_key in zip(args, cls.arg_types)}

        return cls(**args_dict)

    @classmethod
    def sql_names(cls):
        """
        Return the SQL name and aliases of this function.

        Raises:
            NotImplementedError: if called on the `Func` base class itself.
        """
        if cls is Func:
            raise NotImplementedError(
                "SQL name is only supported by concrete function implementations"
            )
        # Check the class' own `__dict__` so that names inherited from a parent class
        # are not reused; a default derived from the class name is stored instead.
        if "_sql_names" not in cls.__dict__:
            cls._sql_names = [camel_to_snake_case(cls.__name__)]
        return cls._sql_names

    @classmethod
    def sql_name(cls):
        """Return the primary SQL name, i.e. the first entry of `sql_names()`."""
        return cls.sql_names()[0]

    @classmethod
    def default_parser_mappings(cls):
        """Map every SQL name of this function to its `from_arg_list` constructor."""
        return {name: cls.from_arg_list for name in cls.sql_names()}


class AggFunc(Func):
    pass


class ParameterizedAgg(AggFunc):
    arg_types = {"this": True, "expressions": True, "params": True}


class Abs(Func):
    pass


class ArgMax(AggFunc):
    arg_types = {"this": True, "expression": True, "count": False}
    _sql_names = ["ARG_MAX", "ARGMAX", "MAX_BY"]


class ArgMin(AggFunc):
    arg_types = {"this": True, "expression": True, "count": False}
    _sql_names = ["ARG_MIN", "ARGMIN", "MIN_BY"]


class ApproxTopK(AggFunc):
    arg_types = {"this": True, "expression": False, "counters": False}


class Flatten(Func):
    pass


# https://spark.apache.org/docs/latest/api/sql/index.html#transform
class Transform(Func):
    arg_types = {"this": True, "expression": True}


class Anonymous(Func):
    arg_types = {"this": True, "expressions": False}
    is_var_len_args = True

    @property
    def name(self) -> str:
        """Return `this` if it is a plain string, otherwise the name of the `this` node."""
        return self.this if isinstance(self.this, str) else self.this.name


class AnonymousAggFunc(AggFunc):
    arg_types = {"this": True, "expressions": False}
    is_var_len_args = True


# https://clickhouse.com/docs/en/sql-reference/aggregate-functions/combinators
class CombinedAggFunc(AnonymousAggFunc):
    arg_types = {"this": True, "expressions": False}


class CombinedParameterizedAgg(ParameterizedAgg):
    arg_types = {"this": True, "expressions": True, "params": True}


# https://docs.snowflake.com/en/sql-reference/functions/hll
# https://docs.aws.amazon.com/redshift/latest/dg/r_HLL_function.html
class Hll(AggFunc):
    arg_types = {"this": True, "expressions": False}
    is_var_len_args = True


class ApproxDistinct(AggFunc):
    arg_types = {"this": True, "accuracy": False}
    _sql_names = ["APPROX_DISTINCT", "APPROX_COUNT_DISTINCT"]


class Apply(Func):
    arg_types = {"this": True, "expression": True}


class Array(Func):
    arg_types = {"expressions": False, "bracket_notation": False}
    is_var_len_args = True


# https://docs.snowflake.com/en/sql-reference/functions/to_array
class ToArray(Func):
    pass


# https://materialize.com/docs/sql/types/list/
class List(Func):
    arg_types = {"expressions": False}
    is_var_len_args = True


# String pad, is_left True -> LPAD, False -> RPAD
class Pad(Func):
    arg_types = {"this": True, "expression": True, "fill_pattern": False, "is_left": True}


# https://docs.snowflake.com/en/sql-reference/functions/to_char
# https://docs.oracle.com/en/database/oracle/oracle-database/23/sqlrf/TO_CHAR-number.html
class ToChar(Func):
    arg_types = {"this": True, "format": False, "nlsparam": False}


# https://docs.snowflake.com/en/sql-reference/functions/to_decimal
# https://docs.oracle.com/en/database/oracle/oracle-database/23/sqlrf/TO_NUMBER.html
class ToNumber(Func):
    arg_types = {
        "this": True,
        "format": False,
        "nlsparam": False,
        "precision": False,
        "scale": False,
    }


# https://docs.snowflake.com/en/sql-reference/functions/to_double
class ToDouble(Func):
    arg_types = {
        "this": True,
        "format": False,
    }


class Columns(Func):
    arg_types = {"this": True, "unpack": False}


# https://learn.microsoft.com/en-us/sql/t-sql/functions/cast-and-convert-transact-sql?view=sql-server-ver16#syntax
class Convert(Func):
    arg_types = {"this": True, "expression": True, "style": False}


class ConvertTimezone(Func):
    arg_types = {"source_tz": False, "target_tz": True, "timestamp": True}


class GenerateSeries(Func):
    arg_types = {"start": True, "end": True, "step": False, "is_end_exclusive": False}


# Postgres' GENERATE_SERIES function returns a row set, i.e. it implicitly explodes when it's
# used in a projection, so this expression is a helper that facilitates transpilation to other
# dialects. For example, we'd generate UNNEST(GENERATE_SERIES(...)) in DuckDB
class ExplodingGenerateSeries(GenerateSeries):
    pass


class ArrayAgg(AggFunc):
    arg_types = {"this": True, "nulls_excluded": False}


class ArrayUniqueAgg(AggFunc):
    pass


class ArrayAll(Func):
    arg_types = {"this": True, "expression": True}


# Represents Python's `any(f(x) for x in array)`, where `array` is `this` and `f` is `expression`
class ArrayAny(Func):
    arg_types = {"this": True, "expression": True}


class ArrayConcat(Func):
    _sql_names = ["ARRAY_CONCAT", "ARRAY_CAT"]
    arg_types = {"this": True, "expressions": False}
    is_var_len_args = True


class ArrayConstructCompact(Func):
    arg_types = {"expressions": True}
    is_var_len_args = True


class ArrayContains(Binary, Func):
    _sql_names = ["ARRAY_CONTAINS", "ARRAY_HAS"]


class ArrayContainsAll(Binary, Func):
    _sql_names = ["ARRAY_CONTAINS_ALL", "ARRAY_HAS_ALL"]


class ArrayFilter(Func):
    arg_types = {"this": True, "expression": True}
    _sql_names = ["FILTER", "ARRAY_FILTER"]


class ArrayToString(Func):
    arg_types = {"this": True, "expression": True, "null": False}
    _sql_names = ["ARRAY_TO_STRING", "ARRAY_JOIN"]


# https://cloud.google.com/bigquery/docs/reference/standard-sql/timestamp_functions#string
class String(Func):
    arg_types = {"this": True, "zone": False}


class StringToArray(Func):
    arg_types = {"this": True, "expression": True, "null": False}
    _sql_names = ["STRING_TO_ARRAY", "SPLIT_BY_STRING"]


class ArrayOverlaps(Binary, Func):
    pass


class ArraySize(Func):
    arg_types = {"this": True, "expression": False}
    _sql_names = ["ARRAY_SIZE", "ARRAY_LENGTH"]


class ArraySort(Func):
    arg_types = {"this": True, "expression": False}


class ArraySum(Func):
    arg_types = {"this": True, "expression": False}


class ArrayUnionAgg(AggFunc):
    pass


class Avg(AggFunc):
    pass


class AnyValue(AggFunc):
    pass


class Lag(AggFunc):
    arg_types = {"this": True, "offset": False, "default": False}


class Lead(AggFunc):
    arg_types = {"this": True, "offset": False, "default": False}


# some dialects have a distinction between first and first_value, usually first is an aggregate func
# and first_value is a window func
class First(AggFunc):
    pass


class Last(AggFunc):
    pass


class FirstValue(AggFunc):
    pass


class LastValue(AggFunc):
    pass


class NthValue(AggFunc):
    arg_types = {"this": True, "offset": True}


class Case(Func):
    arg_types = {"this": False, "ifs": True, "default": False}

    def when(self, condition: ExpOrStr, then: ExpOrStr, copy: bool = True, **opts) -> Case:
        """
        Append a new branch to this CASE expression.

        Args:
            condition: the branch condition, parsed via `maybe_parse`.
            then: the branch result, parsed via `maybe_parse`.
            copy: forwarded to `maybe_copy` for this node and to `maybe_parse` for both inputs.
            opts: extra options forwarded to `maybe_parse`.

        Returns:
            The Case instance the branch was appended to.
        """
        instance = maybe_copy(self, copy)
        instance.append(
            "ifs",
            If(
                this=maybe_parse(condition, copy=copy, **opts),
                true=maybe_parse(then, copy=copy, **opts),
            ),
        )
        return instance

    def else_(self, condition: ExpOrStr, copy: bool = True, **opts) -> Case:
        """
        Set the `default` (ELSE) value of this CASE expression.

        Args:
            condition: the default value, parsed via `maybe_parse`.
            copy: forwarded to `maybe_copy` for this node and to `maybe_parse` for the input.
            opts: extra options forwarded to `maybe_parse`.

        Returns:
            The Case instance whose default was set.
        """
        instance = maybe_copy(self, copy)
        instance.set("default", maybe_parse(condition, copy=copy, **opts))
        return instance


class Cast(Func):
    arg_types = {
        "this": True,
        "to": True,
        "format": False,
        "safe": False,
        "action": False,
        "default": False,
    }

    @property
    def name(self) -> str:
        """Return the name of the expression being cast (`this`)."""
        return self.this.name

    @property
    def to(self) -> DataType:
        """Return the target type, i.e. the required `to` arg."""
        return self.args["to"]

    @property
    def output_name(self) -> str:
        """Return the same value as `name`."""
        return self.name

    def is_type(self, *dtypes: DATA_TYPE) -> bool:
        """
        Checks whether this Cast's DataType matches one of the provided data types. Nested types
        like arrays or structs will be compared using "structural equivalence" semantics, so e.g.
        array<int> != array<float>.

        Args:
            dtypes: the data types to compare this Cast's DataType to.

        Returns:
            True, if and only if there is a type in `dtypes` which is equal to this Cast's DataType.
        """
        return self.to.is_type(*dtypes)


class TryCast(Cast):
    pass


# https://clickhouse.com/docs/sql-reference/data-types/newjson#reading-json-paths-as-sub-columns
class JSONCast(Cast):
    pass


class Try(Func):
    pass


class CastToStrType(Func):
    arg_types = {"this": True, "to": True}


class Collate(Binary, Func):
    pass


class Ceil(Func):
    arg_types = {"this": True, "decimals": False, "to": False}
    _sql_names = ["CEIL", "CEILING"]


class Coalesce(Func):
    arg_types = {"this": True, "expressions": False, "is_nvl": False}
    is_var_len_args = True
    _sql_names = ["COALESCE", "IFNULL", "NVL"]


class Chr(Func):
    arg_types = {"expressions": True, "charset": False}
    is_var_len_args = True
    _sql_names = ["CHR", "CHAR"]


class Concat(Func):
    arg_types = {"expressions": True, "safe": False, "coalesce": False}
    is_var_len_args = True


class ConcatWs(Concat):
    _sql_names = ["CONCAT_WS"]


class Contains(Func):
    arg_types = {"this": True, "expression": True}


# https://docs.oracle.com/cd/B13789_01/server.101/b10759/operators004.htm#i1035022
class ConnectByRoot(Func):
    pass


class Count(AggFunc):
    arg_types = {"this": False, "expressions": False, "big_int": False}
    is_var_len_args = True


class CountIf(AggFunc):
    _sql_names = ["COUNT_IF", "COUNTIF"]


# cube root
class Cbrt(Func):
    pass


class CurrentDate(Func):
    arg_types = {"this": False}


class CurrentDatetime(Func):
    arg_types = {"this": False}


class CurrentTime(Func):
    arg_types = {"this": False}


class CurrentTimestamp(Func):
    arg_types = {"this": False, "sysdate": False}


class CurrentSchema(Func):
    arg_types = {"this": False}


class CurrentUser(Func):
    arg_types = {"this": False}


class DateAdd(Func, IntervalOp):
    arg_types = {"this": True, "expression": True, "unit": False}


class DateBin(Func, IntervalOp):
    arg_types = {"this": True, "expression": True, "unit": False, "zone": False}


class DateSub(Func, IntervalOp):
    arg_types = {"this": True, "expression": True, "unit": False}


class DateDiff(Func, TimeUnit):
    _sql_names = ["DATEDIFF", "DATE_DIFF"]
    arg_types = {"this": True, "expression": True, "unit": False}


class DateTrunc(Func):
    arg_types = {"unit": True, "this": True, "zone": False}

    def __init__(self, **args):
        # Across most dialects it's safe to unabbreviate the unit (e.g. 'Q' -> 'QUARTER') except Oracle
        # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ROUND-and-TRUNC-Date-Functions.html
        # `unabbreviate` is a constructor-only flag: it is popped so it never becomes an arg.
        unabbreviate = args.pop("unabbreviate", True)

        unit = args.get("unit")
        if isinstance(unit, TimeUnit.VAR_LIKE):
            # The unit name is upper-cased before the abbreviation lookup and stored as a
            # string literal.
            unit_name = unit.name.upper()
            if unabbreviate and unit_name in TimeUnit.UNABBREVIATED_UNIT_NAME:
                unit_name = TimeUnit.UNABBREVIATED_UNIT_NAME[unit_name]

            args["unit"] = Literal.string(unit_name)
        elif isinstance(unit, Week):
            # A Week unit is kept as a node; only its `this` is normalized in place.
            unit.set("this", Literal.string(unit.this.name.upper()))

        super().__init__(**args)

    @property
    def unit(self) -> Expression:
        """Return the required `unit` arg."""
        return self.args["unit"]


# https://cloud.google.com/bigquery/docs/reference/standard-sql/datetime_functions#datetime
# expression can either be time_expr or time_zone
class Datetime(Func):
    arg_types = {"this": True, "expression": False}


class DatetimeAdd(Func, IntervalOp):
    arg_types = {"this": True, "expression": True, "unit": False}


class DatetimeSub(Func, IntervalOp):
    arg_types = {"this": True, "expression": True, "unit": False}


class DatetimeDiff(Func, TimeUnit):
    arg_types = {"this": True, "expression": True, "unit": False}


class DatetimeTrunc(Func, TimeUnit):
    arg_types = {"this": True, "unit": True, "zone": False}


class DayOfWeek(Func):
    _sql_names = ["DAY_OF_WEEK", "DAYOFWEEK"]


# https://duckdb.org/docs/sql/functions/datepart.html#part-specifiers-only-usable-as-date-part-specifiers
# ISO day of week function in duckdb is ISODOW
class DayOfWeekIso(Func):
    _sql_names = ["DAYOFWEEK_ISO", "ISODOW"]


class DayOfMonth(Func):
    _sql_names = ["DAY_OF_MONTH", "DAYOFMONTH"]


class DayOfYear(Func):
    _sql_names = ["DAY_OF_YEAR", "DAYOFYEAR"]


class ToDays(Func):
    pass


class WeekOfYear(Func):
    _sql_names = ["WEEK_OF_YEAR", "WEEKOFYEAR"]


class MonthsBetween(Func):
    arg_types = {"this": True, "expression": True, "roundoff": False}


class MakeInterval(Func):
    arg_types = {
        "year": False,
        "month": False,
        "day": False,
        "hour": False,
        "minute": False,
        "second": False,
    }


class LastDay(Func, TimeUnit):
    _sql_names = ["LAST_DAY", "LAST_DAY_OF_MONTH"]
    arg_types = {"this": True, "unit": False}


class Extract(Func):
    arg_types = {"this": True, "expression": True}


class Exists(Func, SubqueryPredicate):
    arg_types = {"this": True, "expression": False}


class Timestamp(Func):
    arg_types = {"this": False, "zone": False, "with_tz": False}


class TimestampAdd(Func, TimeUnit):
    arg_types = {"this": True, "expression": True, "unit": False}


class TimestampSub(Func, TimeUnit):
    arg_types = {"this": True, "expression": True, "unit": False}


class TimestampDiff(Func, TimeUnit):
    _sql_names = ["TIMESTAMPDIFF", "TIMESTAMP_DIFF"]
    arg_types = {"this": True, "expression": True, "unit": False}


class TimestampTrunc(Func, TimeUnit):
    arg_types = {"this": True, "unit": True, "zone": False}


class TimeAdd(Func, TimeUnit):
    arg_types = {"this": True, "expression": True, "unit": False}


class TimeSub(Func, TimeUnit):
    arg_types = {"this": True, "expression": True, "unit": False}


class TimeDiff(Func, TimeUnit):
    arg_types = {"this": True, "expression": True, "unit": False}


class TimeTrunc(Func, TimeUnit):
    arg_types = {"this": True, "unit": True, "zone": False}


class DateFromParts(Func):
    _sql_names = ["DATE_FROM_PARTS", "DATEFROMPARTS"]
    arg_types = {"year": True, "month": True, "day": True}


class TimeFromParts(Func):
    _sql_names = ["TIME_FROM_PARTS", "TIMEFROMPARTS"]
    arg_types = {
        "hour": True,
        "min": True,
        "sec": True,
        "nano": False,
        "fractions": False,
        "precision": False,
    }


class DateStrToDate(Func):
    pass


class DateToDateStr(Func):
    pass


class DateToDi(Func):
    pass


# https://cloud.google.com/bigquery/docs/reference/standard-sql/date_functions#date
class Date(Func):
    arg_types = {"this": False, "zone": False, "expressions": False}
    is_var_len_args = True


class Day(Func):
    pass


class Decode(Func):
    arg_types = {"this": True, "charset": True, "replace": False}


class DiToDate(Func):
    pass


class Encode(Func):
    arg_types = {"this": True, "charset": True}


class Exp(Func):
    pass


# https://docs.snowflake.com/en/sql-reference/functions/flatten
class Explode(Func, UDTF):
    arg_types = {"this": True, "expressions": False}
    is_var_len_args = True


# https://spark.apache.org/docs/latest/api/sql/#inline
class Inline(Func):
    pass


class ExplodeOuter(Explode):
    pass


class Posexplode(Explode):
    pass


class PosexplodeOuter(Posexplode, ExplodeOuter):
    pass


class Unnest(Func, UDTF):
    arg_types = {
        "expressions": True,
        "alias": False,
        "offset": False,
        "explode_array": False,
    }

    @property
    def selects(self) -> t.List[Expression]:
        """
        Return the parent's `selects`, extended with the offset column when `offset` is set.

        When `offset` is literally `True`, an identifier named `offset` is used; otherwise
        the `offset` arg itself is appended. A new list is built, so the parent's list is
        not mutated.
        """
        columns = super().selects
        offset = self.args.get("offset")
        if offset:
            columns = columns + [to_identifier("offset") if offset is True else offset]
        return columns


class Floor(Func):
    arg_types = {"this": True, "decimals": False, "to": False}


class FromBase64(Func):
    pass


class FeaturesAtTime(Func):
    arg_types = {"this": True, "time": False, "num_rows": False, "ignore_feature_nulls": False}


class ToBase64(Func):
    pass


# https://trino.io/docs/current/functions/datetime.html#from_iso8601_timestamp
class FromISO8601Timestamp(Func):
    _sql_names = ["FROM_ISO8601_TIMESTAMP"]


class GapFill(Func):
    arg_types = {
        "this": True,
        "ts_column": True,
        "bucket_width": True,
        "partitioning_columns": False,
        "value_columns": False,
        "origin": False,
        "ignore_nulls": False,
    }


# https://cloud.google.com/bigquery/docs/reference/standard-sql/array_functions#generate_date_array
class GenerateDateArray(Func):
    arg_types = {"start": True, "end": True, "step": False}


# https://cloud.google.com/bigquery/docs/reference/standard-sql/array_functions#generate_timestamp_array
class GenerateTimestampArray(Func):
    arg_types = {"start": True, "end": True, "step": True}


class Greatest(Func):
    arg_types = {"this": True, "expressions": False}
    is_var_len_args = True


# Trino's `ON OVERFLOW TRUNCATE [filler_string] {WITH | WITHOUT}
# COUNT`
# https://trino.io/docs/current/functions/aggregate.html#listagg
class OverflowTruncateBehavior(Expression):
    arg_types = {"this": False, "with_count": True}


class GroupConcat(AggFunc):
    arg_types = {"this": True, "separator": False, "on_overflow": False}


class Hex(Func):
    pass


class LowerHex(Hex):
    pass


class And(Connector, Func):
    pass


class Or(Connector, Func):
    pass


# Unlike And / Or, all args of Xor are optional and it also accepts `expressions`.
class Xor(Connector, Func):
    arg_types = {"this": False, "expression": False, "expressions": False}


class If(Func):
    arg_types = {"this": True, "true": True, "false": False}
    _sql_names = ["IF", "IIF"]


class Nullif(Func):
    arg_types = {"this": True, "expression": True}


class Initcap(Func):
    arg_types = {"this": True, "expression": False}


class IsAscii(Func):
    pass


class IsNan(Func):
    _sql_names = ["IS_NAN", "ISNAN"]


# https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#int64_for_json
class Int64(Func):
    pass


class IsInf(Func):
    _sql_names = ["IS_INF", "ISINF"]


# https://www.postgresql.org/docs/current/functions-json.html
class JSON(Expression):
    arg_types = {"this": False, "with": False, "unique": False}


class JSONPath(Expression):
    arg_types = {"expressions": True, "escape": False}

    @property
    def output_name(self) -> str:
        """Return the `this` of the last path segment if it is a string, else an empty string."""
        last_segment = self.expressions[-1].this
        return last_segment if isinstance(last_segment, str) else ""


# Base class of the individual JSON path segments; it takes no args itself.
class JSONPathPart(Expression):
    arg_types = {}


class JSONPathFilter(JSONPathPart):
    arg_types = {"this": True}


class JSONPathKey(JSONPathPart):
    arg_types = {"this": True}


class JSONPathRecursive(JSONPathPart):
    arg_types = {"this": False}


class JSONPathRoot(JSONPathPart):
    pass


class JSONPathScript(JSONPathPart):
    arg_types = {"this": True}


class JSONPathSlice(JSONPathPart):
    arg_types = {"start": False, "end": False, "step": False}


class JSONPathSelector(JSONPathPart):
    arg_types = {"this": True}


class JSONPathSubscript(JSONPathPart):
    arg_types = {"this": True}


class JSONPathUnion(JSONPathPart):
    arg_types = {"expressions": True}


class JSONPathWildcard(JSONPathPart):
    pass
class FormatJson(Expression):
    pass


class JSONKeyValue(Expression):
    # In `arg_types`, True marks a required arg and False an optional one.
    arg_types = {"this": True, "expression": True}


class JSONObject(Func):
    arg_types = {
        "expressions": False,
        "null_handling": False,
        "unique_keys": False,
        "return_type": False,
        "encoding": False,
    }


# Declares the same args as JSONObject, but derives from AggFunc.
class JSONObjectAgg(AggFunc):
    arg_types = {
        "expressions": False,
        "null_handling": False,
        "unique_keys": False,
        "return_type": False,
        "encoding": False,
    }


# https://www.postgresql.org/docs/9.5/functions-aggregate.html
class JSONBObjectAgg(AggFunc):
    arg_types = {"this": True, "expression": True}


# https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/JSON_ARRAY.html
class JSONArray(Func):
    arg_types = {
        "expressions": True,
        "null_handling": False,
        "return_type": False,
        "strict": False,
    }


# https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/JSON_ARRAYAGG.html
class JSONArrayAgg(Func):
    arg_types = {
        "this": True,
        "order": False,
        "null_handling": False,
        "return_type": False,
        "strict": False,
    }


class JSONExists(Func):
    arg_types = {"this": True, "path": True, "passing": False, "on_condition": False}


# https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/JSON_TABLE.html
# Note: parsing of JSON column definitions is currently incomplete.
class JSONColumnDef(Expression):
    # In `arg_types`, True marks a required arg and False an optional one.
    arg_types = {"this": False, "kind": False, "path": False, "nested_schema": False}


class JSONSchema(Expression):
    arg_types = {"expressions": True}


# https://dev.mysql.com/doc/refman/8.4/en/json-search-functions.html#function_json-value
class JSONValue(Expression):
    arg_types = {
        "this": True,
        "path": True,
        "returning": False,
        "on_condition": False,
    }


class JSONValueArray(Func):
    arg_types = {"this": True, "expression": False}


# https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/JSON_TABLE.html
class JSONTable(Func):
    arg_types = {
        "this": True,
        "schema": True,
        "path": False,
        "error_handling": False,
        "empty_handling": False,
    }


# https://docs.snowflake.com/en/sql-reference/functions/object_insert
class ObjectInsert(Func):
    arg_types = {
        "this": True,
        "key": True,
        "value": True,
        "update_flag": False,
    }


class OpenJSONColumnDef(Expression):
    arg_types = {"this": True, "kind": True, "path": False, "as_json": False}


class OpenJSON(Func):
    arg_types = {"this": True, "path": False, "expressions": False}


class JSONBContains(Binary, Func):
    _sql_names = ["JSONB_CONTAINS"]


class JSONBExists(Func):
    arg_types = {"this": True, "path": True}
    _sql_names = ["JSONB_EXISTS"]


class JSONExtract(Binary, Func):
    arg_types = {
        "this": True,
        "expression": True,
        "only_json_types": False,
        "expressions": False,
        "variant_extract": False,
        "json_query": False,
        "option": False,
        "quote": False,
        "on_condition": False,
    }
    _sql_names = ["JSON_EXTRACT"]
    is_var_len_args = True

    @property
    def output_name(self) -> str:
        # Delegate to the `expression` arg's output name, but only when no
        # additional `expressions` are present; otherwise there is no name.
        return self.expression.output_name if not self.expressions else ""


# https://trino.io/docs/current/functions/json.html#json-query
class JSONExtractQuote(Expression):
    arg_types = {
        "option": True,
        "scalar": False,
    }


class JSONExtractArray(Func):
    arg_types = {"this": True, "expression": False}
    _sql_names = ["JSON_EXTRACT_ARRAY"]


class JSONExtractScalar(Binary, Func):
    arg_types = {"this": True, "expression": True, "only_json_types": False, "expressions": False}
    _sql_names = ["JSON_EXTRACT_SCALAR"]
    is_var_len_args = True

    @property
    def output_name(self) -> str:
        # Unlike JSONExtract.output_name, this always delegates to `expression`.
        return self.expression.output_name


class JSONBExtract(Binary, Func):
    _sql_names = ["JSONB_EXTRACT"]


class JSONBExtractScalar(Binary, Func):
    _sql_names = ["JSONB_EXTRACT_SCALAR"]


class JSONFormat(Func):
    arg_types = {"this": False, "options": False}
    _sql_names = ["JSON_FORMAT"]


# https://dev.mysql.com/doc/refman/8.0/en/json-search-functions.html#operator_member-of
class JSONArrayContains(Binary, Predicate, Func):
    _sql_names = ["JSON_ARRAY_CONTAINS"]


class ParseJSON(Func):
    # BigQuery, Snowflake have PARSE_JSON, Presto has JSON_PARSE
    # Snowflake also has TRY_PARSE_JSON, which is represented using `safe`
    _sql_names = ["PARSE_JSON", "JSON_PARSE"]
    arg_types = {"this": True, "expression": False, "safe": False}


class Least(Func):
    arg_types = {"this": True, "expressions": False}
    is_var_len_args = True


class Left(Func):
    arg_types = {"this": True, "expression": True}


class Right(Func):
    arg_types = {"this": True, "expression": True}


class Length(Func):
    arg_types = {"this": True, "binary": False, "encoding": False}
    _sql_names = ["LENGTH", "LEN", "CHAR_LENGTH", "CHARACTER_LENGTH"]


class Levenshtein(Func):
    arg_types = {
        "this": True,
        "expression": False,
        "ins_cost": False,
        "del_cost": False,
        "sub_cost": False,
        "max_dist": False,
    }


class Ln(Func):
    pass


class Log(Func):
    arg_types = {"this": True, "expression": False}


class LogicalOr(AggFunc):
    _sql_names = ["LOGICAL_OR", "BOOL_OR", "BOOLOR_AGG"]


class LogicalAnd(AggFunc):
    _sql_names = ["LOGICAL_AND", "BOOL_AND", "BOOLAND_AGG"]


class Lower(Func):
    _sql_names = ["LOWER", "LCASE"]


class Map(Func):
    arg_types = {"keys": False, "values": False}

    @property
    def keys(self) -> t.List[Expression]:
        # Both args are optional, so fall back to an empty list when unset.
        keys = self.args.get("keys")
        return keys.expressions if keys else []

    @property
    def values(self) -> t.List[Expression]:
        # Same fallback as `keys`: an unset "values" arg yields [].
        values = self.args.get("values")
        return values.expressions if values else []


# Represents the MAP {...} syntax in
# DuckDB - basically convert a struct to a MAP
class ToMap(Func):
    pass


class MapFromEntries(Func):
    pass


# https://learn.microsoft.com/en-us/sql/t-sql/language-elements/scope-resolution-operator-transact-sql?view=sql-server-ver16
class ScopeResolution(Expression):
    # In `arg_types`, True marks a required arg and False an optional one.
    arg_types = {"this": False, "expression": True}


class Stream(Expression):
    pass


class StarMap(Func):
    pass


class VarMap(Func):
    arg_types = {"keys": True, "values": True}
    is_var_len_args = True

    @property
    def keys(self) -> t.List[Expression]:
        # "keys" is required here, so it is indexed directly (no fallback as in Map).
        return self.args["keys"].expressions

    @property
    def values(self) -> t.List[Expression]:
        # "values" is required as well; a missing arg raises KeyError.
        return self.args["values"].expressions


# https://dev.mysql.com/doc/refman/8.0/en/fulltext-search.html
class MatchAgainst(Func):
    arg_types = {"this": True, "expressions": True, "modifier": False}


class Max(AggFunc):
    arg_types = {"this": True, "expressions": False}
    is_var_len_args = True


class MD5(Func):
    _sql_names = ["MD5"]


# Represents the variant of the MD5 function that returns a binary value
class MD5Digest(Func):
    _sql_names = ["MD5_DIGEST"]


class Median(AggFunc):
    pass


class Min(AggFunc):
    arg_types = {"this": True, "expressions": False}
    is_var_len_args = True


class Month(Func):
    pass


class AddMonths(Func):
    arg_types = {"this": True, "expression": True}


class Nvl2(Func):
    arg_types = {"this": True, "true": True, "false": False}


class Normalize(Func):
    arg_types = {"this": True, "form": False}


class Overlay(Func):
    arg_types = {"this": True, "expression": True, "from": True, "for": False}


# https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-predict#mlpredict_function
class Predict(Func):
    arg_types = {"this": True, "expression": True, "params_struct": False}


class Pow(Binary, Func):
    _sql_names = ["POWER", "POW"]


class PercentileCont(AggFunc):
    arg_types = {"this": True, "expression": False}


class PercentileDisc(AggFunc):
    arg_types = {"this": True, "expression": False}


class Quantile(AggFunc):
    arg_types = {"this": True, "quantile": True}


# Extends Quantile's args with the optional "accuracy" and "weight".
class ApproxQuantile(Quantile):
    arg_types = {"this": True, "quantile": True, "accuracy": False, "weight": False}


class Quarter(Func):
    pass


# https://docs.teradata.com/r/Enterprise_IntelliFlex_VMware/SQL-Functions-Expressions-and-Predicates/Arithmetic-Trigonometric-Hyperbolic-Operators/Functions/RANDOM/RANDOM-Function-Syntax
# teradata lower and upper bounds
class Rand(Func):
    _sql_names = ["RAND", "RANDOM"]
    arg_types = {"this": False, "lower": False, "upper": False}


class Randn(Func):
    arg_types = {"this": False}


class RangeN(Func):
    arg_types = {"this": True, "expressions": True, "each": False}


class ReadCSV(Func):
    _sql_names = ["READ_CSV"]
    is_var_len_args = True
    arg_types = {"this": True, "expressions": False}


class Reduce(Func):
    arg_types = {"this": True, "initial": True, "merge": True, "finish": False}


class RegexpExtract(Func):
    arg_types = {
        "this": True,
        "expression": True,
        "position": False,
        "occurrence": False,
        "parameters": False,
        "group": False,
    }


class RegexpExtractAll(Func):
    arg_types = {
        "this": True,
        "expression": True,
        "position": False,
        "occurrence": False,
        "parameters": False,
        "group": False,
    }


class RegexpReplace(Func):
    arg_types = {
        "this": True,
        "expression": True,
        "replacement": False,
        "position": False,
        "occurrence": False,
        "modifiers": False,
    }


class RegexpLike(Binary, Func):
    arg_types = {"this": True, "expression": True, "flag": False}


class RegexpILike(Binary, Func):
    arg_types = {"this": True, "expression": True, "flag": False}


# https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.functions.split.html
# limit is the number of times a pattern is applied
class RegexpSplit(Func):
    arg_types = {"this": True, "expression": True, "limit": False}


class Repeat(Func):
    arg_types = {"this": True, "times": True}


# https://learn.microsoft.com/en-us/sql/t-sql/functions/round-transact-sql?view=sql-server-ver16
# tsql third argument function == truncation if not 0
class Round(Func):
    arg_types = {"this": True, "decimals": False, "truncate": False}


class RowNumber(Func):
    arg_types = {"this": False}


class SafeDivide(Func):
    arg_types = {"this": True, "expression": True}


class SHA(Func):
    _sql_names = ["SHA", "SHA1"]


class SHA2(Func):
    _sql_names = ["SHA2"]
    arg_types = {"this": True, "length": False}


class Sign(Func):
    _sql_names = ["SIGN", "SIGNUM"]


class SortArray(Func):
    arg_types = {"this": True, "asc": False}


class Split(Func):
    arg_types = {"this": True, "expression": True, "limit": False}


# https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.functions.split_part.html
class SplitPart(Func):
    arg_types = {"this": True, "delimiter": True, "part_index": True}


# Start may be omitted in the case of postgres
# https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6
class Substring(Func):
    _sql_names = ["SUBSTRING", "SUBSTR"]
    arg_types = {"this": True, "start": False, "length": False}


class StandardHash(Func):
    arg_types = {"this": True, "expression": False}


class StartsWith(Func):
    _sql_names = ["STARTS_WITH", "STARTSWITH"]
    arg_types = {"this": True, "expression": True}


class StrPosition(Func):
    arg_types = {
        "this": True,
        "substr": True,
        "position": False,
        "occurrence": False,
    }


class StrToDate(Func):
    arg_types = {"this": True, "format": False, "safe": False}


class StrToTime(Func):
    arg_types = {"this": True, "format": True, "zone": False, "safe": False}


# Spark allows unix_timestamp()
# https://spark.apache.org/docs/3.1.3/api/python/reference/api/pyspark.sql.functions.unix_timestamp.html
class StrToUnix(Func):
    arg_types = {"this": False, "format": False}


# https://prestodb.io/docs/current/functions/string.html
# https://spark.apache.org/docs/latest/api/sql/index.html#str_to_map
class StrToMap(Func):
    arg_types = {
        "this": True,
        "pair_delim": False,
        "key_value_delim": False,
        "duplicate_resolution_callback": False,
    }


class NumberToStr(Func):
    arg_types = {"this": True, "format": True, "culture": False}


class FromBase(Func):
    arg_types = {"this": True, "expression": True}


class Struct(Func):
    arg_types = {"expressions": False}
    is_var_len_args = True


class StructExtract(Func):
    arg_types = {"this": True, "expression": True}


# https://learn.microsoft.com/en-us/sql/t-sql/functions/stuff-transact-sql?view=sql-server-ver16
# https://docs.snowflake.com/en/sql-reference/functions/insert
class Stuff(Func):
    _sql_names = ["STUFF", "INSERT"]
    arg_types = {"this": True, "start": True, "length": True, "expression": True}


class Sum(AggFunc):
    pass


class Sqrt(Func):
    pass


class Stddev(AggFunc):
    _sql_names = ["STDDEV", "STDEV"]


class StddevPop(AggFunc):
    pass


class StddevSamp(AggFunc):
    pass


# https://cloud.google.com/bigquery/docs/reference/standard-sql/time_functions#time
class Time(Func):
    arg_types = {"this": False, "zone": False}


class TimeToStr(Func):
    arg_types = {"this": True, "format": True, "culture": False, "zone": False}


class TimeToTimeStr(Func):
    pass


class TimeToUnix(Func):
    pass


class TimeStrToDate(Func):
    pass


class TimeStrToTime(Func):
    arg_types = {"this": True, "zone": False}


class TimeStrToUnix(Func):
    pass


class Trim(Func):
    arg_types = {
        "this": True,
        "expression": False,
        "position": False,
        "collation": False,
    }


class TsOrDsAdd(Func, TimeUnit):
    # return_type is used to correctly cast the arguments of this expression when transpiling it
    arg_types = {"this": True, "expression": True, "unit": False, "return_type": False}

    @property
    def return_type(self) -> DataType:
        # Falls back to DATE when no (truthy) "return_type" arg was supplied.
        return DataType.build(self.args.get("return_type") or DataType.Type.DATE)


class TsOrDsDiff(Func, TimeUnit):
    arg_types = {"this": True, "expression": True, "unit": False}


class TsOrDsToDateStr(Func):
    pass


class TsOrDsToDate(Func):
    arg_types = {"this": True, "format": False, "safe": False}


class TsOrDsToDatetime(Func):
    pass


class TsOrDsToTime(Func):
    arg_types = {"this": True, "format": False, "safe": False}


class TsOrDsToTimestamp(Func):
    pass


class TsOrDiToDi(Func):
    pass


class Unhex(Func):
    arg_types = {"this": True, "expression": False}


class Unicode(Func):
    pass


#
# https://cloud.google.com/bigquery/docs/reference/standard-sql/date_functions#unix_date
class UnixDate(Func):
    pass


class UnixToStr(Func):
    # In `arg_types`, True marks a required arg and False an optional one.
    arg_types = {"this": True, "format": False}


# https://prestodb.io/docs/current/functions/datetime.html
# presto has weird zone/hours/minutes
class UnixToTime(Func):
    arg_types = {
        "this": True,
        "scale": False,
        "zone": False,
        "hours": False,
        "minutes": False,
        "format": False,
    }

    # Named numeric literals 0..9.
    # NOTE(review): presumably values for the "scale" arg, i.e. the power of
    # ten of the sub-second unit (3 -> millis, 6 -> micros) -- confirm at call sites.
    SECONDS = Literal.number(0)
    DECIS = Literal.number(1)
    CENTIS = Literal.number(2)
    MILLIS = Literal.number(3)
    DECIMILLIS = Literal.number(4)
    CENTIMILLIS = Literal.number(5)
    MICROS = Literal.number(6)
    DECIMICROS = Literal.number(7)
    CENTIMICROS = Literal.number(8)
    NANOS = Literal.number(9)


class UnixToTimeStr(Func):
    pass


class UnixSeconds(Func):
    pass


class Uuid(Func):
    _sql_names = ["UUID", "GEN_RANDOM_UUID", "GENERATE_UUID", "UUID_STRING"]

    arg_types = {"this": False, "name": False}


class TimestampFromParts(Func):
    _sql_names = ["TIMESTAMP_FROM_PARTS", "TIMESTAMPFROMPARTS"]
    arg_types = {
        "year": True,
        "month": True,
        "day": True,
        "hour": True,
        "min": True,
        "sec": True,
        "nano": False,
        "zone": False,
        "milli": False,
    }


class Upper(Func):
    _sql_names = ["UPPER", "UCASE"]


class Corr(Binary, AggFunc):
    pass


class Variance(AggFunc):
    _sql_names = ["VARIANCE", "VARIANCE_SAMP", "VAR_SAMP"]


class VariancePop(AggFunc):
    _sql_names = ["VARIANCE_POP", "VAR_POP"]


class CovarSamp(Binary, AggFunc):
    pass


class CovarPop(Binary, AggFunc):
    pass


class Week(Func):
    arg_types = {"this": True, "mode": False}


class XMLElement(Func):
    _sql_names = ["XMLELEMENT"]
    arg_types = {"this": True, "expressions": False}


class XMLTable(Func):
    arg_types = {
        "this": True,
        "namespaces": False,
        "passing": False,
        "columns": False,
        "by_ref": False,
    }


class XMLNamespace(Expression):
    pass


class Year(Func):
    pass


class Use(Expression):
    arg_types = {"this": False, "expressions": False, "kind": False}


class Merge(DML):
    arg_types = {
        "this": True,
        "using": True,
        "on": True,
        "whens": True,
        "with": False,
        "returning": False,
    }


class When(Expression):
    arg_types = {"matched": True, "source": False, "condition": False, "then": True}


class Whens(Expression):
    """Wraps around one or more WHEN [NOT] MATCHED [...] clauses."""

    arg_types = {"expressions": True}


# https://docs.oracle.com/javadb/10.8.3.0/ref/rrefsqljnextvaluefor.html
# https://learn.microsoft.com/en-us/sql/t-sql/functions/next-value-for-transact-sql?view=sql-server-ver16
class NextValueFor(Func):
    arg_types = {"this": True, "order": False}


# Refers to a trailing semi-colon. This is only used to preserve trailing comments
# select 1; -- my comment
class Semicolon(Expression):
    arg_types = {}


def _norm_arg(arg):
    """Lowercase `arg` when it is exactly a `str`; return any other value unchanged."""
    # `type(...) is str` (not isinstance) deliberately leaves str subclasses untouched.
    return arg.lower() if type(arg) is str else arg


# Registries derived from the classes defined in this module.
ALL_FUNCTIONS = subclasses(__name__, Func, (AggFunc, Anonymous, Func))
# Maps every name returned by `sql_names()` to its function class.
FUNCTION_BY_NAME = {name: func for func in ALL_FUNCTIONS for name in func.sql_names()}

JSON_PATH_PARTS = subclasses(__name__, JSONPathPart, (JSONPathPart,))

PERCENTILES = (PercentileCont, PercentileDisc)


# Helpers
@t.overload
def maybe_parse(
    sql_or_expression: ExpOrStr,
    *,
    into: t.Type[E],
    dialect: DialectType = None,
    prefix: t.Optional[str] = None,
    copy: bool = False,
    **opts,
) -> E: ...


@t.overload
def maybe_parse(
    sql_or_expression: str | E,
    *,
    into: t.Optional[IntoType] = None,
    dialect: DialectType = None,
    prefix: t.Optional[str] = None,
    copy: bool = False,
    **opts,
) -> E: ...


def maybe_parse(
    sql_or_expression: ExpOrStr,
    *,
    into: t.Optional[IntoType] = None,
    dialect: DialectType = None,
    prefix: t.Optional[str] = None,
    copy: bool = False,
    **opts,
) -> Expression:
    """Gracefully handle a possible string or expression.

    Example:
        >>> maybe_parse("1")
        Literal(this=1, is_string=False)
        >>> maybe_parse(to_identifier("x"))
        Identifier(this=x, quoted=False)

    Args:
        sql_or_expression: the SQL code string or an expression
        into: the SQLGlot Expression to parse into
        dialect: the dialect used to parse the input expressions (in the case that an
            input expression is a SQL string).
        prefix: a string to prefix the sql with before it gets parsed
            (automatically includes a space)
        copy: whether to copy the expression.
        **opts: other options to use to parse the input expressions (again, in the case
            that an input expression is a SQL string).

    Returns:
        Expression: the parsed or given expression.

    Raises:
        ParseError: if `sql_or_expression` is None.
    """
    # Expressions are passed through (optionally copied) and are never re-parsed.
    if isinstance(sql_or_expression, Expression):
        if copy:
            return sql_or_expression.copy()
        return sql_or_expression

    if sql_or_expression is None:
        raise ParseError("SQL cannot be None")

    # Imported inside the function rather than at module import time.
    import sqlglot

    sql = str(sql_or_expression)
    if prefix:
        sql = f"{prefix} {sql}"

    return sqlglot.parse_one(sql, read=dialect, into=into, **opts)


@t.overload
def maybe_copy(instance: None, copy: bool = True) -> None: ...


@t.overload
def maybe_copy(instance: E, copy: bool = True) -> E: ...


def maybe_copy(instance, copy=True):
    """Return `instance.copy()` if `copy` is set and `instance` is truthy, else `instance` itself."""
    return instance.copy() if copy and instance else instance


def _to_s(node: t.Any, verbose: bool = False, level: int = 0, repr_str: bool = False) -> str:
    """Generate a textual representation of an Expression tree

    Args:
        node: an Expression, a list, or any other value to render.
        verbose: if set, also include empty args, the type, the comments and the node id.
        level: the current nesting depth, used to compute the indentation.
        repr_str: whether a plain string `node` should be rendered through `repr`.
    """
    # NOTE(review): whitespace inside this literal may have been collapsed in the
    # copy under review -- confirm the indent unit against the canonical file.
    indent = "\n" + (" " * (level + 1))
    delim = f",{indent}"

    if isinstance(node, Expression):
        # Unset args (None / empty list) are hidden unless `verbose` is on.
        args = {k: v for k, v in node.args.items() if (v is not None and v != []) or verbose}

        if (node.type or verbose) and not isinstance(node, DataType):
            args["_type"] = node.type
        if node.comments or verbose:
            args["_comments"] = node.comments

        if verbose:
            args["_id"] = id(node)

        # Inline leaves for a more compact representation
        if node.is_leaf():
            indent = ""
            delim = ", "

        # Children of string literals and quoted identifiers are shown via repr().
        repr_str = node.is_string or (isinstance(node, Identifier) and node.quoted)
        items = delim.join(
            [f"{k}={_to_s(v, verbose, level + 1, repr_str=repr_str)}" for k, v in args.items()]
        )
        return f"{node.__class__.__name__}({indent}{items})"

    if isinstance(node, list):
        items = delim.join(_to_s(i, verbose, level + 1) for i in node)
        items = f"{indent}{items}" if items else ""
        return f"[{items}]"

    # We use the representation of the string to avoid stripping out important whitespace
    if repr_str and isinstance(node, str):
        node = repr(node)

    # Indent multiline strings to match the current level
    return indent.join(textwrap.dedent(str(node).strip("\n")).splitlines())


def _is_wrong_expression(expression, into):
    """Return True if `expression` is an Expression that is not an instance of `into`."""
    return isinstance(expression, Expression) and not isinstance(expression, into)


def _apply_builder(
    expression,
    instance,
    arg,
    copy=True,
    prefix=None,
    into=None,
    dialect=None,
    into_arg="this",
    **opts,
):
    """Set `arg` on `instance` to `expression`, parsing it first if it is a string.

    An Expression that is not an `into` instance is wrapped as
    `into(**{into_arg: expression})`. Returns the instance, copied when `copy` is set.
    """
    if _is_wrong_expression(expression, into):
        expression = into(**{into_arg: expression})
    instance = maybe_copy(instance, copy)
    expression = maybe_parse(
        sql_or_expression=expression,
        prefix=prefix,
        into=into,
        dialect=dialect,
        **opts,
    )
    instance.set(arg, expression)
    return instance


def _apply_child_list_builder(
    *expressions,
    instance,
    arg,
    append=True,
    copy=True,
    prefix=None,
    into=None,
    dialect=None,
    properties=None,
    **opts,
):
    """Merge `expressions` into a single `into` node and store it under `arg`.

    Each non-None input is parsed (or wrapped) into an `into` node. The
    "expressions" of those nodes are concatenated, and every other arg they
    carry is collected into `properties` and set on the resulting child. With
    `append`, the expressions of an existing child under `arg` come first.
    Returns the instance, copied when `copy` is set.
    """
    instance = maybe_copy(instance, copy)
    parsed = []
    # A caller-supplied dict is updated in place below.
    properties = {} if properties is None else properties

    for expression in expressions:
        if expression is not None:
            if _is_wrong_expression(expression, into):
                expression = into(expressions=[expression])

            expression = maybe_parse(
                expression,
                into=into,
                dialect=dialect,
                prefix=prefix,
                **opts,
            )
            for k, v in expression.args.items():
                if k == "expressions":
                    parsed.extend(v)
                else:
                    properties[k] = v

    existing = instance.args.get(arg)
    if append and existing:
        parsed = existing.expressions + parsed

    child = into(expressions=parsed)
    for k, v in properties.items():
        child.set(k, v)
    instance.set(arg, child)

    return instance


def _apply_list_builder(
    *expressions,
    instance,
    arg,
    append=True,
    copy=True,
    prefix=None,
    into=None,
    dialect=None,
    **opts,
):
    """Parse `expressions` and store them as a list under `arg`.

    None inputs are skipped. With `append`, the new items are added after the
    list already stored under `arg`. Returns the instance, copied when `copy` is set.
    """
    inst = maybe_copy(instance, copy)

    expressions = [
        maybe_parse(
            sql_or_expression=expression,
            into=into,
            prefix=prefix,
            dialect=dialect,
            **opts,
        )
        for expression in expressions
        if expression is not None
    ]

    existing_expressions = inst.args.get(arg)
    if append and existing_expressions:
        expressions = existing_expressions + expressions

    inst.set(arg, expressions)
    return inst


def _apply_conjunction_builder(
    *expressions,
    instance,
    arg,
    into=None,
    append=True,
    copy=True,
    dialect=None,
    **opts,
):
    """AND `expressions` together and store the result under `arg`.

    None and empty-string inputs are ignored; if nothing is left, `instance`
    is returned as-is (not copied). With `append`, the value already stored
    under `arg` becomes the first operand. The result is wrapped as
    `into(this=...)` when `into` is given.
    """
    expressions = [exp for exp in expressions if exp is not None and exp != ""]
    if not expressions:
        return instance

    inst = maybe_copy(instance, copy)

    existing = inst.args.get(arg)
    if append and existing is not None:
        # With `into`, the stored node is a wrapper, so unwrap it via `.this`.
        expressions = [existing.this if into else existing] + list(expressions)

    node = and_(*expressions, dialect=dialect, copy=copy, **opts)

    inst.set(arg, into(this=node) if into else node)
    return inst


def _apply_cte_builder(
    instance: E,
    alias: ExpOrStr,
    as_: ExpOrStr,
    recursive: t.Optional[bool] = None,
    materialized: t.Optional[bool] = None,
    append: bool = True,
    dialect: DialectType = None,
    copy: bool = True,
    scalar: bool = False,
    **opts,
) -> E:
    """Build a CTE from `alias` and `as_` and add it to the "with" arg of `instance`.

    `recursive` is stored on the `With` node as `recursive or False`.
    """
    alias_expression = maybe_parse(alias, dialect=dialect, into=TableAlias, **opts)
    as_expression = maybe_parse(as_, dialect=dialect, copy=copy, **opts)
    if scalar and not isinstance(as_expression, Subquery):
        # scalar CTE must be wrapped in a subquery
        as_expression = Subquery(this=as_expression)
    cte = CTE(this=as_expression, alias=alias_expression, materialized=materialized, scalar=scalar)
    return _apply_child_list_builder(
        cte,
        instance=instance,
        arg="with",
        append=append,
        copy=copy,
        into=With,
        properties={"recursive": recursive or False},
    )


def _combine(
    expressions: t.Sequence[t.Optional[ExpOrStr]],
    operator: t.Type[Connector],
    dialect: DialectType = None,
    copy: bool = True,
    wrap: bool = True,
    **opts,
) -> Expression:
    """Left-fold `expressions` into a chain of `operator` nodes.

    Args:
        expressions: the operands; None entries are skipped, the rest are
            turned into conditions via `condition`.
        operator: the connector class used to join consecutive operands.
        dialect: the dialect used to parse string operands.
        copy: whether to copy operands that are already expressions.
        wrap: whether to wrap operands that are themselves `Connector`s in
            `Paren`s. The first operand is only wrapped if there are others.
        **opts: other options to use to parse string operands.

    Returns:
        The single operand if only one is left, else the nested `operator` tree.

    Raises:
        ValueError: if no non-None operand was given.
    """
    conditions = [
        condition(expression, dialect=dialect, copy=copy, **opts)
        for expression in expressions
        if expression is not None
    ]

    # Unpacking an empty list below would also raise ValueError, but with an
    # opaque "not enough values to unpack" message; fail with a clear one instead.
    if not conditions:
        raise ValueError(
            f"At least one non-None expression is required to build {operator.__name__}"
        )

    this, *rest = conditions
    if rest and wrap:
        this = _wrap(this, Connector)
    for expression in rest:
        this = operator(this=this, expression=_wrap(expression, Connector) if wrap else expression)

    return this


@t.overload
def _wrap(expression: None, kind: t.Type[Expression]) -> None: ...


@t.overload
def _wrap(expression: E, kind: t.Type[Expression]) -> E | Paren: ...
def _wrap(expression: t.Optional[E], kind: t.Type[Expression]) -> t.Optional[E] | Paren:
    """Wrap `expression` in a `Paren` if it is an instance of `kind`; else return it unchanged."""
    return Paren(this=expression) if isinstance(expression, kind) else expression


def _apply_set_operation(
    *expressions: ExpOrStr,
    set_operation: t.Type[S],
    distinct: bool = True,
    dialect: DialectType = None,
    copy: bool = True,
    **opts,
) -> S:
    """Parse the operands and left-fold them into nested `set_operation` nodes.

    Three operands a, b, c produce op(this=op(this=a, expression=b), expression=c);
    every node receives the same `distinct` flag.
    """
    return reduce(
        lambda x, y: set_operation(this=x, expression=y, distinct=distinct),
        # Operands are parsed lazily, one at a time, as `reduce` consumes them.
        (maybe_parse(e, dialect=dialect, copy=copy, **opts) for e in expressions),
    )


def union(
    *expressions: ExpOrStr,
    distinct: bool = True,
    dialect: DialectType = None,
    copy: bool = True,
    **opts,
) -> Union:
    """
    Initializes a syntax tree for the `UNION` operation.

    Example:
        >>> union("SELECT * FROM foo", "SELECT * FROM bla").sql()
        'SELECT * FROM foo UNION SELECT * FROM bla'

    Args:
        expressions: the SQL code strings, corresponding to the `UNION`'s operands.
            If `Expression` instances are passed, they will be used as-is.
        distinct: set the DISTINCT flag if and only if this is true.
        dialect: the dialect used to parse the input expression.
        copy: whether to copy the expression.
        opts: other options to use to parse the input expressions.

    Returns:
        The new Union instance.
    """
    assert len(expressions) >= 2, "At least two expressions are required by `union`."
    return _apply_set_operation(
        *expressions, set_operation=Union, distinct=distinct, dialect=dialect, copy=copy, **opts
    )


def intersect(
    *expressions: ExpOrStr,
    distinct: bool = True,
    dialect: DialectType = None,
    copy: bool = True,
    **opts,
) -> Intersect:
    """
    Initializes a syntax tree for the `INTERSECT` operation.

    Example:
        >>> intersect("SELECT * FROM foo", "SELECT * FROM bla").sql()
        'SELECT * FROM foo INTERSECT SELECT * FROM bla'

    Args:
        expressions: the SQL code strings, corresponding to the `INTERSECT`'s operands.
            If `Expression` instances are passed, they will be used as-is.
        distinct: set the DISTINCT flag if and only if this is true.
        dialect: the dialect used to parse the input expression.
        copy: whether to copy the expression.
        opts: other options to use to parse the input expressions.

    Returns:
        The new Intersect instance.
    """
    assert len(expressions) >= 2, "At least two expressions are required by `intersect`."
    return _apply_set_operation(
        *expressions, set_operation=Intersect, distinct=distinct, dialect=dialect, copy=copy, **opts
    )


def except_(
    *expressions: ExpOrStr,
    distinct: bool = True,
    dialect: DialectType = None,
    copy: bool = True,
    **opts,
) -> Except:
    """
    Initializes a syntax tree for the `EXCEPT` operation.

    Example:
        >>> except_("SELECT * FROM foo", "SELECT * FROM bla").sql()
        'SELECT * FROM foo EXCEPT SELECT * FROM bla'

    Args:
        expressions: the SQL code strings, corresponding to the `EXCEPT`'s operands.
            If `Expression` instances are passed, they will be used as-is.
        distinct: set the DISTINCT flag if and only if this is true.
        dialect: the dialect used to parse the input expression.
        copy: whether to copy the expression.
        opts: other options to use to parse the input expressions.

    Returns:
        The new Except instance.
    """
    assert len(expressions) >= 2, "At least two expressions are required by `except_`."
    return _apply_set_operation(
        *expressions, set_operation=Except, distinct=distinct, dialect=dialect, copy=copy, **opts
    )


def select(*expressions: ExpOrStr, dialect: DialectType = None, **opts) -> Select:
    """
    Initializes a syntax tree from one or multiple SELECT expressions.

    Example:
        >>> select("col1", "col2").from_("tbl").sql()
        'SELECT col1, col2 FROM tbl'

    Args:
        *expressions: the SQL code string to parse as the expressions of a
            SELECT statement. If an Expression instance is passed, this is used as-is.
        dialect: the dialect used to parse the input expressions (in the case that an
            input expression is a SQL string).
        **opts: other options to use to parse the input expressions (again, in the case
            that an input expression is a SQL string).

    Returns:
        Select: the syntax tree for the SELECT statement.
    """
    return Select().select(*expressions, dialect=dialect, **opts)


def from_(expression: ExpOrStr, dialect: DialectType = None, **opts) -> Select:
    """
    Initializes a syntax tree from a FROM expression.

    Example:
        >>> from_("tbl").select("col1", "col2").sql()
        'SELECT col1, col2 FROM tbl'

    Args:
        expression: the SQL code string to parse as the FROM expressions of a
            SELECT statement. If an Expression instance is passed, this is used as-is.
        dialect: the dialect used to parse the input expression (in the case that the
            input expression is a SQL string).
        **opts: other options to use to parse the input expressions (again, in the case
            that the input expression is a SQL string).

    Returns:
        Select: the syntax tree for the SELECT statement.
    """
    return Select().from_(expression, dialect=dialect, **opts)


def update(
    table: str | Table,
    properties: t.Optional[dict] = None,
    where: t.Optional[ExpOrStr] = None,
    from_: t.Optional[ExpOrStr] = None,
    with_: t.Optional[t.Dict[str, ExpOrStr]] = None,
    dialect: DialectType = None,
    **opts,
) -> Update:
    """
    Creates an update statement.

    Example:
        >>> update("my_table", {"x": 1, "y": "2", "z": None}, from_="baz_cte", where="baz_cte.id > 1 and my_table.id = baz_cte.id", with_={"baz_cte": "SELECT id FROM foo"}).sql()
        "WITH baz_cte AS (SELECT id FROM foo) UPDATE my_table SET x = 1, y = '2', z = NULL FROM baz_cte WHERE baz_cte.id > 1 AND my_table.id = baz_cte.id"

    Args:
        table: the table to update, given as a SQL string or a `Table` expression.
        properties: dictionary of properties to SET which are
            auto converted to sql objects eg None -> NULL
        where: sql conditional parsed into a WHERE statement
        from_: sql statement parsed into a FROM statement
        with_: dictionary of CTE aliases / select statements to include in a WITH clause.
        dialect: the dialect used to parse the input expressions.
        **opts: other options to use to parse the input expressions.

    Returns:
        Update: the syntax tree for the UPDATE statement.
    """
    # Note: unlike the other inputs, the table is parsed without `**opts`.
    update_expr = Update(this=maybe_parse(table, into=Table, dialect=dialect))
    if properties:
        update_expr.set(
            "expressions",
            [
                EQ(this=maybe_parse(k, dialect=dialect, **opts), expression=convert(v))
                for k, v in properties.items()
            ],
        )
    if from_:
        update_expr.set(
            "from",
            maybe_parse(from_, into=From, dialect=dialect, prefix="FROM", **opts),
        )
    # A ready-made Condition is wrapped directly so it is not re-parsed as "WHERE ...".
    if isinstance(where, Condition):
        where = Where(this=where)
    if where:
        update_expr.set(
            "where",
            maybe_parse(where, into=Where, dialect=dialect, prefix="WHERE", **opts),
        )
    if with_:
        cte_list = [
            alias_(CTE(this=maybe_parse(qry, dialect=dialect, **opts)), alias, table=True)
            for alias, qry in with_.items()
        ]
        update_expr.set(
            "with",
            With(expressions=cte_list),
        )
    return update_expr


def delete(
    table: ExpOrStr,
    where: t.Optional[ExpOrStr] = None,
    returning: t.Optional[ExpOrStr] = None,
    dialect: DialectType = None,
    **opts,
) -> Delete:
    """
    Builds a delete statement.

    Example:
        >>> delete("my_table", where="id > 1").sql()
        'DELETE FROM my_table WHERE id > 1'

    Args:
        table: the table to delete from.
        where: sql conditional parsed into a WHERE statement
        returning: sql conditional parsed into a RETURNING statement
        dialect: the dialect used to parse the input expressions.
        **opts: other options to use to parse the input expressions.

    Returns:
        Delete: the syntax tree for the DELETE statement.
    """
    # copy=False throughout: the Delete node is freshly created here.
    delete_expr = Delete().delete(table, dialect=dialect, copy=False, **opts)
    if where:
        delete_expr = delete_expr.where(where, dialect=dialect, copy=False, **opts)
    if returning:
        delete_expr = delete_expr.returning(returning, dialect=dialect, copy=False, **opts)
    return delete_expr


def insert(
    expression: ExpOrStr,
    into: ExpOrStr,
    columns: t.Optional[t.Sequence[str | Identifier]] = None,
    overwrite: t.Optional[bool] = None,
    returning: t.Optional[ExpOrStr] = None,
    dialect: DialectType = None,
    copy: bool = True,
    **opts,
) -> Insert:
    """
    Builds an INSERT statement.

    Example:
        >>> insert("VALUES (1, 2, 3)", "tbl").sql()
        'INSERT INTO tbl VALUES (1, 2, 3)'

    Args:
        expression: the sql string or expression of the INSERT statement
        into: the tbl to insert data to.
        columns: optionally the table's column names.
        overwrite: whether to INSERT OVERWRITE or not.
        returning: sql conditional parsed into a RETURNING statement
        dialect: the dialect used to parse the input expressions.
        copy: whether to copy the expression.
        **opts: other options to use to parse the input expressions.

    Returns:
        Insert: the syntax tree for the INSERT statement.
    """
    expr = maybe_parse(expression, dialect=dialect, copy=copy, **opts)
    this: Table | Schema = maybe_parse(into, into=Table, dialect=dialect, copy=copy, **opts)

    # With explicit columns, the target becomes a Schema wrapping the table.
    if columns:
        this = Schema(this=this, expressions=[to_identifier(c, copy=copy) for c in columns])

    insert = Insert(this=this, expression=expr, overwrite=overwrite)

    if returning:
        insert = insert.returning(returning, dialect=dialect, copy=False, **opts)

    return insert


def merge(
    *when_exprs: ExpOrStr,
    into: ExpOrStr,
    using: ExpOrStr,
    on: ExpOrStr,
    returning: t.Optional[ExpOrStr] = None,
    dialect: DialectType = None,
    copy: bool = True,
    **opts,
) -> Merge:
    """
    Builds a MERGE statement.

    Example:
        >>> merge("WHEN MATCHED THEN UPDATE SET col1 = source_table.col1",
        ...       "WHEN NOT MATCHED THEN INSERT (col1) VALUES (source_table.col1)",
        ...       into="my_table",
        ...       using="source_table",
        ...       on="my_table.id = source_table.id").sql()
        'MERGE INTO my_table USING source_table ON my_table.id = source_table.id WHEN MATCHED THEN UPDATE SET col1 = source_table.col1 WHEN NOT MATCHED THEN INSERT (col1) VALUES (source_table.col1)'

    Args:
        *when_exprs: The WHEN clauses specifying actions for matched and unmatched rows.
        into: The target table to merge data into.
        using: The source table to merge data from.
        on: The join condition for the merge.
        returning: The columns to return from the merge.
        dialect: The dialect used to parse the input expressions.
        copy: Whether to copy the expression.
        **opts: Other options to use to parse the input expressions.

    Returns:
        Merge: The syntax tree for the MERGE statement.
    """
    expressions: t.List[Expression] = []
    for when_expr in when_exprs:
        expression = maybe_parse(when_expr, dialect=dialect, copy=copy, into=Whens, **opts)
        # A single When is appended as-is; any other result contributes its `expressions`.
        expressions.extend([expression] if isinstance(expression, When) else expression.expressions)

    merge = Merge(
        this=maybe_parse(into, dialect=dialect, copy=copy, **opts),
        using=maybe_parse(using, dialect=dialect, copy=copy, **opts),
        on=maybe_parse(on, dialect=dialect, copy=copy, **opts),
        whens=Whens(expressions=expressions),
    )
    if returning:
        merge = merge.returning(returning, dialect=dialect, copy=False, **opts)

    return merge


def condition(
    expression: ExpOrStr, dialect: DialectType = None, copy: bool = True, **opts
) -> Condition:
    """
    Initialize a logical condition expression.

    Example:
        >>> condition("x=1").sql()
        'x = 1'

        This is helpful for composing larger logical syntax trees:
        >>> where = condition("x=1")
        >>> where = where.and_("y=1")
        >>> Select().from_("tbl").select("*").where(where).sql()
        'SELECT * FROM tbl WHERE x = 1 AND y = 1'

    Args:
        expression: the SQL code string to parse.
            If an Expression instance is passed, this is used as-is.
        dialect: the dialect used to parse the input expression (in the case that the
            input expression is a SQL string).
        copy: Whether to copy `expression` (only applies to expressions).
        **opts: other options to use to parse the input expressions (again, in the case
            that the input expression is a SQL string).

    Returns:
        The new Condition instance
    """
    return maybe_parse(
        expression,
        into=Condition,
        dialect=dialect,
        copy=copy,
        **opts,
    )


def and_(
    *expressions: t.Optional[ExpOrStr],
    dialect: DialectType = None,
    copy: bool = True,
    wrap: bool = True,
    **opts,
) -> Condition:
    """
    Combine multiple conditions with an AND logical operator.

    Example:
        >>> and_("x=1", and_("y=1", "z=1")).sql()
        'x = 1 AND (y = 1 AND z = 1)'

    Args:
        *expressions: the SQL code strings to parse.
            If an Expression instance is passed, this is used as-is.
        dialect: the dialect used to parse the input expression.
        copy: whether to copy `expressions` (only applies to Expressions).
        wrap: whether to wrap the operands in `Paren`s. This is true by default to avoid
            precedence issues, but can be turned off when the produced AST is too deep and
            causes recursion-related issues.
        **opts: other options to use to parse the input expressions.

    Returns:
        The new condition
    """
    return t.cast(Condition, _combine(expressions, And, dialect, copy=copy, wrap=wrap, **opts))


def or_(
    *expressions: t.Optional[ExpOrStr],
    dialect: DialectType = None,
    copy: bool = True,
    wrap: bool = True,
    **opts,
) -> Condition:
    """
    Combine multiple conditions with an OR logical operator.

    Example:
        >>> or_("x=1", or_("y=1", "z=1")).sql()
        'x = 1 OR (y = 1 OR z = 1)'

    Args:
        *expressions: the SQL code strings to parse.
            If an Expression instance is passed, this is used as-is.
        dialect: the dialect used to parse the input expression.
        copy: whether to copy `expressions` (only applies to Expressions).
        wrap: whether to wrap the operands in `Paren`s. This is true by default to avoid
            precedence issues, but can be turned off when the produced AST is too deep and
            causes recursion-related issues.
        **opts: other options to use to parse the input expressions.

    Returns:
        The new condition
    """
    return t.cast(Condition, _combine(expressions, Or, dialect, copy=copy, wrap=wrap, **opts))


def xor(
    *expressions: t.Optional[ExpOrStr],
    dialect: DialectType = None,
    copy: bool = True,
    wrap: bool = True,
    **opts,
) -> Condition:
    """
    Combine multiple conditions with an XOR logical operator.

    Example:
        >>> xor("x=1", xor("y=1", "z=1")).sql()
        'x = 1 XOR (y = 1 XOR z = 1)'

    Args:
        *expressions: the SQL code strings to parse.
            If an Expression instance is passed, this is used as-is.
        dialect: the dialect used to parse the input expression.
        copy: whether to copy `expressions` (only applies to Expressions).
        wrap: whether to wrap the operands in `Paren`s.
This is true by default to avoid precedence issues, but can be turned off when the produced AST is too deep and causes recursion-related issues. **opts: other options to use to parse the input expressions. Returns: The new condition """ return t.cast(Condition, _combine(expressions, Xor, dialect, copy=copy, wrap=wrap, **opts)) def not_(expression: ExpOrStr, dialect: DialectType = None, copy: bool = True, **opts) -> Not: """ Wrap a condition with a NOT operator. Example: >>> not_("this_suit='black'").sql() "NOT this_suit = 'black'" Args: expression: the SQL code string to parse. If an Expression instance is passed, this is used as-is. dialect: the dialect used to parse the input expression. copy: whether to copy the expression or not. **opts: other options to use to parse the input expressions. Returns: The new condition. """ this = condition( expression, dialect=dialect, copy=copy, **opts, ) return Not(this=_wrap(this, Connector)) def paren(expression: ExpOrStr, copy: bool = True) -> Paren: """ Wrap an expression in parentheses. Example: >>> paren("5 + 3").sql() '(5 + 3)' Args: expression: the SQL code string to parse. If an Expression instance is passed, this is used as-is. copy: whether to copy the expression or not. Returns: The wrapped expression. """ return Paren(this=maybe_parse(expression, copy=copy)) SAFE_IDENTIFIER_RE: t.Pattern[str] = re.compile(r"^[_a-zA-Z][\w]*$") @t.overload def to_identifier(name: None, quoted: t.Optional[bool] = None, copy: bool = True) -> None: ... @t.overload def to_identifier( name: str | Identifier, quoted: t.Optional[bool] = None, copy: bool = True ) -> Identifier: ... def to_identifier(name, quoted=None, copy=True): """Builds an identifier. Args: name: The name to turn into an identifier. quoted: Whether to force quote the identifier. copy: Whether to copy name if it's an Identifier. Returns: The identifier ast node. 
""" if name is None: return None if isinstance(name, Identifier): identifier = maybe_copy(name, copy) elif isinstance(name, str): identifier = Identifier( this=name, quoted=not SAFE_IDENTIFIER_RE.match(name) if quoted is None else quoted, ) else: raise ValueError(f"Name needs to be a string or an Identifier, got: {name.__class__}") return identifier def parse_identifier(name: str | Identifier, dialect: DialectType = None) -> Identifier: """ Parses a given string into an identifier. Args: name: The name to parse into an identifier. dialect: The dialect to parse against. Returns: The identifier ast node. """ try: expression = maybe_parse(name, dialect=dialect, into=Identifier) except (ParseError, TokenError): expression = to_identifier(name) return expression INTERVAL_STRING_RE = re.compile(r"\s*([0-9]+)\s*([a-zA-Z]+)\s*") def to_interval(interval: str | Literal) -> Interval: """Builds an interval expression from a string like '1 day' or '5 months'.""" if isinstance(interval, Literal): if not interval.is_string: raise ValueError("Invalid interval string.") interval = interval.this interval = maybe_parse(f"INTERVAL {interval}") assert isinstance(interval, Interval) return interval def to_table( sql_path: str | Table, dialect: DialectType = None, copy: bool = True, **kwargs ) -> Table: """ Create a table expression from a `[catalog].[schema].[table]` sql path. Catalog and schema are optional. If a table is passed in then that table is returned. Args: sql_path: a `[catalog].[schema].[table]` string. dialect: the source dialect according to which the table name will be parsed. copy: Whether to copy a table if it is passed in. kwargs: the kwargs to instantiate the resulting `Table` expression with. Returns: A table expression. 
""" if isinstance(sql_path, Table): return maybe_copy(sql_path, copy=copy) table = maybe_parse(sql_path, into=Table, dialect=dialect) for k, v in kwargs.items(): table.set(k, v) return table def to_column( sql_path: str | Column, quoted: t.Optional[bool] = None, dialect: DialectType = None, copy: bool = True, **kwargs, ) -> Column: """ Create a column from a `[table].[column]` sql path. Table is optional. If a column is passed in then that column is returned. Args: sql_path: a `[table].[column]` string. quoted: Whether or not to force quote identifiers. dialect: the source dialect according to which the column name will be parsed. copy: Whether to copy a column if it is passed in. kwargs: the kwargs to instantiate the resulting `Column` expression with. Returns: A column expression. """ if isinstance(sql_path, Column): return maybe_copy(sql_path, copy=copy) try: col = maybe_parse(sql_path, into=Column, dialect=dialect) except ParseError: return column(*reversed(sql_path.split(".")), quoted=quoted, **kwargs) for k, v in kwargs.items(): col.set(k, v) if quoted: for i in col.find_all(Identifier): i.set("quoted", True) return col def alias_( expression: ExpOrStr, alias: t.Optional[str | Identifier], table: bool | t.Sequence[str | Identifier] = False, quoted: t.Optional[bool] = None, dialect: DialectType = None, copy: bool = True, **opts, ): """Create an Alias expression. Example: >>> alias_('foo', 'bar').sql() 'foo AS bar' >>> alias_('(select 1, 2)', 'bar', table=['a', 'b']).sql() '(SELECT 1, 2) AS bar(a, b)' Args: expression: the SQL code strings to parse. If an Expression instance is passed, this is used as-is. alias: the alias name to use. If the name has special characters it is quoted. table: Whether to create a table alias, can also be a list of columns. quoted: whether to quote the alias dialect: the dialect used to parse the input expression. copy: Whether to copy the expression. **opts: other options to use to parse the input expressions. 
Returns: Alias: the aliased expression """ exp = maybe_parse(expression, dialect=dialect, copy=copy, **opts) alias = to_identifier(alias, quoted=quoted) if table: table_alias = TableAlias(this=alias) exp.set("alias", table_alias) if not isinstance(table, bool): for column in table: table_alias.append("columns", to_identifier(column, quoted=quoted)) return exp # We don't set the "alias" arg for Window expressions, because that would add an IDENTIFIER node in # the AST, representing a "named_window" [1] construct (eg. bigquery). What we want is an ALIAS node # for the complete Window expression. # # [1]: https://cloud.google.com/bigquery/docs/reference/standard-sql/window-function-calls if "alias" in exp.arg_types and not isinstance(exp, Window): exp.set("alias", alias) return exp return Alias(this=exp, alias=alias) def subquery( expression: ExpOrStr, alias: t.Optional[Identifier | str] = None, dialect: DialectType = None, **opts, ) -> Select: """ Build a subquery expression that's selected from. Example: >>> subquery('select x from tbl', 'bar').select('x').sql() 'SELECT x FROM (SELECT x FROM tbl) AS bar' Args: expression: the SQL code strings to parse. If an Expression instance is passed, this is used as-is. alias: the alias name to use. dialect: the dialect used to parse the input expression. **opts: other options to use to parse the input expressions. Returns: A new Select instance with the subquery expression included. 
""" expression = maybe_parse(expression, dialect=dialect, **opts).subquery(alias, **opts) return Select().from_(expression, dialect=dialect, **opts) @t.overload def column( col: str | Identifier, table: t.Optional[str | Identifier] = None, db: t.Optional[str | Identifier] = None, catalog: t.Optional[str | Identifier] = None, *, fields: t.Collection[t.Union[str, Identifier]], quoted: t.Optional[bool] = None, copy: bool = True, ) -> Dot: pass @t.overload def column( col: str | Identifier, table: t.Optional[str | Identifier] = None, db: t.Optional[str | Identifier] = None, catalog: t.Optional[str | Identifier] = None, *, fields: Lit[None] = None, quoted: t.Optional[bool] = None, copy: bool = True, ) -> Column: pass def column( col, table=None, db=None, catalog=None, *, fields=None, quoted=None, copy=True, ): """ Build a Column. Args: col: Column name. table: Table name. db: Database name. catalog: Catalog name. fields: Additional fields using dots. quoted: Whether to force quotes on the column's identifiers. copy: Whether to copy identifiers if passed in. Returns: The new Column instance. """ this = Column( this=to_identifier(col, quoted=quoted, copy=copy), table=to_identifier(table, quoted=quoted, copy=copy), db=to_identifier(db, quoted=quoted, copy=copy), catalog=to_identifier(catalog, quoted=quoted, copy=copy), ) if fields: this = Dot.build( (this, *(to_identifier(field, quoted=quoted, copy=copy) for field in fields)) ) return this def cast( expression: ExpOrStr, to: DATA_TYPE, copy: bool = True, dialect: DialectType = None, **opts ) -> Cast: """Cast an expression to a data type. Example: >>> cast('x + 1', 'int').sql() 'CAST(x + 1 AS INT)' Args: expression: The expression to cast. to: The datatype to cast to. copy: Whether to copy the supplied expressions. dialect: The target dialect. 
This is used to prevent a re-cast in the following scenario: - The expression to be cast is already a exp.Cast expression - The existing cast is to a type that is logically equivalent to new type For example, if :expression='CAST(x as DATETIME)' and :to=Type.TIMESTAMP, but in the target dialect DATETIME is mapped to TIMESTAMP, then we will NOT return `CAST(x (as DATETIME) as TIMESTAMP)` and instead just return the original expression `CAST(x as DATETIME)`. This is to prevent it being output as a double cast `CAST(x (as TIMESTAMP) as TIMESTAMP)` once the DATETIME -> TIMESTAMP mapping is applied in the target dialect generator. Returns: The new Cast instance. """ expr = maybe_parse(expression, copy=copy, dialect=dialect, **opts) data_type = DataType.build(to, copy=copy, dialect=dialect, **opts) # dont re-cast if the expression is already a cast to the correct type if isinstance(expr, Cast): from sqlglot.dialects.dialect import Dialect target_dialect = Dialect.get_or_raise(dialect) type_mapping = target_dialect.generator_class.TYPE_MAPPING existing_cast_type: DataType.Type = expr.to.this new_cast_type: DataType.Type = data_type.this types_are_equivalent = type_mapping.get( existing_cast_type, existing_cast_type.value ) == type_mapping.get(new_cast_type, new_cast_type.value) if expr.is_type(data_type) or types_are_equivalent: return expr expr = Cast(this=expr, to=data_type) expr.type = data_type return expr def table_( table: Identifier | str, db: t.Optional[Identifier | str] = None, catalog: t.Optional[Identifier | str] = None, quoted: t.Optional[bool] = None, alias: t.Optional[Identifier | str] = None, ) -> Table: """Build a Table. Args: table: Table name. db: Database name. catalog: Catalog name. quote: Whether to force quotes on the table's identifiers. alias: Table's alias. Returns: The new Table instance. 
""" return Table( this=to_identifier(table, quoted=quoted) if table else None, db=to_identifier(db, quoted=quoted) if db else None, catalog=to_identifier(catalog, quoted=quoted) if catalog else None, alias=TableAlias(this=to_identifier(alias)) if alias else None, ) def values( values: t.Iterable[t.Tuple[t.Any, ...]], alias: t.Optional[str] = None, columns: t.Optional[t.Iterable[str] | t.Dict[str, DataType]] = None, ) -> Values: """Build VALUES statement. Example: >>> values([(1, '2')]).sql() "VALUES (1, '2')" Args: values: values statements that will be converted to SQL alias: optional alias columns: Optional list of ordered column names or ordered dictionary of column names to types. If either are provided then an alias is also required. Returns: Values: the Values expression object """ if columns and not alias: raise ValueError("Alias is required when providing columns") return Values( expressions=[convert(tup) for tup in values], alias=( TableAlias(this=to_identifier(alias), columns=[to_identifier(x) for x in columns]) if columns else (TableAlias(this=to_identifier(alias)) if alias else None) ), ) def var(name: t.Optional[ExpOrStr]) -> Var: """Build a SQL variable. Example: >>> repr(var('x')) 'Var(this=x)' >>> repr(var(column('x', table='y'))) 'Var(this=x)' Args: name: The name of the var or an expression who's name will become the var. Returns: The new variable node. """ if not name: raise ValueError("Cannot convert empty name into var.") if isinstance(name, Expression): name = name.name return Var(this=name) def rename_table( old_name: str | Table, new_name: str | Table, dialect: DialectType = None, ) -> Alter: """Build ALTER TABLE... RENAME... expression Args: old_name: The old name of the table new_name: The new name of the table dialect: The dialect to parse the table. 
Returns: Alter table expression """ old_table = to_table(old_name, dialect=dialect) new_table = to_table(new_name, dialect=dialect) return Alter( this=old_table, kind="TABLE", actions=[ AlterRename(this=new_table), ], ) def rename_column( table_name: str | Table, old_column_name: str | Column, new_column_name: str | Column, exists: t.Optional[bool] = None, dialect: DialectType = None, ) -> Alter: """Build ALTER TABLE... RENAME COLUMN... expression Args: table_name: Name of the table old_column: The old name of the column new_column: The new name of the column exists: Whether to add the `IF EXISTS` clause dialect: The dialect to parse the table/column. Returns: Alter table expression """ table = to_table(table_name, dialect=dialect) old_column = to_column(old_column_name, dialect=dialect) new_column = to_column(new_column_name, dialect=dialect) return Alter( this=table, kind="TABLE", actions=[ RenameColumn(this=old_column, to=new_column, exists=exists), ], ) def convert(value: t.Any, copy: bool = False) -> Expression: """Convert a python value into an expression object. Raises an error if a conversion is not possible. Args: value: A python object. copy: Whether to copy `value` (only applies to Expressions and collections). Returns: The equivalent expression object. """ if isinstance(value, Expression): return maybe_copy(value, copy) if isinstance(value, str): return Literal.string(value) if isinstance(value, bool): return Boolean(this=value) if value is None or (isinstance(value, float) and math.isnan(value)): return null() if isinstance(value, numbers.Number): return Literal.number(value) if isinstance(value, bytes): return HexString(this=value.hex()) if isinstance(value, datetime.datetime): datetime_literal = Literal.string(value.isoformat(sep=" ")) tz = None if value.tzinfo: # this works for zoneinfo.ZoneInfo, pytz.timezone and datetime.datetime.utc to return IANA timezone names like "America/Los_Angeles" # instead of abbreviations like "PDT". 
This is for consistency with other timezone handling functions in SQLGlot tz = Literal.string(str(value.tzinfo)) return TimeStrToTime(this=datetime_literal, zone=tz) if isinstance(value, datetime.date): date_literal = Literal.string(value.strftime("%Y-%m-%d")) return DateStrToDate(this=date_literal) if isinstance(value, tuple): if hasattr(value, "_fields"): return Struct( expressions=[ PropertyEQ( this=to_identifier(k), expression=convert(getattr(value, k), copy=copy) ) for k in value._fields ] ) return Tuple(expressions=[convert(v, copy=copy) for v in value]) if isinstance(value, list): return Array(expressions=[convert(v, copy=copy) for v in value]) if isinstance(value, dict): return Map( keys=Array(expressions=[convert(k, copy=copy) for k in value]), values=Array(expressions=[convert(v, copy=copy) for v in value.values()]), ) if hasattr(value, "__dict__"): return Struct( expressions=[ PropertyEQ(this=to_identifier(k), expression=convert(v, copy=copy)) for k, v in value.__dict__.items() ] ) raise ValueError(f"Cannot convert {value}") def replace_children(expression: Expression, fun: t.Callable, *args, **kwargs) -> None: """ Replace children of an expression with the result of a lambda fun(child) -> exp. """ for k, v in tuple(expression.args.items()): is_list_arg = type(v) is list child_nodes = v if is_list_arg else [v] new_child_nodes = [] for cn in child_nodes: if isinstance(cn, Expression): for child_node in ensure_collection(fun(cn, *args, **kwargs)): new_child_nodes.append(child_node) else: new_child_nodes.append(cn) expression.set(k, new_child_nodes if is_list_arg else seq_get(new_child_nodes, 0)) def replace_tree( expression: Expression, fun: t.Callable, prune: t.Optional[t.Callable[[Expression], bool]] = None, ) -> Expression: """ Replace an entire tree with the result of function calls on each node. This will be traversed in reverse dfs, so leaves first. If new nodes are created as a result of function calls, they will also be traversed. 
""" stack = list(expression.dfs(prune=prune)) while stack: node = stack.pop() new_node = fun(node) if new_node is not node: node.replace(new_node) if isinstance(new_node, Expression): stack.append(new_node) return new_node def column_table_names(expression: Expression, exclude: str = "") -> t.Set[str]: """ Return all table names referenced through columns in an expression. Example: >>> import sqlglot >>> sorted(column_table_names(sqlglot.parse_one("a.b AND c.d AND c.e"))) ['a', 'c'] Args: expression: expression to find table names. exclude: a table name to exclude Returns: A list of unique names. """ return { table for table in (column.table for column in expression.find_all(Column)) if table and table != exclude } def table_name(table: Table | str, dialect: DialectType = None, identify: bool = False) -> str: """Get the full name of a table as a string. Args: table: Table expression node or string. dialect: The dialect to generate the table name for. identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True: Always quote. Examples: >>> from sqlglot import exp, parse_one >>> table_name(parse_one("select * from a.b.c").find(exp.Table)) 'a.b.c' Returns: The table name. """ table = maybe_parse(table, into=Table, dialect=dialect) if not table: raise ValueError(f"Cannot parse {table}") return ".".join( ( part.sql(dialect=dialect, identify=True, copy=False, comments=False) if identify or not SAFE_IDENTIFIER_RE.match(part.name) else part.name ) for part in table.parts ) def normalize_table_name(table: str | Table, dialect: DialectType = None, copy: bool = True) -> str: """Returns a case normalized table name without quotes. Args: table: the table to normalize dialect: the dialect to use for normalization rules copy: whether to copy the expression. 
Examples: >>> normalize_table_name("`A-B`.c", dialect="bigquery") 'A-B.c' """ from sqlglot.optimizer.normalize_identifiers import normalize_identifiers return ".".join( p.name for p in normalize_identifiers( to_table(table, dialect=dialect, copy=copy), dialect=dialect ).parts ) def replace_tables( expression: E, mapping: t.Dict[str, str], dialect: DialectType = None, copy: bool = True ) -> E: """Replace all tables in expression according to the mapping. Args: expression: expression node to be transformed and replaced. mapping: mapping of table names. dialect: the dialect of the mapping table copy: whether to copy the expression. Examples: >>> from sqlglot import exp, parse_one >>> replace_tables(parse_one("select * from a.b"), {"a.b": "c"}).sql() 'SELECT * FROM c /* a.b */' Returns: The mapped expression. """ mapping = {normalize_table_name(k, dialect=dialect): v for k, v in mapping.items()} def _replace_tables(node: Expression) -> Expression: if isinstance(node, Table) and node.meta.get("replace") is not False: original = normalize_table_name(node, dialect=dialect) new_name = mapping.get(original) if new_name: table = to_table( new_name, **{k: v for k, v in node.args.items() if k not in TABLE_PARTS}, dialect=dialect, ) table.add_comments([original]) return table return node return expression.transform(_replace_tables, copy=copy) # type: ignore def replace_placeholders(expression: Expression, *args, **kwargs) -> Expression: """Replace placeholders in an expression. Args: expression: expression node to be transformed and replaced. args: positional names that will substitute unnamed placeholders in the given order. kwargs: keyword arguments that will substitute named placeholders. Examples: >>> from sqlglot import exp, parse_one >>> replace_placeholders( ... parse_one("select * from :tbl where ? = ?"), ... exp.to_identifier("str_col"), "b", tbl=exp.to_identifier("foo") ... ).sql() "SELECT * FROM foo WHERE str_col = 'b'" Returns: The mapped expression. 
""" def _replace_placeholders(node: Expression, args, **kwargs) -> Expression: if isinstance(node, Placeholder): if node.this: new_name = kwargs.get(node.this) if new_name is not None: return convert(new_name) else: try: return convert(next(args)) except StopIteration: pass return node return expression.transform(_replace_placeholders, iter(args), **kwargs) def expand( expression: Expression, sources: t.Dict[str, Query | t.Callable[[], Query]], dialect: DialectType = None, copy: bool = True, ) -> Expression: """Transforms an expression by expanding all referenced sources into subqueries. Examples: >>> from sqlglot import parse_one >>> expand(parse_one("select * from x AS z"), {"x": parse_one("select * from y")}).sql() 'SELECT * FROM (SELECT * FROM y) AS z /* source: x */' >>> expand(parse_one("select * from x AS z"), {"x": parse_one("select * from y"), "y": parse_one("select * from z")}).sql() 'SELECT * FROM (SELECT * FROM (SELECT * FROM z) AS y /* source: y */) AS z /* source: x */' Args: expression: The expression to expand. sources: A dict of name to query or a callable that provides a query on demand. dialect: The dialect of the sources dict or the callable. copy: Whether to copy the expression during transformation. Defaults to True. Returns: The transformed expression. 
""" normalized_sources = {normalize_table_name(k, dialect=dialect): v for k, v in sources.items()} def _expand(node: Expression): if isinstance(node, Table): name = normalize_table_name(node, dialect=dialect) source = normalized_sources.get(name) if source: # Create a subquery with the same alias (or table name if no alias) parsed_source = source() if callable(source) else source subquery = parsed_source.subquery(node.alias or name) subquery.comments = [f"source: {name}"] # Continue expanding within the subquery return subquery.transform(_expand, copy=False) return node return expression.transform(_expand, copy=copy) def func(name: str, *args, copy: bool = True, dialect: DialectType = None, **kwargs) -> Func: """ Returns a Func expression. Examples: >>> func("abs", 5).sql() 'ABS(5)' >>> func("cast", this=5, to=DataType.build("DOUBLE")).sql() 'CAST(5 AS DOUBLE)' Args: name: the name of the function to build. args: the args used to instantiate the function of interest. copy: whether to copy the argument expressions. dialect: the source dialect. kwargs: the kwargs used to instantiate the function of interest. Note: The arguments `args` and `kwargs` are mutually exclusive. Returns: An instance of the function of interest, or an anonymous function, if `name` doesn't correspond to an existing `sqlglot.expressions.Func` class. 
""" if args and kwargs: raise ValueError("Can't use both args and kwargs to instantiate a function.") from sqlglot.dialects.dialect import Dialect dialect = Dialect.get_or_raise(dialect) converted: t.List[Expression] = [maybe_parse(arg, dialect=dialect, copy=copy) for arg in args] kwargs = {key: maybe_parse(value, dialect=dialect, copy=copy) for key, value in kwargs.items()} constructor = dialect.parser_class.FUNCTIONS.get(name.upper()) if constructor: if converted: if "dialect" in constructor.__code__.co_varnames: function = constructor(converted, dialect=dialect) else: function = constructor(converted) elif constructor.__name__ == "from_arg_list": function = constructor.__self__(**kwargs) # type: ignore else: constructor = FUNCTION_BY_NAME.get(name.upper()) if constructor: function = constructor(**kwargs) else: raise ValueError( f"Unable to convert '{name}' into a Func. Either manually construct " "the Func expression of interest or parse the function call." ) else: kwargs = kwargs or {"expressions": converted} function = Anonymous(this=name, **kwargs) for error_message in function.error_messages(converted): raise ValueError(error_message) return function def case( expression: t.Optional[ExpOrStr] = None, **opts, ) -> Case: """ Initialize a CASE statement. Example: case().when("a = 1", "foo").else_("bar") Args: expression: Optionally, the input expression (not all dialects support this) **opts: Extra keyword arguments for parsing `expression` """ if expression is not None: this = maybe_parse(expression, **opts) else: this = None return Case(this=this, ifs=[]) def array( *expressions: ExpOrStr, copy: bool = True, dialect: DialectType = None, **kwargs ) -> Array: """ Returns an array. Examples: >>> array(1, 'x').sql() 'ARRAY(1, x)' Args: expressions: the expressions to add to the array. copy: whether to copy the argument expressions. dialect: the source dialect. kwargs: the kwargs used to instantiate the function of interest. Returns: An array expression. 
""" return Array( expressions=[ maybe_parse(expression, copy=copy, dialect=dialect, **kwargs) for expression in expressions ] ) def tuple_( *expressions: ExpOrStr, copy: bool = True, dialect: DialectType = None, **kwargs ) -> Tuple: """ Returns an tuple. Examples: >>> tuple_(1, 'x').sql() '(1, x)' Args: expressions: the expressions to add to the tuple. copy: whether to copy the argument expressions. dialect: the source dialect. kwargs: the kwargs used to instantiate the function of interest. Returns: A tuple expression. """ return Tuple( expressions=[ maybe_parse(expression, copy=copy, dialect=dialect, **kwargs) for expression in expressions ] ) def true() -> Boolean: """ Returns a true Boolean expression. """ return Boolean(this=True) def false() -> Boolean: """ Returns a false Boolean expression. """ return Boolean(this=False) def null() -> Null: """ Returns a Null expression. """ return Null() NONNULL_CONSTANTS = ( Literal, Boolean, ) CONSTANTS = ( Literal, Boolean, Null, )