2025-02-13 15:01:55 +01:00
"""
2025-02-13 15:07:05 +01:00
## Expressions
Every AST node in SQLGlot is represented by a subclass of ` Expression ` .
This module contains the implementation of all supported ` Expression ` types . Additionally ,
it exposes a number of helper functions , which are mainly used to programmatically build
SQL expressions , such as ` sqlglot . expressions . select ` .
2025-02-13 15:23:26 +01:00
2025-02-13 15:07:05 +01:00
- - - -
2025-02-13 15:01:55 +01:00
"""
2025-02-13 14:53:05 +01:00
from __future__ import annotations
2025-02-13 14:46:58 +01:00
import datetime
2025-02-13 15:01:55 +01:00
import math
2025-02-13 14:31:47 +01:00
import numbers
2025-02-13 06:15:54 +01:00
import re
2025-02-13 21:19:14 +01:00
import textwrap
2025-02-13 14:53:05 +01:00
import typing as t
2025-02-13 06:15:54 +01:00
from collections import deque
from copy import deepcopy
2025-02-13 21:41:14 +01:00
from decimal import Decimal
2025-02-13 06:15:54 +01:00
from enum import auto
2025-02-13 21:01:12 +01:00
from functools import reduce
2025-02-13 06:15:54 +01:00
2025-02-13 21:09:41 +01:00
from sqlglot . errors import ErrorLevel , ParseError
2025-02-13 14:40:43 +01:00
from sqlglot . helper import (
AutoName ,
camel_to_snake_case ,
2025-02-13 14:53:05 +01:00
ensure_collection ,
2025-02-13 15:48:10 +01:00
ensure_list ,
2025-02-13 14:53:05 +01:00
seq_get ,
2025-02-13 14:40:43 +01:00
subclasses ,
2025-02-13 21:58:22 +01:00
to_bool ,
2025-02-13 14:40:43 +01:00
)
2025-02-13 21:51:42 +01:00
from sqlglot . tokens import Token , TokenError
2025-02-13 06:15:54 +01:00
2025-02-13 14:53:05 +01:00
if t . TYPE_CHECKING :
2025-02-13 21:55:19 +01:00
from typing_extensions import Self
2025-02-13 21:19:58 +01:00
from sqlglot . _typing import E , Lit
2025-02-13 15:09:58 +01:00
from sqlglot . dialects . dialect import DialectType
2025-02-13 15:07:05 +01:00
2025-02-13 21:30:28 +01:00
Q = t . TypeVar ( " Q " , bound = " Query " )
2025-02-13 21:41:14 +01:00
S = t . TypeVar ( " S " , bound = " SetOperation " )
2025-02-13 21:30:28 +01:00
2025-02-13 06:15:54 +01:00
class _Expression ( type ) :
def __new__ ( cls , clsname , bases , attrs ) :
klass = super ( ) . __new__ ( cls , clsname , bases , attrs )
2025-02-13 15:07:05 +01:00
# When an Expression class is created, its key is automatically set to be
# the lowercase version of the class' name.
2025-02-13 06:15:54 +01:00
klass . key = clsname . lower ( )
2025-02-13 15:07:05 +01:00
# This is so that docstrings are not inherited in pdoc
klass . __doc__ = klass . __doc__ or " "
2025-02-13 06:15:54 +01:00
return klass
2025-02-13 21:04:58 +01:00
SQLGLOT_META = " sqlglot.meta "
2025-02-13 21:17:09 +01:00
TABLE_PARTS = ( " this " , " db " , " catalog " )
2025-02-13 21:31:23 +01:00
COLUMN_PARTS = ( " this " , " table " , " db " , " catalog " )
2025-02-13 21:04:58 +01:00
2025-02-13 06:15:54 +01:00
class Expression ( metaclass = _Expression ) :
"""
2025-02-13 15:07:05 +01:00
The base class for all expressions in a syntax tree . Each Expression encapsulates any necessary
context , such as its child expressions , their names ( arg keys ) , and whether a given child expression
is optional or not .
2025-02-13 06:15:54 +01:00
Attributes :
2025-02-13 15:07:05 +01:00
key : a unique key for each class in the Expression hierarchy . This is useful for hashing
and representing expressions as strings .
2025-02-13 21:28:36 +01:00
arg_types: determines the arguments (child nodes) supported by an expression. It maps
arg keys to booleans that indicate whether the corresponding args are required (True)
or optional (False).
2025-02-13 15:53:39 +01:00
parent : a reference to the parent expression ( or None , in case of root expressions ) .
arg_key : the arg key an expression is associated with , i . e . the name its parent expression
uses to refer to it .
2025-02-13 21:30:28 +01:00
index : the index of an expression if it is inside of a list argument in its parent .
2025-02-13 15:53:39 +01:00
comments : a list of comments that are associated with a given expression . This is used in
order to preserve comments when transpiling SQL code .
2025-02-13 20:48:36 +01:00
type : the ` sqlglot . expressions . DataType ` type of an expression . This is inferred by the
2025-02-13 15:53:39 +01:00
optimizer , in order to enable some transformations that require type information .
2025-02-13 20:48:36 +01:00
meta : a dictionary that can be used to store useful metadata for a given expression .
2025-02-13 15:07:05 +01:00
Example :
>> > class Foo ( Expression ) :
. . . arg_types = { " this " : True , " expression " : False }
The above definition informs us that Foo is an Expression that requires an argument called
" this " and may also optionally receive an argument called " expression " .
Args :
args : a mapping used for retrieving the arguments of an expression , given their arg keys .
2025-02-13 06:15:54 +01:00
"""
2025-02-13 15:07:05 +01:00
key = " expression "
2025-02-13 06:15:54 +01:00
arg_types = { " this " : True }
2025-02-13 21:30:28 +01:00
__slots__ = ( " args " , " parent " , " arg_key " , " index " , " comments " , " _type " , " _meta " , " _hash " )
2025-02-13 06:15:54 +01:00
2025-02-13 15:07:05 +01:00
def __init__ ( self , * * args : t . Any ) :
self . args : t . Dict [ str , t . Any ] = args
self . parent : t . Optional [ Expression ] = None
self . arg_key : t . Optional [ str ] = None
2025-02-13 21:30:28 +01:00
self . index : t . Optional [ int ] = None
2025-02-13 15:07:05 +01:00
self . comments : t . Optional [ t . List [ str ] ] = None
2025-02-13 14:58:37 +01:00
self . _type : t . Optional [ DataType ] = None
2025-02-13 15:40:23 +01:00
self . _meta : t . Optional [ t . Dict [ str , t . Any ] ] = None
2025-02-13 15:48:10 +01:00
self . _hash : t . Optional [ int ] = None
2025-02-13 06:15:54 +01:00
for arg_key , value in self . args . items ( ) :
self . _set_parent ( arg_key , value )
2025-02-13 14:53:05 +01:00
def __eq__ ( self , other ) - > bool :
2025-02-13 15:48:10 +01:00
return type ( self ) is type ( other ) and hash ( self ) == hash ( other )
2025-02-13 06:15:54 +01:00
2025-02-13 15:48:10 +01:00
@property
def hashable_args ( self ) - > t . Any :
2025-02-13 20:43:05 +01:00
return frozenset (
( k , tuple ( _norm_arg ( a ) for a in v ) if type ( v ) is list else _norm_arg ( v ) )
for k , v in self . args . items ( )
if not ( v is None or v is False or ( type ( v ) is list and not v ) )
2025-02-13 06:15:54 +01:00
)
2025-02-13 15:48:10 +01:00
def __hash__ ( self ) - > int :
if self . _hash is not None :
return self . _hash
return hash ( ( self . __class__ , self . hashable_args ) )
2025-02-13 06:15:54 +01:00
@property
2025-02-13 21:09:41 +01:00
def this ( self ) - > t . Any :
2025-02-13 15:07:05 +01:00
"""
Retrieves the argument with key " this " .
"""
2025-02-13 06:15:54 +01:00
return self . args . get ( " this " )
@property
2025-02-13 21:09:41 +01:00
def expression ( self ) - > t . Any :
2025-02-13 15:07:05 +01:00
"""
Retrieves the argument with key " expression " .
"""
2025-02-13 06:15:54 +01:00
return self . args . get ( " expression " )
@property
2025-02-13 21:17:09 +01:00
def expressions ( self ) - > t . List [ t . Any ] :
2025-02-13 15:07:05 +01:00
"""
Retrieves the argument with key " expressions " .
"""
2025-02-13 06:15:54 +01:00
return self . args . get ( " expressions " ) or [ ]
2025-02-13 15:26:26 +01:00
def text ( self , key ) - > str :
2025-02-13 15:07:05 +01:00
"""
Returns a textual representation of the argument corresponding to " key " . This can only be used
for args that are strings or leaf Expression instances , such as identifiers and literals .
"""
2025-02-13 06:15:54 +01:00
field = self . args . get ( key )
if isinstance ( field , str ) :
return field
if isinstance ( field , ( Identifier , Literal , Var ) ) :
return field . this
2025-02-13 15:23:26 +01:00
if isinstance ( field , ( Star , Null ) ) :
return field . name
2025-02-13 06:15:54 +01:00
return " "
@property
def is_string(self) -> bool:
    """
    Checks whether this node is a Literal whose "is_string" arg is set.
    """
    if not isinstance(self, Literal):
        return False
    return self.args["is_string"]
@property
def is_number(self) -> bool:
    """
    Checks whether this node is a non-string Literal, or a Neg wrapping a number.
    """
    if isinstance(self, Literal) and not self.args["is_string"]:
        return True
    return isinstance(self, Neg) and self.this.is_number
2025-02-13 06:15:54 +01:00
2025-02-13 21:41:14 +01:00
def to_py(self) -> t.Any:
    """
    Returns a Python object equivalent of the SQL node.

    Raises:
        ValueError: always, in this base implementation.
    """
    message = f"{self} cannot be converted to a Python object."
    raise ValueError(message)
2025-02-13 21:31:23 +01:00
2025-02-13 06:15:54 +01:00
@property
def is_int(self) -> bool:
    """
    Checks whether this expression is a number whose Python value is an int.
    """
    number = self.is_number
    return number and isinstance(self.to_py(), int)
2025-02-13 06:15:54 +01:00
@property
def is_star(self) -> bool:
    """Checks whether this node is a Star, or a Column whose "this" is a Star."""
    if isinstance(self, Star):
        return True
    return isinstance(self, Column) and isinstance(self.this, Star)
@property
def alias(self) -> str:
    """
    Returns the alias of the expression, or an empty string if it's not aliased.
    """
    alias_arg = self.args.get("alias")
    if isinstance(alias_arg, TableAlias):
        return alias_arg.name
    return self.text("alias")
2025-02-13 20:51:40 +01:00
@property
def alias_column_names ( self ) - > t . List [ str ] :
table_alias = self . args . get ( " alias " )
if not table_alias :
return [ ]
return [ c . name for c in table_alias . args . get ( " columns " ) or [ ] ]
2025-02-13 06:15:54 +01:00
@property
2025-02-13 15:23:26 +01:00
def name ( self ) - > str :
2025-02-13 06:15:54 +01:00
return self . text ( " this " )
@property
def alias_or_name(self) -> str:
    """Returns the alias if it is non-empty, otherwise the name."""
    alias = self.alias
    return alias if alias else self.name
2025-02-13 15:07:05 +01:00
@property
2025-02-13 15:57:23 +01:00
def output_name ( self ) - > str :
2025-02-13 15:07:05 +01:00
"""
Name of the output column if this expression is a selection .
If the Expression has no output name , an empty string is returned .
Example :
>> > from sqlglot import parse_one
>> > parse_one ( " SELECT a " ) . expressions [ 0 ] . output_name
' a '
>> > parse_one ( " SELECT b AS c " ) . expressions [ 0 ] . output_name
' c '
>> > parse_one ( " SELECT 1 + 2 " ) . expressions [ 0 ] . output_name
' '
"""
return " "
2025-02-13 14:58:37 +01:00
@property
def type ( self ) - > t . Optional [ DataType ] :
return self . _type
@type.setter
def type ( self , dtype : t . Optional [ DataType | DataType . Type | str ] ) - > None :
if dtype and not isinstance ( dtype , DataType ) :
dtype = DataType . build ( dtype )
self . _type = dtype # type: ignore
2025-02-13 21:17:09 +01:00
def is_type ( self , * dtypes ) - > bool :
return self . type is not None and self . type . is_type ( * dtypes )
2025-02-13 21:19:14 +01:00
def is_leaf ( self ) - > bool :
return not any ( isinstance ( v , ( Expression , list ) ) for v in self . args . values ( ) )
2025-02-13 15:40:23 +01:00
@property
def meta ( self ) - > t . Dict [ str , t . Any ] :
if self . _meta is None :
self . _meta = { }
return self . _meta
2025-02-13 06:15:54 +01:00
def __deepcopy__ ( self , memo ) :
2025-02-13 21:30:28 +01:00
root = self . __class__ ( )
stack = [ ( self , root ) ]
while stack :
node , copy = stack . pop ( )
if node . comments is not None :
copy . comments = deepcopy ( node . comments )
if node . _type is not None :
copy . _type = deepcopy ( node . _type )
if node . _meta is not None :
copy . _meta = deepcopy ( node . _meta )
if node . _hash is not None :
copy . _hash = node . _hash
for k , vs in node . args . items ( ) :
if hasattr ( vs , " parent " ) :
stack . append ( ( vs , vs . __class__ ( ) ) )
copy . set ( k , stack [ - 1 ] [ - 1 ] )
elif type ( vs ) is list :
copy . args [ k ] = [ ]
for v in vs :
if hasattr ( v , " parent " ) :
stack . append ( ( v , v . __class__ ( ) ) )
copy . append ( k , stack [ - 1 ] [ - 1 ] )
else :
copy . append ( k , v )
else :
copy . args [ k ] = vs
return root
2025-02-13 06:15:54 +01:00
2025-02-13 21:57:37 +01:00
def copy ( self ) - > Self :
2025-02-13 15:07:05 +01:00
"""
Returns a deep copy of the expression .
"""
2025-02-13 21:30:28 +01:00
return deepcopy ( self )
2025-02-13 06:15:54 +01:00
2025-02-13 21:57:20 +01:00
def add_comments ( self , comments : t . Optional [ t . List [ str ] ] = None , prepend : bool = False ) - > None :
2025-02-13 15:53:39 +01:00
if self . comments is None :
self . comments = [ ]
2025-02-13 21:32:41 +01:00
2025-02-13 15:53:39 +01:00
if comments :
2025-02-13 21:04:58 +01:00
for comment in comments :
_ , * meta = comment . split ( SQLGLOT_META )
if meta :
for kv in " " . join ( meta ) . split ( " , " ) :
k , * v = kv . split ( " = " )
value = v [ 0 ] . strip ( ) if v else True
2025-02-13 21:58:22 +01:00
self . meta [ k . strip ( ) ] = to_bool ( value )
2025-02-13 21:57:20 +01:00
if not prepend :
self . comments . append ( comment )
if prepend :
self . comments = comments + self . comments
2025-02-13 15:53:39 +01:00
2025-02-13 21:32:41 +01:00
def pop_comments ( self ) - > t . List [ str ] :
comments = self . comments or [ ]
self . comments = None
return comments
2025-02-13 15:57:23 +01:00
def append ( self , arg_key : str , value : t . Any ) - > None :
2025-02-13 08:04:41 +01:00
"""
Appends value to arg_key if it ' s a list or sets it as a new list.
Args :
arg_key ( str ) : name of the list expression arg
value ( Any ) : value to append to the list
"""
2025-02-13 21:30:28 +01:00
if type ( self . args . get ( arg_key ) ) is not list :
2025-02-13 08:04:41 +01:00
self . args [ arg_key ] = [ ]
self . _set_parent ( arg_key , value )
2025-02-13 21:30:28 +01:00
values = self . args [ arg_key ]
if hasattr ( value , " parent " ) :
value . index = len ( values )
values . append ( value )
2025-02-13 08:04:41 +01:00
2025-02-13 21:54:13 +01:00
def set (
self ,
arg_key : str ,
value : t . Any ,
index : t . Optional [ int ] = None ,
overwrite : bool = True ,
) - > None :
2025-02-13 06:15:54 +01:00
"""
2025-02-13 20:44:18 +01:00
Sets arg_key to value .
2025-02-13 06:15:54 +01:00
Args :
2025-02-13 20:44:18 +01:00
arg_key : name of the expression arg .
2025-02-13 06:15:54 +01:00
value : value to set the arg to .
2025-02-13 21:30:28 +01:00
index : if the arg is a list , this specifies what position to add the value in it .
2025-02-13 21:54:13 +01:00
overwrite : assuming an index is given , this determines whether to overwrite the
list entry instead of only inserting a new value ( i . e . , like list . insert ) .
2025-02-13 21:30:28 +01:00
"""
if index is not None :
expressions = self . args . get ( arg_key ) or [ ]
if seq_get ( expressions , index ) is None :
return
if value is None :
expressions . pop ( index )
for v in expressions [ index : ] :
v . index = v . index - 1
return
if isinstance ( value , list ) :
expressions . pop ( index )
expressions [ index : index ] = value
2025-02-13 21:54:13 +01:00
elif overwrite :
2025-02-13 21:30:28 +01:00
expressions [ index ] = value
2025-02-13 21:54:13 +01:00
else :
expressions . insert ( index , value )
2025-02-13 21:30:28 +01:00
value = expressions
elif value is None :
2025-02-13 20:44:18 +01:00
self . args . pop ( arg_key , None )
return
2025-02-13 06:15:54 +01:00
self . args [ arg_key ] = value
2025-02-13 21:30:28 +01:00
self . _set_parent ( arg_key , value , index )
2025-02-13 06:15:54 +01:00
2025-02-13 21:30:28 +01:00
def _set_parent ( self , arg_key : str , value : t . Any , index : t . Optional [ int ] = None ) - > None :
2025-02-13 15:48:10 +01:00
if hasattr ( value , " parent " ) :
2025-02-13 06:15:54 +01:00
value . parent = self
value . arg_key = arg_key
2025-02-13 21:30:28 +01:00
value . index = index
2025-02-13 15:48:10 +01:00
elif type ( value ) is list :
2025-02-13 21:30:28 +01:00
for index , v in enumerate ( value ) :
2025-02-13 15:48:10 +01:00
if hasattr ( v , " parent " ) :
2025-02-13 06:15:54 +01:00
v . parent = self
v . arg_key = arg_key
2025-02-13 21:30:28 +01:00
v . index = index
2025-02-13 06:15:54 +01:00
@property
2025-02-13 15:57:23 +01:00
def depth ( self ) - > int :
2025-02-13 06:15:54 +01:00
"""
Returns the depth of this tree .
"""
if self . parent :
return self . parent . depth + 1
return 0
2025-02-13 21:30:28 +01:00
def iter_expressions ( self , reverse : bool = False ) - > t . Iterator [ Expression ] :
2025-02-13 15:48:10 +01:00
""" Yields the key and expression for all arguments, exploding list args. """
2025-02-13 21:30:28 +01:00
# remove tuple when python 3.7 is deprecated
2025-02-13 21:56:02 +01:00
for vs in reversed ( tuple ( self . args . values ( ) ) ) if reverse else self . args . values ( ) : # type: ignore
2025-02-13 15:48:10 +01:00
if type ( vs ) is list :
2025-02-13 21:56:02 +01:00
for v in reversed ( vs ) if reverse else vs : # type: ignore
2025-02-13 15:48:10 +01:00
if hasattr ( v , " parent " ) :
2025-02-13 21:30:28 +01:00
yield v
2025-02-13 15:48:10 +01:00
else :
if hasattr ( vs , " parent " ) :
2025-02-13 21:30:28 +01:00
yield vs
2025-02-13 15:48:10 +01:00
2025-02-13 15:57:23 +01:00
def find ( self , * expression_types : t . Type [ E ] , bfs : bool = True ) - > t . Optional [ E ] :
2025-02-13 06:15:54 +01:00
"""
Returns the first node in this tree which matches at least one of
the specified types .
Args :
2025-02-13 15:46:19 +01:00
expression_types : the expression type ( s ) to match .
2025-02-13 15:57:23 +01:00
bfs : whether to search the AST using the BFS algorithm ( DFS is used if false ) .
2025-02-13 06:15:54 +01:00
Returns :
2025-02-13 15:07:05 +01:00
The node which matches the criteria or None if no such node was found .
2025-02-13 06:15:54 +01:00
"""
return next ( self . find_all ( * expression_types , bfs = bfs ) , None )
2025-02-13 15:57:23 +01:00
def find_all ( self , * expression_types : t . Type [ E ] , bfs : bool = True ) - > t . Iterator [ E ] :
2025-02-13 06:15:54 +01:00
"""
Returns a generator object which visits all nodes in this tree and only
yields those that match at least one of the specified expression types .
Args :
2025-02-13 15:46:19 +01:00
expression_types : the expression type ( s ) to match .
2025-02-13 15:57:23 +01:00
bfs : whether to search the AST using the BFS algorithm ( DFS is used if false ) .
2025-02-13 06:15:54 +01:00
Returns :
2025-02-13 15:07:05 +01:00
The generator object .
2025-02-13 06:15:54 +01:00
"""
2025-02-13 21:30:28 +01:00
for expression in self . walk ( bfs = bfs ) :
2025-02-13 06:15:54 +01:00
if isinstance ( expression , expression_types ) :
yield expression
2025-02-13 15:57:23 +01:00
def find_ancestor ( self , * expression_types : t . Type [ E ] ) - > t . Optional [ E ] :
2025-02-13 06:15:54 +01:00
"""
Returns a nearest parent matching expression_types .
Args :
2025-02-13 15:46:19 +01:00
expression_types : the expression type ( s ) to match .
2025-02-13 06:15:54 +01:00
Returns :
2025-02-13 15:07:05 +01:00
The parent node .
2025-02-13 06:15:54 +01:00
"""
ancestor = self . parent
while ancestor and not isinstance ( ancestor , expression_types ) :
ancestor = ancestor . parent
2025-02-13 21:19:58 +01:00
return ancestor # type: ignore
2025-02-13 06:15:54 +01:00
@property
def parent_select(self) -> t.Optional[Select]:
    """
    Returns the nearest ancestor that is a Select, or None if there is none.
    """
    nearest_select = self.find_ancestor(Select)
    return nearest_select
2025-02-13 15:48:10 +01:00
@property
2025-02-13 15:57:23 +01:00
def same_parent ( self ) - > bool :
2025-02-13 15:48:10 +01:00
""" Returns if the parent is the same class as itself. """
return type ( self . parent ) is self . __class__
2025-02-13 15:40:23 +01:00
def root ( self ) - > Expression :
"""
Returns the root expression of this tree .
"""
expression = self
while expression . parent :
expression = expression . parent
return expression
2025-02-13 21:30:28 +01:00
def walk (
self , bfs : bool = True , prune : t . Optional [ t . Callable [ [ Expression ] , bool ] ] = None
) - > t . Iterator [ Expression ] :
2025-02-13 06:15:54 +01:00
"""
Returns a generator object which visits all nodes in this tree .
Args :
2025-02-13 21:30:28 +01:00
bfs : if set to True the BFS traversal order will be applied ,
2025-02-13 06:15:54 +01:00
otherwise the DFS traversal will be used instead .
2025-02-13 21:30:28 +01:00
prune : callable that returns True if the generator should stop traversing
this branch of the tree .
2025-02-13 06:15:54 +01:00
Returns :
the generator object .
"""
if bfs :
2025-02-13 14:43:32 +01:00
yield from self . bfs ( prune = prune )
2025-02-13 06:15:54 +01:00
else :
2025-02-13 14:43:32 +01:00
yield from self . dfs ( prune = prune )
2025-02-13 06:15:54 +01:00
2025-02-13 21:30:28 +01:00
def dfs (
self , prune : t . Optional [ t . Callable [ [ Expression ] , bool ] ] = None
) - > t . Iterator [ Expression ] :
2025-02-13 06:15:54 +01:00
"""
Returns a generator object which visits all nodes in this tree in
the DFS ( Depth - first ) order .
Returns :
2025-02-13 15:07:05 +01:00
The generator object .
2025-02-13 06:15:54 +01:00
"""
2025-02-13 21:30:28 +01:00
stack = [ self ]
while stack :
node = stack . pop ( )
2025-02-13 06:15:54 +01:00
2025-02-13 21:30:28 +01:00
yield node
2025-02-13 06:15:54 +01:00
2025-02-13 21:30:28 +01:00
if prune and prune ( node ) :
continue
for v in node . iter_expressions ( reverse = True ) :
stack . append ( v )
def bfs (
self , prune : t . Optional [ t . Callable [ [ Expression ] , bool ] ] = None
) - > t . Iterator [ Expression ] :
2025-02-13 06:15:54 +01:00
"""
Returns a generator object which visits all nodes in this tree in
the BFS ( Breadth - first ) order .
Returns :
2025-02-13 15:07:05 +01:00
The generator object .
2025-02-13 06:15:54 +01:00
"""
2025-02-13 21:30:28 +01:00
queue = deque ( [ self ] )
2025-02-13 06:15:54 +01:00
while queue :
2025-02-13 21:30:28 +01:00
node = queue . popleft ( )
2025-02-13 06:15:54 +01:00
2025-02-13 21:30:28 +01:00
yield node
if prune and prune ( node ) :
2025-02-13 06:15:54 +01:00
continue
2025-02-13 21:30:28 +01:00
for v in node . iter_expressions ( ) :
queue . append ( v )
2025-02-13 06:15:54 +01:00
def unnest(self):
    """
    Strips any enclosing Paren nodes and returns the first non-Paren expression found
    (this node itself if it is not a Paren).
    """
    inner = self
    while type(inner) is Paren:
        inner = inner.this
    return inner
2025-02-13 08:04:41 +01:00
def unalias(self):
    """
    Returns the aliased expression if this node is an Alias, otherwise the node itself.
    """
    return self.this if isinstance(self, Alias) else self
2025-02-13 06:15:54 +01:00
def unnest_operands ( self ) :
"""
Returns unnested operands as a tuple .
"""
2025-02-13 21:30:28 +01:00
return tuple ( arg . unnest ( ) for arg in self . iter_expressions ( ) )
2025-02-13 06:15:54 +01:00
def flatten ( self , unnest = True ) :
"""
2025-02-13 21:17:09 +01:00
Returns a generator which yields child nodes whose parents are the same class .
2025-02-13 06:15:54 +01:00
A AND B AND C - > [ A , B , C ]
"""
2025-02-13 21:30:28 +01:00
for node in self . dfs ( prune = lambda n : n . parent and type ( n ) is not self . __class__ ) :
2025-02-13 21:20:36 +01:00
if type ( node ) is not self . __class__ :
2025-02-13 21:08:10 +01:00
yield node . unnest ( ) if unnest and not isinstance ( node , Subquery ) else node
2025-02-13 06:15:54 +01:00
2025-02-13 15:57:23 +01:00
def __str__(self) -> str:
    """Returns the SQL produced by `sql()` with its default arguments."""
    rendered = self.sql()
    return rendered
2025-02-13 15:57:23 +01:00
def __repr__(self) -> str:
    """Returns the non-verbose `_to_s` representation of this expression."""
    representation = _to_s(self)
    return representation
def to_s(self) -> str:
    """
    Same as __repr__, but includes additional information which can be useful
    for debugging, like empty or missing args and the AST nodes' object IDs.
    """
    verbose_representation = _to_s(self, verbose=True)
    return verbose_representation
2025-02-13 06:15:54 +01:00
2025-02-13 15:09:58 +01:00
def sql(self, dialect: DialectType = None, **opts) -> str:
    """
    Returns SQL string representation of this tree.

    Args:
        dialect: the dialect of the output SQL string (eg. "spark", "hive", "presto", "mysql").
        opts: other `sqlglot.generator.Generator` options.

    Returns:
        The SQL string.
    """
    from sqlglot.dialects import Dialect

    target_dialect = Dialect.get_or_raise(dialect)
    return target_dialect.generate(self, **opts)
2025-02-13 06:15:54 +01:00
2025-02-13 21:30:28 +01:00
def transform ( self , fun : t . Callable , * args : t . Any , copy : bool = True , * * kwargs ) - > Expression :
2025-02-13 06:15:54 +01:00
"""
2025-02-13 21:30:28 +01:00
Visits all tree nodes ( excluding already transformed ones )
2025-02-13 06:15:54 +01:00
and applies the given transformation function to each node .
Args :
2025-02-13 21:30:28 +01:00
fun : a function which takes a node as an argument and returns a
2025-02-13 14:31:47 +01:00
new transformed node or the same node without modifications . If the function
returns None , then the corresponding node will be removed from the syntax tree .
2025-02-13 21:30:28 +01:00
copy : if set to True a new tree instance is constructed , otherwise the tree is
2025-02-13 06:15:54 +01:00
modified in place .
Returns :
2025-02-13 15:07:05 +01:00
The transformed tree .
2025-02-13 06:15:54 +01:00
"""
2025-02-13 21:30:28 +01:00
root = None
new_node = None
2025-02-13 06:15:54 +01:00
2025-02-13 21:30:28 +01:00
for node in ( self . copy ( ) if copy else self ) . dfs ( prune = lambda n : n is not new_node ) :
parent , arg_key , index = node . parent , node . arg_key , node . index
new_node = fun ( node , * args , * * kwargs )
if not root :
root = new_node
elif new_node is not node :
parent . set ( arg_key , new_node , index )
2025-02-13 06:15:54 +01:00
2025-02-13 21:30:28 +01:00
assert root
return root . assert_is ( Expression )
2025-02-13 06:15:54 +01:00
2025-02-13 15:57:23 +01:00
@t.overload
2025-02-13 21:29:39 +01:00
def replace ( self , expression : E ) - > E : . . .
2025-02-13 15:57:23 +01:00
@t.overload
2025-02-13 21:29:39 +01:00
def replace ( self , expression : None ) - > None : . . .
2025-02-13 15:57:23 +01:00
2025-02-13 06:15:54 +01:00
def replace ( self , expression ) :
"""
Swap out this expression with a new expression .
For example : :
>> > tree = Select ( ) . select ( " x " ) . from_ ( " tbl " )
2025-02-13 21:19:14 +01:00
>> > tree . find ( Column ) . replace ( column ( " y " ) )
Column (
this = Identifier ( this = y , quoted = False ) )
2025-02-13 06:15:54 +01:00
>> > tree . sql ( )
' SELECT y FROM tbl '
Args :
2025-02-13 15:57:23 +01:00
expression : new node
2025-02-13 06:15:54 +01:00
2025-02-13 15:07:05 +01:00
Returns :
The new expression or expressions .
2025-02-13 06:15:54 +01:00
"""
2025-02-13 21:30:28 +01:00
parent = self . parent
if not parent or parent is expression :
2025-02-13 06:15:54 +01:00
return expression
2025-02-13 21:30:28 +01:00
key = self . arg_key
value = parent . args . get ( key )
if type ( expression ) is list and isinstance ( value , Expression ) :
# We are trying to replace an Expression with a list, so it's assumed that
# the intention was to really replace the parent of this expression.
value . parent . replace ( expression )
else :
parent . set ( key , expression , self . index )
if expression is not self :
self . parent = None
self . arg_key = None
self . index = None
2025-02-13 06:15:54 +01:00
return expression
2025-02-13 15:57:23 +01:00
def pop ( self : E ) - > E :
2025-02-13 14:40:43 +01:00
"""
Remove this expression from its AST .
2025-02-13 15:46:19 +01:00
Returns :
The popped expression .
2025-02-13 14:40:43 +01:00
"""
self . replace ( None )
2025-02-13 15:46:19 +01:00
return self
2025-02-13 14:40:43 +01:00
2025-02-13 15:57:23 +01:00
def assert_is ( self , type_ : t . Type [ E ] ) - > E :
2025-02-13 06:15:54 +01:00
"""
Assert that this ` Expression ` is an instance of ` type_ ` .
If it is NOT an instance of ` type_ ` , this raises an assertion error .
Otherwise , this returns this expression .
Examples :
This is useful for type security in chained expressions :
>> > import sqlglot
>> > sqlglot . parse_one ( " SELECT x from y " ) . assert_is ( Select ) . select ( " z " ) . sql ( )
' SELECT x, z FROM y '
"""
2025-02-13 21:20:36 +01:00
if not isinstance ( self , type_ ) :
raise AssertionError ( f " { self } is not { type_ } . " )
2025-02-13 06:15:54 +01:00
return self
2025-02-13 15:07:05 +01:00
def error_messages ( self , args : t . Optional [ t . Sequence ] = None ) - > t . List [ str ] :
"""
Checks if this expression is valid ( e . g . all mandatory args are set ) .
Args :
args : a sequence of values that were used to instantiate a Func expression . This is used
to check that the provided arguments don ' t exceed the function argument limit.
Returns :
A list of error messages for all possible errors that were found .
"""
errors : t . List [ str ] = [ ]
for k in self . args :
if k not in self . arg_types :
errors . append ( f " Unexpected keyword: ' { k } ' for { self . __class__ } " )
for k , mandatory in self . arg_types . items ( ) :
v = self . args . get ( k )
if mandatory and ( v is None or ( isinstance ( v , list ) and not v ) ) :
errors . append ( f " Required keyword: ' { k } ' missing for { self . __class__ } " )
if (
args
and isinstance ( self , Func )
and len ( args ) > len ( self . arg_types )
and not self . is_var_len_args
) :
errors . append (
f " The number of provided arguments ( { len ( args ) } ) is greater than "
f " the maximum number of supported arguments ( { len ( self . arg_types ) } ) "
)
return errors
2025-02-13 15:03:38 +01:00
def dump(self):
    """
    Dump this Expression to a JSON-serializable dict.
    """
    from sqlglot.serde import dump as serialize

    return serialize(self)
@classmethod
def load(cls, obj):
    """
    Load a dict (as returned by `Expression.dump`) into an Expression instance.
    """
    from sqlglot.serde import load as deserialize

    return deserialize(obj)
2025-02-13 15:57:23 +01:00
def and_(
    self,
    *expressions: t.Optional[ExpOrStr],
    dialect: DialectType = None,
    copy: bool = True,
    wrap: bool = True,
    **opts,
) -> Condition:
    """
    AND this condition with one or multiple expressions.

    Example:
        >>> condition("x=1").and_("y=1").sql()
        'x = 1 AND y = 1'

    Args:
        *expressions: the SQL code strings to parse.
            If an `Expression` instance is passed, it will be used as-is.
        dialect: the dialect used to parse the input expression.
        copy: whether to copy the involved expressions (only applies to Expressions).
        wrap: whether to wrap the operands in `Paren`s. This is true by default to avoid
            precedence issues, but can be turned off when the produced AST is too deep and
            causes recursion-related issues.
        opts: other options to use to parse the input expressions.

    Returns:
        The new And condition.
    """
    # This node is the first operand, followed by the given expressions.
    operands = (self, *expressions)
    return and_(*operands, dialect=dialect, copy=copy, wrap=wrap, **opts)
2025-02-13 06:15:54 +01:00
2025-02-13 15:57:23 +01:00
def or_(
    self,
    *expressions: t.Optional[ExpOrStr],
    dialect: DialectType = None,
    copy: bool = True,
    wrap: bool = True,
    **opts,
) -> Condition:
    """
    Combine this condition with one or more expressions using OR.

    Example:
        >>> condition("x=1").or_("y=1").sql()
        'x = 1 OR y = 1'

    Args:
        *expressions: SQL code strings to parse; `Expression` instances are used as-is.
        dialect: the dialect used to parse the input expression.
        copy: whether to copy the involved expressions (only applies to Expressions).
        wrap: whether to wrap the operands in `Paren`s. On by default to avoid precedence
            issues; can be switched off when the produced AST is too deep and causes
            recursion-related issues.
        opts: other options to use to parse the input expressions.

    Returns:
        The new Or condition.
    """
    # `or_` below resolves to the module-level builder, not to this method.
    operands = (self, *expressions)
    return or_(*operands, dialect=dialect, copy=copy, wrap=wrap, **opts)
2025-02-13 06:15:54 +01:00
2025-02-13 15:57:23 +01:00
def not_(self, copy: bool = True):
    """
    Negate this condition by wrapping it with NOT.

    Example:
        >>> condition("x=1").not_().sql()
        'NOT x = 1'

    Args:
        copy: whether to copy this object.

    Returns:
        The new Not instance.
    """
    # Delegates to the module-level `not_` builder.
    negated = not_(self, copy=copy)
    return negated
2025-02-13 06:15:54 +01:00
2025-02-13 15:57:23 +01:00
def as_(
    self,
    alias: str | Identifier,
    quoted: t.Optional[bool] = None,
    dialect: DialectType = None,
    copy: bool = True,
    **opts,
) -> Alias:
    """
    Alias this expression by delegating to the module-level `alias_` builder.

    All arguments are forwarded unchanged: `alias`, `quoted`, `dialect`, `copy`
    and any extra `opts`.
    """
    aliased = alias_(self, alias, quoted=quoted, dialect=dialect, copy=copy, **opts)
    return aliased
def _binop(self, klass: t.Type[E], other: t.Any, reverse: bool = False) -> E:
    """
    Build a binary node of type `klass` from a copy of `self` and `other`.

    Args:
        klass: the node class to instantiate with `this=` / `expression=`.
        other: the second operand; passed through `convert(..., copy=True)`.
        reverse: if `True`, `other` becomes `this` and `self` becomes `expression`.

    Returns:
        The newly built `klass` instance.
    """
    lhs = self.copy()
    rhs = convert(other, copy=True)

    # Operands are passed through `_wrap(..., Binary)` unless one of them is
    # already an instance of `klass`.
    if not (isinstance(lhs, klass) or isinstance(rhs, klass)):
        lhs = _wrap(lhs, Binary)
        rhs = _wrap(rhs, Binary)

    first, second = (rhs, lhs) if reverse else (lhs, rhs)
    return klass(this=first, expression=second)
2025-02-13 21:03:38 +01:00
def __getitem__(self, other: ExpOrStr | t.Tuple[ExpOrStr, ...]) -> Bracket:
    """
    Build a `Bracket` node for `self[other]`.

    Args:
        other: a single key, or a tuple of keys as produced by `expr[a, b, ...]`.
            The tuple annotation is variadic because a subscript may carry any
            number of keys, not exactly one.

    Returns:
        A `Bracket` whose `this` is a copy of `self` and whose `expressions` are
        the converted (and copied) keys.
    """
    keys = [convert(key, copy=True) for key in ensure_list(other)]
    return Bracket(this=self.copy(), expressions=keys)
2025-02-13 15:52:09 +01:00
2025-02-13 21:03:38 +01:00
def __iter__ ( self ) - > t . Iterator :
if " expressions " in self . arg_types :
return iter ( self . args . get ( " expressions " ) or [ ] )
# We define this because __getitem__ converts Expression into an iterable, which is
# problematic because one can hit infinite loops if they do "for x in some_expr: ..."
# See: https://peps.python.org/pep-0234/
raise TypeError ( f " ' { self . __class__ . __name__ } ' object is not iterable " )
2025-02-13 15:53:39 +01:00
def isin(
    self,
    *expressions: t.Any,
    query: t.Optional[ExpOrStr] = None,
    unnest: t.Optional[ExpOrStr] | t.Collection[ExpOrStr] = None,
    copy: bool = True,
    **opts,
) -> In:
    """
    Build an `In` node with this expression as its `this` operand.

    Args:
        *expressions: values to test against; each is passed through `convert`.
        query: optional query (string or expression) to test against. It is parsed
            and, unless it is already a `Subquery`, wrapped via `.subquery(copy=False)`.
        unnest: optional expression(s) that are parsed and wrapped in an `Unnest`.
        copy: whether to copy `self` and the provided expressions.
        opts: other options forwarded to `maybe_parse`.

    Returns:
        The new `In` node.
    """
    subquery = None
    if query:
        subquery = maybe_parse(query, copy=copy, **opts)
        if subquery and not isinstance(subquery, Subquery):
            subquery = subquery.subquery(copy=False)

    this = maybe_copy(self, copy)
    values = [convert(value, copy=copy) for value in expressions]

    unnest_node = None
    if unnest:
        unnest_items = [
            maybe_parse(t.cast(ExpOrStr, item), copy=copy, **opts)
            for item in ensure_list(unnest)
        ]
        unnest_node = Unnest(expressions=unnest_items)

    return In(this=this, expressions=values, query=subquery, unnest=unnest_node)
2025-02-13 15:57:23 +01:00
def between(self, low: t.Any, high: t.Any, copy: bool = True, **opts) -> Between:
    """
    Build a `Between` node with this expression as `this`.

    Args:
        low: the lower bound; passed through `convert`.
        high: the upper bound; passed through `convert`.
        copy: whether to copy `self` and the bounds.
        opts: extra keyword arguments forwarded verbatim to `convert`.

    Returns:
        The new `Between` node.
    """
    this = maybe_copy(self, copy)
    lower = convert(low, copy=copy, **opts)
    upper = convert(high, copy=copy, **opts)
    return Between(this=this, low=lower, high=upper)
2025-02-13 15:57:23 +01:00
def is_(self, other: ExpOrStr) -> Is:
    """Return the `Is` node built by `_binop` from this expression and `other`."""
    return self._binop(klass=Is, other=other)
2025-02-13 15:52:09 +01:00
def like(self, other: ExpOrStr) -> Like:
    """Return the `Like` node built by `_binop` from this expression and `other`."""
    return self._binop(klass=Like, other=other)
def ilike(self, other: ExpOrStr) -> ILike:
    """Return the `ILike` node built by `_binop` from this expression and `other`."""
    return self._binop(klass=ILike, other=other)
2025-02-13 15:57:23 +01:00
def eq(self, other: t.Any) -> EQ:
    """Return the `EQ` node built by `_binop` from this expression and `other`."""
    return self._binop(klass=EQ, other=other)
2025-02-13 15:57:23 +01:00
def neq(self, other: t.Any) -> NEQ:
    """Return the `NEQ` node built by `_binop` from this expression and `other`."""
    return self._binop(klass=NEQ, other=other)
def rlike(self, other: ExpOrStr) -> RegexpLike:
    """Return the `RegexpLike` node built by `_binop` from this expression and `other`."""
    return self._binop(klass=RegexpLike, other=other)
2025-02-13 21:17:09 +01:00
def div(self, other: ExpOrStr, typed: bool = False, safe: bool = False) -> Div:
    """
    Build a `Div` node via `_binop` and record the `typed` / `safe` flags on it.

    Args:
        other: the second operand.
        typed: stored under the node's `typed` arg.
        safe: stored under the node's `safe` arg.

    Returns:
        The new `Div` node.
    """
    node = self._binop(Div, other)
    # The flags are written straight into `args`, in the same order as before.
    for flag, value in (("typed", typed), ("safe", safe)):
        node.args[flag] = value
    return node
2025-02-13 21:30:28 +01:00
def asc(self, nulls_first: bool = True) -> Ordered:
    """Wrap a copy of this expression in an `Ordered` node (no `desc` flag set)."""
    target = self.copy()
    return Ordered(this=target, nulls_first=nulls_first)
2025-02-13 21:19:14 +01:00
def desc(self, nulls_first: bool = False) -> Ordered:
    """Wrap a copy of this expression in an `Ordered` node with `desc=True`."""
    target = self.copy()
    return Ordered(this=target, desc=True, nulls_first=nulls_first)
2025-02-13 15:57:23 +01:00
def __lt__(self, other: t.Any) -> LT:
    """Invoked for `self < other`; returns the `LT` node built by `_binop`."""
    return self._binop(klass=LT, other=other)
2025-02-13 15:57:23 +01:00
def __le__(self, other: t.Any) -> LTE:
    """Invoked for `self <= other`; returns the `LTE` node built by `_binop`."""
    return self._binop(klass=LTE, other=other)
2025-02-13 15:57:23 +01:00
def __gt__(self, other: t.Any) -> GT:
    """Invoked for `self > other`; returns the `GT` node built by `_binop`."""
    return self._binop(klass=GT, other=other)
2025-02-13 15:57:23 +01:00
def __ge__(self, other: t.Any) -> GTE:
    """Invoked for `self >= other`; returns the `GTE` node built by `_binop`."""
    return self._binop(klass=GTE, other=other)
2025-02-13 15:57:23 +01:00
def __add__(self, other: t.Any) -> Add:
    """Invoked for `self + other`; returns the `Add` node built by `_binop`."""
    return self._binop(klass=Add, other=other)
2025-02-13 15:57:23 +01:00
def __radd__(self, other: t.Any) -> Add:
    """Invoked for `other + self`; `reverse=True` makes `other` the first operand."""
    return self._binop(klass=Add, other=other, reverse=True)
2025-02-13 15:57:23 +01:00
def __sub__(self, other: t.Any) -> Sub:
    """Invoked for `self - other`; returns the `Sub` node built by `_binop`."""
    return self._binop(klass=Sub, other=other)
2025-02-13 15:57:23 +01:00
def __rsub__(self, other: t.Any) -> Sub:
    """Invoked for `other - self`; `reverse=True` makes `other` the first operand."""
    return self._binop(klass=Sub, other=other, reverse=True)
2025-02-13 15:57:23 +01:00
def __mul__(self, other: t.Any) -> Mul:
    """Invoked for `self * other`; returns the `Mul` node built by `_binop`."""
    return self._binop(klass=Mul, other=other)
2025-02-13 15:57:23 +01:00
def __rmul__(self, other: t.Any) -> Mul:
    """Invoked for `other * self`; `reverse=True` makes `other` the first operand."""
    return self._binop(klass=Mul, other=other, reverse=True)
2025-02-13 15:57:23 +01:00
def __truediv__(self, other: t.Any) -> Div:
    """Invoked for `self / other`; returns the `Div` node built by `_binop`."""
    return self._binop(klass=Div, other=other)
2025-02-13 15:57:23 +01:00
def __rtruediv__(self, other: t.Any) -> Div:
    """Invoked for `other / self`; `reverse=True` makes `other` the first operand."""
    return self._binop(klass=Div, other=other, reverse=True)
2025-02-13 15:57:23 +01:00
def __floordiv__(self, other: t.Any) -> IntDiv:
    """Invoked for `self // other`; returns the `IntDiv` node built by `_binop`."""
    return self._binop(klass=IntDiv, other=other)
2025-02-13 15:57:23 +01:00
def __rfloordiv__(self, other: t.Any) -> IntDiv:
    """Invoked for `other // self`; `reverse=True` makes `other` the first operand."""
    return self._binop(klass=IntDiv, other=other, reverse=True)
2025-02-13 15:57:23 +01:00
def __mod__(self, other: t.Any) -> Mod:
    """Invoked for `self % other`; returns the `Mod` node built by `_binop`."""
    return self._binop(klass=Mod, other=other)
2025-02-13 15:57:23 +01:00
def __rmod__(self, other: t.Any) -> Mod:
    """Invoked for `other % self`; `reverse=True` makes `other` the first operand."""
    return self._binop(klass=Mod, other=other, reverse=True)
2025-02-13 15:57:23 +01:00
def __pow__(self, other: t.Any) -> Pow:
    """Invoked for `self ** other`; returns the `Pow` node built by `_binop`."""
    return self._binop(klass=Pow, other=other)
2025-02-13 15:57:23 +01:00
def __rpow__(self, other: t.Any) -> Pow:
    """Invoked for `other ** self`; `reverse=True` makes `other` the first operand."""
    return self._binop(klass=Pow, other=other, reverse=True)
2025-02-13 15:57:23 +01:00
def __and__(self, other: t.Any) -> And:
    """Invoked for `self & other`; returns the `And` node built by `_binop`."""
    return self._binop(klass=And, other=other)
2025-02-13 15:57:23 +01:00
def __rand__(self, other: t.Any) -> And:
    """Invoked for `other & self`; `reverse=True` makes `other` the first operand."""
    return self._binop(klass=And, other=other, reverse=True)
2025-02-13 15:57:23 +01:00
def __or__(self, other: t.Any) -> Or:
    """Invoked for `self | other`; returns the `Or` node built by `_binop`."""
    return self._binop(klass=Or, other=other)
2025-02-13 15:57:23 +01:00
def __ror__(self, other: t.Any) -> Or:
    """Invoked for `other | self`; `reverse=True` makes `other` the first operand."""
    return self._binop(klass=Or, other=other, reverse=True)
def __neg__(self) -> Neg:
    """Invoked for `-self`; returns a `Neg` around a copy passed through `_wrap(..., Binary)`."""
    operand = _wrap(self.copy(), Binary)
    return Neg(this=operand)
2025-02-13 15:52:09 +01:00
def __invert__(self) -> Not:
    """Invoked for `~self`; passes a copy of this expression to the `not_` builder."""
    duplicate = self.copy()
    return not_(duplicate)
2025-02-13 15:52:09 +01:00
2025-02-13 06:15:54 +01:00
2025-02-13 21:03:38 +01:00
# A parse target: a name, an Expression subclass, or a collection of either.
IntoType = t.Union[str, t.Type[Expression], t.Collection[t.Union[str, t.Type[Expression]]]]

# Either raw SQL text or an already-built Expression.
ExpOrStr = t.Union[str, Expression]
class Condition(Expression):
    """A logical condition, e.g. `x AND y`, or simply `x`."""
2025-02-13 06:15:54 +01:00
class Predicate(Condition):
    """A relationship between operands, e.g. `x = y`, `x > 1`, `x >= y`."""
class DerivedTable(Expression):
    """Expression whose projections are taken from the query stored in `this`."""

    @property
    def selects(self) -> t.List[Expression]:
        """Projections of `this` when it is a `Query`; otherwise an empty list."""
        if isinstance(self.this, Query):
            return self.this.selects
        return []

    @property
    def named_selects(self) -> t.List[str]:
        """The `output_name` of every projection in `selects`."""
        names = []
        for projection in self.selects:
            names.append(projection.output_name)
        return names
2025-02-13 06:15:54 +01:00
2025-02-13 21:29:39 +01:00
class Query(Expression):
    """Base class for queries; provides the builder helpers shared by its subclasses."""

    def subquery(self, alias: t.Optional[ExpOrStr] = None, copy: bool = True) -> Subquery:
        """
        Wrap this query in a `Subquery`.

        Example:
            >>> subquery = Select().select("x").from_("tbl").subquery()
            >>> Select().select("x").from_(subquery).sql()
            'SELECT x FROM (SELECT x FROM tbl)'

        Args:
            alias: an optional alias for the subquery. `Expression` instances are used
                as-is; other truthy values are turned into a `TableAlias`.
            copy: if `False`, modify this expression instance in-place.
        """
        wrapped = maybe_copy(self, copy)

        if isinstance(alias, Expression):
            table_alias = alias
        elif alias:
            table_alias = TableAlias(this=to_identifier(alias))
        else:
            table_alias = None

        return Subquery(this=wrapped, alias=table_alias)

    def limit(
        self: Q, expression: ExpOrStr | int, dialect: DialectType = None, copy: bool = True, **opts
    ) -> Q:
        """
        Add a LIMIT clause to this query.

        Example:
            >>> select("1").union(select("1")).limit(1).sql()
            'SELECT 1 UNION SELECT 1 LIMIT 1'

        Args:
            expression: the SQL code string to parse.
                This can also be an integer.
                If a `Limit` instance is passed, it will be used as-is.
                If another `Expression` instance is passed, it will be wrapped in a `Limit`.
            dialect: the dialect used to parse the input expression.
            copy: if `False`, modify this expression instance in-place.
            opts: other options to use to parse the input expressions.

        Returns:
            The limited query.
        """
        return _apply_builder(
            instance=self,
            expression=expression,
            arg="limit",
            prefix="LIMIT",
            into=Limit,
            into_arg="expression",
            dialect=dialect,
            copy=copy,
            **opts,
        )

    def offset(
        self: Q, expression: ExpOrStr | int, dialect: DialectType = None, copy: bool = True, **opts
    ) -> Q:
        """
        Set the OFFSET expression.

        Example:
            >>> Select().from_("tbl").select("x").offset(10).sql()
            'SELECT x FROM tbl OFFSET 10'

        Args:
            expression: the SQL code string to parse.
                This can also be an integer.
                If an `Offset` instance is passed, it will be used as-is.
                If another `Expression` instance is passed, it will be wrapped in an `Offset`.
            dialect: the dialect used to parse the input expression.
            copy: if `False`, modify this expression instance in-place.
            opts: other options to use to parse the input expressions.

        Returns:
            The modified query.
        """
        return _apply_builder(
            instance=self,
            expression=expression,
            arg="offset",
            prefix="OFFSET",
            into=Offset,
            into_arg="expression",
            dialect=dialect,
            copy=copy,
            **opts,
        )

    def order_by(
        self: Q,
        *expressions: t.Optional[ExpOrStr],
        append: bool = True,
        dialect: DialectType = None,
        copy: bool = True,
        **opts,
    ) -> Q:
        """
        Set the ORDER BY expression.

        Example:
            >>> Select().from_("tbl").select("x").order_by("x DESC").sql()
            'SELECT x FROM tbl ORDER BY x DESC'

        Args:
            *expressions: the SQL code strings to parse.
                If an `Order` instance is passed, it will be used as-is.
                If another `Expression` instance is passed, it will be wrapped in an `Order`.
            append: if `True`, add to any existing expressions.
                Otherwise, this flattens all the `Order` expressions into a single expression.
            dialect: the dialect used to parse the input expression.
            copy: if `False`, modify this expression instance in-place.
            opts: other options to use to parse the input expressions.

        Returns:
            The modified query.
        """
        return _apply_child_list_builder(
            *expressions,
            instance=self,
            arg="order",
            prefix="ORDER BY",
            into=Order,
            append=append,
            dialect=dialect,
            copy=copy,
            **opts,
        )

    @property
    def ctes(self) -> t.List[CTE]:
        """Returns a list of all the CTEs attached to this query."""
        with_clause = self.args.get("with")
        if not with_clause:
            return []
        return with_clause.expressions

    @property
    def selects(self) -> t.List[Expression]:
        """Returns the query's projections."""
        raise NotImplementedError("Query objects must implement `selects`")

    @property
    def named_selects(self) -> t.List[str]:
        """Returns the output names of the query's projections."""
        raise NotImplementedError("Query objects must implement `named_selects`")

    def select(
        self: Q,
        *expressions: t.Optional[ExpOrStr],
        append: bool = True,
        dialect: DialectType = None,
        copy: bool = True,
        **opts,
    ) -> Q:
        """
        Append to or set the SELECT expressions.

        Example:
            >>> Select().select("x", "y").sql()
            'SELECT x, y'

        Args:
            *expressions: the SQL code strings to parse.
                If an `Expression` instance is passed, it will be used as-is.
            append: if `True`, add to any existing expressions.
                Otherwise, this resets the expressions.
            dialect: the dialect used to parse the input expressions.
            copy: if `False`, modify this expression instance in-place.
            opts: other options to use to parse the input expressions.

        Returns:
            The modified Query expression.
        """
        raise NotImplementedError("Query objects must implement `select`")

    def with_(
        self: Q,
        alias: ExpOrStr,
        as_: ExpOrStr,
        recursive: t.Optional[bool] = None,
        materialized: t.Optional[bool] = None,
        append: bool = True,
        dialect: DialectType = None,
        copy: bool = True,
        **opts,
    ) -> Q:
        """
        Append to or set the common table expressions.

        Example:
            >>> Select().with_("tbl2", as_="SELECT * FROM tbl").select("x").from_("tbl2").sql()
            'WITH tbl2 AS (SELECT * FROM tbl) SELECT x FROM tbl2'

        Args:
            alias: the SQL code string to parse as the table name.
                If an `Expression` instance is passed, this is used as-is.
            as_: the SQL code string to parse as the table expression.
                If an `Expression` instance is passed, it will be used as-is.
            recursive: set the RECURSIVE part of the expression. Defaults to `False`.
            materialized: set the MATERIALIZED part of the expression.
            append: if `True`, add to any existing expressions.
                Otherwise, this resets the expressions.
            dialect: the dialect used to parse the input expression.
            copy: if `False`, modify this expression instance in-place.
            opts: other options to use to parse the input expressions.

        Returns:
            The modified expression.
        """
        return _apply_cte_builder(
            self,
            alias,
            as_,
            recursive=recursive,
            materialized=materialized,
            append=append,
            dialect=dialect,
            copy=copy,
            **opts,
        )

    def union(
        self, *expressions: ExpOrStr, distinct: bool = True, dialect: DialectType = None, **opts
    ) -> Union:
        """
        Builds a UNION expression.

        Example:
            >>> import sqlglot
            >>> sqlglot.parse_one("SELECT * FROM foo").union("SELECT * FROM bla").sql()
            'SELECT * FROM foo UNION SELECT * FROM bla'

        Args:
            expressions: the SQL code strings.
                If `Expression` instances are passed, they will be used as-is.
            distinct: set the DISTINCT flag if and only if this is true.
            dialect: the dialect used to parse the input expression.
            opts: other options to use to parse the input expressions.

        Returns:
            The new Union expression.
        """
        # `union` resolves to the module-level builder, not to this method.
        operands = (self, *expressions)
        return union(*operands, distinct=distinct, dialect=dialect, **opts)

    def intersect(
        self, *expressions: ExpOrStr, distinct: bool = True, dialect: DialectType = None, **opts
    ) -> Intersect:
        """
        Builds an INTERSECT expression.

        Example:
            >>> import sqlglot
            >>> sqlglot.parse_one("SELECT * FROM foo").intersect("SELECT * FROM bla").sql()
            'SELECT * FROM foo INTERSECT SELECT * FROM bla'

        Args:
            expressions: the SQL code strings.
                If `Expression` instances are passed, they will be used as-is.
            distinct: set the DISTINCT flag if and only if this is true.
            dialect: the dialect used to parse the input expression.
            opts: other options to use to parse the input expressions.

        Returns:
            The new Intersect expression.
        """
        # `intersect` resolves to the module-level builder, not to this method.
        operands = (self, *expressions)
        return intersect(*operands, distinct=distinct, dialect=dialect, **opts)

    def except_(
        self, *expressions: ExpOrStr, distinct: bool = True, dialect: DialectType = None, **opts
    ) -> Except:
        """
        Builds an EXCEPT expression.

        Example:
            >>> import sqlglot
            >>> sqlglot.parse_one("SELECT * FROM foo").except_("SELECT * FROM bla").sql()
            'SELECT * FROM foo EXCEPT SELECT * FROM bla'

        Args:
            expressions: the SQL code strings.
                If `Expression` instances are passed, they will be used as-is.
            distinct: set the DISTINCT flag if and only if this is true.
            dialect: the dialect used to parse the input expression.
            opts: other options to use to parse the input expressions.

        Returns:
            The new Except expression.
        """
        # `except_` resolves to the module-level builder, not to this method.
        operands = (self, *expressions)
        return except_(*operands, distinct=distinct, dialect=dialect, **opts)
2025-02-13 14:45:11 +01:00
2025-02-13 21:29:39 +01:00
class UDTF(DerivedTable):
    """Derived table whose projections are the columns of its `alias` arg."""

    @property
    def selects(self) -> t.List[Expression]:
        """The alias' `columns`, or an empty list when no alias is set."""
        table_alias = self.args.get("alias")
        if not table_alias:
            return []
        return table_alias.columns
2025-02-13 14:43:32 +01:00
2025-02-13 06:15:54 +01:00
class Cache(Expression):
    """Node accepting `this`, `lazy`, `options` and `expression`; only `this` is flagged True."""

    arg_types = {"this": True, "lazy": False, "options": False, "expression": False}
class Uncache(Expression):
    """Node accepting `this` (flagged True) and `exists`."""

    arg_types = {
        "this": True,
        "exists": False,
    }
2025-02-13 21:17:09 +01:00
class Refresh(Expression):
    """Node that declares nothing of its own; `arg_types` is inherited from `Expression`."""
2025-02-13 20:48:36 +01:00
class DDL(Expression):
    """Base for statements that can carry a `with` arg and an embedded query in `expression`."""

    @property
    def ctes(self) -> t.List[CTE]:
        """Returns a list of all the CTEs attached to this statement."""
        with_clause = self.args.get("with")
        if not with_clause:
            return []
        return with_clause.expressions

    @property
    def selects(self) -> t.List[Expression]:
        """If this statement contains a query (e.g. a CTAS), this returns the query's projections."""
        if isinstance(self.expression, Query):
            return self.expression.selects
        return []

    @property
    def named_selects(self) -> t.List[str]:
        """
        If this statement contains a query (e.g. a CTAS), this returns the output
        names of the query's projections.
        """
        if isinstance(self.expression, Query):
            return self.expression.named_selects
        return []
2025-02-13 20:48:36 +01:00
2025-02-13 21:17:09 +01:00
class DML(Expression):
    """Base for statements that support the `returning` builder."""

    def returning(
        self,
        expression: ExpOrStr,
        dialect: DialectType = None,
        copy: bool = True,
        **opts,
    ) -> "Self":
        """
        Set the RETURNING expression. Not supported by all dialects.

        Example:
            >>> delete("tbl").returning("*", dialect="postgres").sql()
            'DELETE FROM tbl RETURNING *'

        Args:
            expression: the SQL code strings to parse.
                If an `Expression` instance is passed, it will be used as-is.
            dialect: the dialect used to parse the input expressions.
            copy: if `False`, modify this expression instance in-place.
            opts: other options to use to parse the input expressions.

        Returns:
            The modified expression.
        """
        return _apply_builder(
            instance=self,
            expression=expression,
            arg="returning",
            prefix="RETURNING",
            into=Returning,
            dialect=dialect,
            copy=copy,
            **opts,
        )
2025-02-13 20:48:36 +01:00
class Create(DDL):
    """DDL node for CREATE; `this` and `kind` are the args flagged True."""

    arg_types = {
        "with": False,
        "this": True,
        "kind": True,
        "expression": False,
        "exists": False,
        "properties": False,
        "replace": False,
        "refresh": False,
        "unique": False,
        "indexes": False,
        "no_schema_binding": False,
        "begin": False,
        "end": False,
        "clone": False,
        "concurrently": False,
        "clustered": False,
    }

    @property
    def kind(self) -> t.Optional[str]:
        """The `kind` arg upper-cased; a falsy value is returned unchanged."""
        raw_kind = self.args.get("kind")
        if not raw_kind:
            return raw_kind
        return raw_kind.upper()
2025-02-13 15:57:23 +01:00
2025-02-13 21:30:28 +01:00
class SequenceProperties(Expression):
    """Node holding sequence settings; none of its args is flagged True."""

    arg_types = {
        # Numeric-looking settings (presumably INCREMENT / MINVALUE / MAXVALUE / CACHE / START).
        "increment": False,
        "minvalue": False,
        "maxvalue": False,
        "cache": False,
        "start": False,
        # Remaining settings.
        "owned": False,
        "options": False,
    }
2025-02-13 21:29:39 +01:00
class TruncateTable(Expression):
    """Node whose only arg flagged True is `expressions`; every other arg is flagged False."""

    arg_types = {
        "expressions": True,
        # Modifiers flagged False.
        "is_database": False,
        "exists": False,
        "only": False,
        "cluster": False,
        "identity": False,
        "option": False,
        "partition": False,
    }
2025-02-13 15:57:23 +01:00
# https://docs.snowflake.com/en/sql-reference/sql/create-clone
2025-02-13 21:04:58 +01:00
# https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_table_clone_statement
# https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_table_copy
2025-02-13 15:57:23 +01:00
class Clone(Expression):
    """Node accepting `this` (flagged True), `shallow` and `copy`."""

    arg_types = {
        "this": True,
        "shallow": False,
        "copy": False,
    }
2025-02-13 06:15:54 +01:00
2025-02-13 14:46:58 +01:00
class Describe(Expression):
    """Node accepting `this` (flagged True) plus modifiers flagged False."""

    arg_types = {
        "this": True,
        # Modifiers flagged False.
        "style": False,
        "kind": False,
        "expressions": False,
        "partition": False,
        "format": False,
    }
2025-02-13 21:52:10 +01:00
2025-02-13 21:57:37 +01:00
# https://duckdb.org/docs/sql/statements/attach.html#attach
class Attach(Expression):
    """Node accepting `this` (flagged True), `exists` and `expressions`."""

    arg_types = {
        "this": True,
        "exists": False,
        "expressions": False,
    }
# https://duckdb.org/docs/sql/statements/attach.html#detach
class Detach(Expression):
    """Node accepting `this` (flagged True) and `exists`."""

    arg_types = {
        "this": True,
        "exists": False,
    }
2025-02-13 21:52:10 +01:00
# https://duckdb.org/docs/guides/meta/summarize.html
class Summarize(Expression):
    """Node accepting `this` (flagged True) and `table`."""

    arg_types = {
        "this": True,
        "table": False,
    }
2025-02-13 14:46:58 +01:00
2025-02-13 21:03:38 +01:00
class Kill(Expression):
    """Node accepting `this` (flagged True) and `kind`."""

    arg_types = {
        "this": True,
        "kind": False,
    }
2025-02-13 15:48:10 +01:00
class Pragma(Expression):
    """Node that declares nothing of its own; `arg_types` is inherited from `Expression`."""
2025-02-13 21:35:32 +01:00
class Declare(Expression):
    """Node whose single arg, `expressions`, is flagged True."""

    arg_types = {
        "expressions": True,
    }
class DeclareItem(Expression):
    """Node accepting `this` and `kind` (both flagged True) and `default`."""

    arg_types = {
        "this": True,
        "kind": True,
        "default": False,
    }
2025-02-13 14:53:05 +01:00
class Set(Expression):
    """Node accepting `expressions`, `unset` and `tag`; none is flagged True."""

    arg_types = {
        "expressions": False,
        "unset": False,
        "tag": False,
    }
2025-02-13 14:53:05 +01:00
2025-02-13 21:19:14 +01:00
class Heredoc(Expression):
    """Node accepting `this` (flagged True) and `tag`."""

    arg_types = {
        "this": True,
        "tag": False,
    }
2025-02-13 14:53:05 +01:00
class SetItem(Expression):
    """Node for a single item of a `Set`; none of its args is flagged True."""

    arg_types = {
        "this": False,
        "expressions": False,
        "kind": False,
        "collate": False,  # MySQL SET NAMES statement
        "global": False,
    }
class Show(Expression):
    """Node accepting `this` (flagged True) and a long list of modifiers flagged False."""

    arg_types = {
        "this": True,
        # Modifiers flagged False, in their original order.
        "history": False,
        "terse": False,
        "target": False,
        "offset": False,
        "starts_with": False,
        "limit": False,
        "from": False,
        "like": False,
        "where": False,
        "db": False,
        "scope": False,
        "scope_kind": False,
        "full": False,
        "mutex": False,
        "query": False,
        "channel": False,
        "global": False,
        "log": False,
        "position": False,
        "types": False,
    }
2025-02-13 14:40:43 +01:00
class UserDefinedFunction(Expression):
    """Node accepting `this` (flagged True), `expressions` and `wrapped`."""

    arg_types = {
        "this": True,
        "expressions": False,
        "wrapped": False,
    }
2025-02-13 14:40:43 +01:00
2025-02-13 06:15:54 +01:00
class CharacterSet(Expression):
    """Node accepting `this` (flagged True) and `default`."""

    arg_types = {
        "this": True,
        "default": False,
    }
class With(Expression):
    """Node accepting `expressions` (flagged True) and `recursive`."""

    arg_types = {
        "expressions": True,
        "recursive": False,
    }

    @property
    def recursive(self) -> bool:
        """Truthiness of the `recursive` arg, as a strict `bool`."""
        return True if self.args.get("recursive") else False
2025-02-13 06:15:54 +01:00
class WithinGroup(Expression):
    """Node accepting `this` (flagged True) and `expression`."""

    arg_types = {
        "this": True,
        "expression": False,
    }
2025-02-13 21:17:09 +01:00
# clickhouse supports scalar ctes
# https://clickhouse.com/docs/en/sql-reference/statements/select/with
2025-02-13 06:15:54 +01:00
class CTE(DerivedTable):
    """Derived table with `this` and `alias` flagged True, plus `scalar` and `materialized`."""

    arg_types = {"this": True, "alias": True, "scalar": False, "materialized": False}
2025-02-13 06:15:54 +01:00
2025-02-13 21:35:32 +01:00
class ProjectionDef(Expression):
    """Node whose `this` and `expression` args are both flagged True."""

    arg_types = {
        "this": True,
        "expression": True,
    }
2025-02-13 06:15:54 +01:00
class TableAlias(Expression):
    """Alias node accepting `this` and `columns`; neither is flagged True."""

    arg_types = {
        "this": False,
        "columns": False,
    }

    @property
    def columns(self):
        """The `columns` arg, or a fresh empty list when it is missing or falsy."""
        declared = self.args.get("columns")
        return declared or []
class BitString(Condition):
    """Condition that declares nothing of its own; `arg_types` is inherited."""
2025-02-13 08:04:41 +01:00
class HexString(Condition):
    """Condition that declares nothing of its own; `arg_types` is inherited."""
2025-02-13 14:48:46 +01:00
class ByteString(Condition):
    """Condition that declares nothing of its own; `arg_types` is inherited."""
2025-02-13 15:57:23 +01:00
class RawString(Condition):
    """Condition that declares nothing of its own; `arg_types` is inherited."""
2025-02-13 21:17:51 +01:00
class UnicodeString(Condition):
    """Condition accepting `this` (flagged True) and `escape`."""

    arg_types = {
        "this": True,
        "escape": False,
    }
2025-02-13 06:15:54 +01:00
class Column(Condition):
    """Column reference made of a name (`this`) and optional `table`, `db`, `catalog` qualifiers."""

    arg_types = {"this": True, "table": False, "db": False, "catalog": False, "join_mark": False}

    @property
    def table(self) -> str:
        """Text of the `table` arg, as returned by `self.text`."""
        return self.text("table")

    @property
    def db(self) -> str:
        """Text of the `db` arg, as returned by `self.text`."""
        return self.text("db")

    @property
    def catalog(self) -> str:
        """Text of the `catalog` arg, as returned by `self.text`."""
        return self.text("catalog")

    @property
    def output_name(self) -> str:
        """The column's own `name`."""
        return self.name

    @property
    def parts(self) -> t.List[Identifier]:
        """Return the parts of a column in order catalog, db, table, name."""
        return [
            t.cast(Identifier, self.args[part])
            for part in ("catalog", "db", "table", "this")
            if self.args.get(part)
        ]

    def to_dot(self) -> Dot | Identifier:
        """
        Converts the column into a dot expression.

        The column's own parts are extended with the `expression` of every `Dot`
        that directly and contiguously encloses it.

        Returns:
            A `Dot` built from deep copies of the parts when there is more than one
            part; otherwise the single part itself (not copied).
        """
        parts = self.parts
        parent = self.parent

        # Follow only the contiguous chain of Dot ancestors. Walking the whole
        # ancestry would also pick up unrelated Dots further up the tree (e.g. one
        # that wraps a function call this column is merely an argument of).
        while isinstance(parent, Dot):
            parts.append(parent.expression)
            parent = parent.parent

        return Dot.build(deepcopy(parts)) if len(parts) > 1 else parts[0]
2025-02-13 15:46:19 +01:00
2025-02-13 06:15:54 +01:00
2025-02-13 15:50:57 +01:00
class ColumnPosition(Expression):
    """Node accepting `this` and `position`; only `position` is flagged True."""

    arg_types = {
        "this": False,
        "position": True,
    }
2025-02-13 06:15:54 +01:00
class ColumnDef(Expression):
    """Column definition node; `this` is the only arg flagged True."""

    arg_types = {"this": True, "kind": False, "constraints": False, "exists": False, "position": False}

    @property
    def constraints(self) -> t.List[ColumnConstraint]:
        """The `constraints` arg, or a fresh empty list when it is missing or falsy."""
        declared = self.args.get("constraints")
        return declared or []

    @property
    def kind(self) -> t.Optional[DataType]:
        """The `kind` arg, or `None` when it is not set."""
        return self.args.get("kind")
2025-02-13 15:01:55 +01:00
class AlterColumn(Expression):
    """Node accepting `this` (flagged True) plus the change descriptors flagged False."""

    arg_types = {
        "this": True,
        # Change descriptors flagged False.
        "dtype": False,
        "collate": False,
        "using": False,
        "default": False,
        "drop": False,
        "comment": False,
        "allow_null": False,
    }
2025-02-13 21:35:32 +01:00
# https://docs.aws.amazon.com/redshift/latest/dg/r_ALTER_TABLE.html
class AlterDistStyle(Expression):
    """Node that declares nothing of its own; `arg_types` is inherited from `Expression`."""
class AlterSortKey(Expression):
    """Node accepting `this`, `expressions` and `compound`; none is flagged True."""

    arg_types = {
        "this": False,
        "expressions": False,
        "compound": False,
    }
class AlterSet(Expression):
    """AST node for the SET part of an ALTER statement and the settings it may carry."""

    arg_types = {
        "expressions": False,
        "option": False,
        "tablespace": False,
        "access_method": False,
        "file_format": False,
        "copy_options": False,
        "tag": False,
        "location": False,
        "serde": False,
    }
2025-02-13 21:19:58 +01:00
class RenameColumn(Expression):
    """AST node for renaming a column from `this` to `to`."""

    arg_types = {
        "this": True,
        "to": True,
        "exists": False,
    }
2025-02-13 21:56:02 +01:00
class AlterRename(Expression):
    """AST node for a rename action inside an ALTER statement."""
2025-02-13 21:09:41 +01:00
class SwapTable(Expression):
    """AST node for swapping tables (presumably an ALTER TABLE ... SWAP WITH action)."""
2025-02-13 15:42:13 +01:00
class Comment(Expression):
    """AST node for a COMMENT statement: target (`this`), its `kind` and the comment text."""

    arg_types = {
        "this": True,
        "kind": True,
        "expression": True,
        "exists": False,
        "materialized": False,
    }
2025-02-13 15:42:13 +01:00
2025-02-13 20:58:22 +01:00
class Comprehension(Expression):
    """AST node for a comprehension expression."""

    arg_types = {
        "this": True,
        "expression": True,
        "iterator": True,
        "condition": False,
    }
2025-02-13 15:57:23 +01:00
# https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
class MergeTreeTTLAction(Expression):
    """AST node for a single action of a ClickHouse MergeTree TTL clause."""

    arg_types = {
        "this": True,
        "delete": False,
        "recompress": False,
        "to_disk": False,
        "to_volume": False,
    }
# https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
class MergeTreeTTL(Expression):
    """AST node for a ClickHouse MergeTree TTL clause."""

    arg_types = {"expressions": True, "where": False, "group": False, "aggregates": False}
2025-02-13 20:48:36 +01:00
# https://dev.mysql.com/doc/refman/8.0/en/create-table.html
class IndexConstraintOption(Expression):
    """AST node for one option of an index constraint (see the MySQL link above)."""

    arg_types = {
        "key_block_size": False,
        "using": False,
        "parser": False,
        "comment": False,
        "visible": False,
        "engine_attr": False,
        "secondary_engine_attr": False,
    }
2025-02-13 06:15:54 +01:00
class ColumnConstraint(Expression):
    """AST node for a column constraint; `kind` holds the concrete constraint node."""

    arg_types = {
        "this": False,
        "kind": True,
    }

    @property
    def kind(self) -> ColumnConstraintKind:
        """The `kind` arg, read by direct key access on `self.args`."""
        constraint_kind = self.args["kind"]
        return constraint_kind
2025-02-13 06:15:54 +01:00
2025-02-13 08:04:41 +01:00
class ColumnConstraintKind(Expression):
    """Common base class for the concrete column constraint kinds."""
2025-02-13 08:04:41 +01:00
class AutoIncrementColumnConstraint(ColumnConstraintKind):
    """Column constraint kind for auto-increment columns."""
2025-02-13 21:17:09 +01:00
class PeriodForSystemTimeConstraint(ColumnConstraintKind):
    """Constraint kind for a PERIOD FOR SYSTEM_TIME clause with two operands."""

    arg_types = {
        "this": True,
        "expression": True,
    }
2025-02-13 15:26:26 +01:00
class CaseSpecificColumnConstraint(ColumnConstraintKind):
    """Column constraint kind for case specificity; `not_` presumably marks the negated form."""

    arg_types = {"not_": True}
class CharacterSetColumnConstraint(ColumnConstraintKind):
    """Column constraint kind naming a character set (`this`)."""

    arg_types = {"this": True}
2025-02-13 08:04:41 +01:00
class CheckColumnConstraint(ColumnConstraintKind):
    """Column constraint kind for CHECK; also carries an `enforced` arg."""

    arg_types = {
        "this": True,
        "enforced": False,
    }
2025-02-13 06:15:54 +01:00
2025-02-13 20:58:22 +01:00
class ClusteredColumnConstraint(ColumnConstraintKind):
    """Column constraint kind for CLUSTERED."""
2025-02-13 08:04:41 +01:00
class CollateColumnConstraint(ColumnConstraintKind):
    """Column constraint kind for COLLATE."""
2025-02-13 08:04:41 +01:00
class CommentColumnConstraint(ColumnConstraintKind):
    """Column constraint kind for a column COMMENT."""
2025-02-13 15:30:09 +01:00
class CompressColumnConstraint(ColumnConstraintKind):
    """Column constraint kind for COMPRESS."""

    arg_types = {"this": False}
2025-02-13 15:30:09 +01:00
2025-02-13 15:26:26 +01:00
class DateFormatColumnConstraint(ColumnConstraintKind):
    """Column constraint kind holding a date format (`this`)."""

    arg_types = {"this": True}
2025-02-13 08:04:41 +01:00
class DefaultColumnConstraint(ColumnConstraintKind):
    """Column constraint kind for DEFAULT."""
2025-02-13 14:58:37 +01:00
class EncodeColumnConstraint(ColumnConstraintKind):
    """Column constraint kind for ENCODE."""
2025-02-13 21:30:28 +01:00
# https://www.postgresql.org/docs/current/sql-createtable.html#SQL-CREATETABLE-EXCLUDE
class ExcludeColumnConstraint(ColumnConstraintKind):
    """Constraint kind for EXCLUDE (see the PostgreSQL link above)."""
2025-02-13 21:31:23 +01:00
class EphemeralColumnConstraint(ColumnConstraintKind):
    """Column constraint kind for EPHEMERAL."""

    arg_types = {"this": False}
2025-02-13 21:30:28 +01:00
class WithOperator(Expression):
    """AST node pairing `this` with an operator `op` (presumably `<expr> WITH <operator>`)."""

    arg_types = {
        "this": True,
        "op": True,
    }
2025-02-13 08:04:41 +01:00
class GeneratedAsIdentityColumnConstraint(ColumnConstraintKind):
    """Column constraint kind for GENERATED ... AS IDENTITY and its sequence options."""

    # this: True -> ALWAYS, this: False -> BY DEFAULT
    arg_types = {
        "this": False,
        "expression": False,
        "on_null": False,
        "start": False,
        "increment": False,
        "minvalue": False,
        "maxvalue": False,
        "cycle": False,
    }
2025-02-13 08:04:41 +01:00
2025-02-13 21:17:09 +01:00
class GeneratedAsRowColumnConstraint(ColumnConstraintKind):
    """Column constraint kind for GENERATED ... AS ROW; carries `start` and `hidden` args."""

    arg_types = {
        "start": False,
        "hidden": False,
    }
2025-02-13 21:17:09 +01:00
2025-02-13 20:48:36 +01:00
# https://dev.mysql.com/doc/refman/8.0/en/create-table.html
# https://github.com/ClickHouse/ClickHouse/blob/master/src/Parsers/ParserCreateQuery.h#L646
class IndexColumnConstraint(ColumnConstraintKind):
    """Constraint kind for an index declared inside a table definition."""

    arg_types = {
        "this": False,
        "expressions": False,
        "kind": False,
        "index_type": False,
        "options": False,
        "expression": False,  # Clickhouse
        "granularity": False,
    }
2025-02-13 20:48:36 +01:00
2025-02-13 15:30:09 +01:00
class InlineLengthColumnConstraint(ColumnConstraintKind):
    """Column constraint kind for INLINE LENGTH."""
2025-02-13 20:58:22 +01:00
class NonClusteredColumnConstraint(ColumnConstraintKind):
    """Column constraint kind for NONCLUSTERED."""
class NotForReplicationColumnConstraint(ColumnConstraintKind):
    """Column constraint kind for NOT FOR REPLICATION; declares an empty `arg_types`."""

    arg_types = {}
2025-02-13 21:37:09 +01:00
# https://docs.snowflake.com/en/sql-reference/sql/create-table
class MaskingPolicyColumnConstraint(ColumnConstraintKind):
    """Column constraint kind for a masking policy (see the Snowflake link above)."""

    arg_types = {
        "this": True,
        "expressions": False,
    }
2025-02-13 08:04:41 +01:00
class NotNullColumnConstraint(ColumnConstraintKind):
    """Column constraint kind for NOT NULL; `allow_null` presumably flips it to NULL."""

    arg_types = {"allow_null": False}
2025-02-13 06:15:54 +01:00
2025-02-13 15:52:09 +01:00
# https://dev.mysql.com/doc/refman/5.7/en/timestamp-initialization.html
class OnUpdateColumnConstraint(ColumnConstraintKind):
    """Column constraint kind for ON UPDATE (see the MySQL link above)."""
2025-02-13 21:17:09 +01:00
# https://docs.snowflake.com/en/sql-reference/sql/create-external-table#optional-parameters
class TransformColumnConstraint(ColumnConstraintKind):
    """Column constraint kind for a column transform (see the Snowflake link above)."""
2025-02-13 08:04:41 +01:00
class PrimaryKeyColumnConstraint(ColumnConstraintKind):
    """Column constraint kind for PRIMARY KEY; carries a `desc` arg."""

    arg_types = {"desc": False}
2025-02-13 08:04:41 +01:00
2025-02-13 15:26:26 +01:00
class TitleColumnConstraint(ColumnConstraintKind):
    """Column constraint kind for TITLE."""
2025-02-13 08:04:41 +01:00
class UniqueColumnConstraint(ColumnConstraintKind):
    """Column constraint kind for UNIQUE."""

    arg_types = {
        "this": False,
        "index_type": False,
        "on_conflict": False,
        "nulls": False,
    }
2025-02-13 15:26:26 +01:00
class UppercaseColumnConstraint(ColumnConstraintKind):
    """Column constraint kind for UPPERCASE; declares an empty `arg_types`."""

    arg_types: t.Dict[str, t.Any] = {}
2025-02-13 21:57:37 +01:00
# https://docs.risingwave.com/processing/watermarks#syntax
class WatermarkColumnConstraint(Expression):
    """Watermark constraint node; note that it derives from `Expression` directly."""

    arg_types = {
        "this": True,
        "expression": True,
    }
2025-02-13 15:26:26 +01:00
class PathColumnConstraint(ColumnConstraintKind):
    """Column constraint kind for PATH."""
2025-02-13 21:37:09 +01:00
# https://docs.snowflake.com/en/sql-reference/sql/create-table
class ProjectionPolicyColumnConstraint(ColumnConstraintKind):
    """Column constraint kind for a projection policy (see the Snowflake link above)."""
2025-02-13 20:58:22 +01:00
# computed column expression
# https://learn.microsoft.com/en-us/sql/t-sql/statements/create-table-transact-sql?view=sql-server-ver16
class ComputedColumnConstraint(ColumnConstraintKind):
    """Column constraint kind for a computed column; `this` is the computing expression."""

    arg_types = {
        "this": True,
        "persisted": False,
        "not_null": False,
    }
2025-02-13 06:15:54 +01:00
class Constraint(Expression):
    """AST node for a constraint (`this`) together with its `expressions`."""

    arg_types = {
        "this": True,
        "expressions": True,
    }
2025-02-13 21:17:09 +01:00
class Delete(DML):
    """AST node for a DELETE statement, with builder helpers for its table and WHERE clause."""

    arg_types = {
        "with": False,
        "this": False,
        "using": False,
        "where": False,
        "returning": False,
        "limit": False,
        "tables": False,  # Multiple-Table Syntax (MySQL)
        "cluster": False,  # Clickhouse
    }

    def delete(
        self,
        table: ExpOrStr,
        dialect: DialectType = None,
        copy: bool = True,
        **opts,
    ) -> Delete:
        """
        Set (or replace) the table that this DELETE expression targets.

        Example:
            >>> delete("tbl").sql()
            'DELETE FROM tbl'

        Args:
            table: the table from which to delete.
            dialect: the dialect used to parse the input expression.
            copy: if `False`, modify this expression instance in-place.
            opts: other options to use to parse the input expressions.

        Returns:
            Delete: the modified expression.
        """
        # The table is stored under the "this" arg and parsed into a `Table`.
        return _apply_builder(
            expression=table,
            instance=self,
            arg="this",
            into=Table,
            dialect=dialect,
            copy=copy,
            **opts,
        )

    def where(
        self,
        *expressions: t.Optional[ExpOrStr],
        append: bool = True,
        dialect: DialectType = None,
        copy: bool = True,
        **opts,
    ) -> Delete:
        """
        Add to, or overwrite, the WHERE condition of this DELETE.

        Example:
            >>> delete("tbl").where("x = 'a' OR x < 'b'").sql()
            "DELETE FROM tbl WHERE x = 'a' OR x < 'b'"

        Args:
            *expressions: the SQL code strings to parse.
                `Expression` instances are used as-is.
                Several expressions are joined with AND.
            append: if `True`, AND the new expressions onto any existing condition.
                Otherwise the existing condition is replaced.
            dialect: the dialect used to parse the input expressions.
            copy: if `False`, modify this expression instance in-place.
            opts: other options to use to parse the input expressions.

        Returns:
            Delete: the modified expression.
        """
        # The resulting condition is wrapped in a `Where` node under the "where" arg.
        return _apply_conjunction_builder(
            *expressions,
            instance=self,
            arg="where",
            into=Where,
            append=append,
            dialect=dialect,
            copy=copy,
            **opts,
        )
2025-02-13 06:15:54 +01:00
class Drop(Expression):
    """AST node for a DROP statement."""

    arg_types = {
        "this": False,
        "kind": False,
        "expressions": False,
        "exists": False,
        "temporary": False,
        "materialized": False,
        "cascade": False,
        "constraints": False,
        "purge": False,
        "cluster": False,
        "concurrently": False,
    }

    @property
    def kind(self) -> t.Optional[str]:
        """The `kind` arg upper-cased; a missing or falsy value is returned unchanged."""
        raw_kind = self.args.get("kind")
        if not raw_kind:
            return raw_kind
        return raw_kind.upper()
2025-02-13 06:15:54 +01:00
class Filter(Expression):
    """AST node for a FILTER construct: `this` filtered by `expression`."""

    arg_types = {
        "this": True,
        "expression": True,
    }
class Check(Expression):
    """AST node for a CHECK expression."""
2025-02-13 21:41:14 +01:00
class Changes(Expression):
    """AST node for a CHANGES clause (presumably Snowflake change tracking; confirm)."""

    arg_types = {
        "information": True,
        "at_before": False,
        "end": False,
    }
2025-02-13 20:58:22 +01:00
# https://docs.snowflake.com/en/sql-reference/constructs/connect-by
class Connect(Expression):
    """AST node for a CONNECT BY construct (see the Snowflake link above)."""

    arg_types = {
        "start": False,
        "connect": True,
        "nocycle": False,
    }
2025-02-13 20:58:22 +01:00
2025-02-13 21:33:25 +01:00
class CopyParameter(Expression):
    """AST node for a single parameter of a COPY statement."""

    arg_types = {
        "this": True,
        "expression": False,
        "expressions": False,
    }
2025-02-13 21:33:25 +01:00
2025-02-13 21:43:00 +01:00
class Copy(DML):
    """AST node for a COPY statement."""

    arg_types = {
        "this": True,
        "kind": True,
        "files": True,
        "credentials": False,
        "format": False,
        "params": False,
    }
class Credentials(Expression):
    """AST node grouping credential-related settings (see the `credentials` arg of `Copy`)."""

    arg_types = {
        "credentials": False,
        "encryption": False,
        "storage": False,
        "iam_role": False,
        "region": False,
    }
2025-02-13 20:58:22 +01:00
class Prior(Expression):
    """AST node for PRIOR (presumably the operator used inside CONNECT BY; confirm)."""
2025-02-13 14:46:58 +01:00
class Directory(Expression):
    """AST node for the DIRECTORY target of an INSERT OVERWRITE DIRECTORY statement."""

    # https://spark.apache.org/docs/3.0.0-preview/sql-ref-syntax-dml-insert-overwrite-directory-hive.html
    arg_types = {
        "this": True,
        "local": False,
        "row_format": False,
    }
2025-02-13 06:15:54 +01:00
class ForeignKey(Expression):
    """AST node for a FOREIGN KEY constraint."""

    arg_types = {"expressions": True, "reference": False, "delete": False, "update": False}
2025-02-13 21:01:12 +01:00
class ColumnPrefix(Expression):
    """AST node pairing a column (`this`) with a prefix `expression` (semantics: confirm)."""

    arg_types = {
        "this": True,
        "expression": True,
    }
2025-02-13 15:07:05 +01:00
class PrimaryKey(Expression):
    """AST node for a PRIMARY KEY definition over `expressions`."""

    arg_types = {
        "expressions": True,
        "options": False,
    }
2025-02-13 14:56:25 +01:00
# https://www.postgresql.org/docs/9.1/sql-selectinto.html
# https://docs.aws.amazon.com/redshift/latest/dg/r_SELECT_INTO.html#r_SELECT_INTO-examples
class Into(Expression):
    """AST node for the INTO part of a SELECT ... INTO statement."""

    arg_types = {
        "this": False,
        "temporary": False,
        "unlogged": False,
        "bulk_collect": False,
        "expressions": False,
    }
2025-02-13 14:56:25 +01:00
2025-02-13 06:15:54 +01:00
class From(Expression):
    """AST node for a FROM clause; naming properties are delegated to `this`."""

    @property
    def name(self) -> str:
        """The `name` of the wrapped `this` expression."""
        source = self.this
        return source.name

    @property
    def alias_or_name(self) -> str:
        """The `alias_or_name` of the wrapped `this` expression."""
        source = self.this
        return source.alias_or_name
2025-02-13 06:15:54 +01:00
class Having(Expression):
    """AST node for a HAVING clause."""
class Hint(Expression):
    """AST node for a hint; the individual hints live in `expressions`."""

    arg_types = {"expressions": True}
2025-02-13 14:45:11 +01:00
class JoinHint(Expression):
    """AST node for a join hint (`this`) applied to `expressions`."""

    arg_types = {
        "this": True,
        "expressions": True,
    }
2025-02-13 06:15:54 +01:00
class Identifier(Expression):
    """AST node for an identifier, optionally marked as quoted, global or temporary."""

    arg_types = {
        "this": True,
        "quoted": False,
        "global": False,
        "temporary": False,
    }

    @property
    def quoted(self) -> bool:
        """Truthiness of the `quoted` arg as a plain `bool`."""
        return True if self.args.get("quoted") else False

    @property
    def hashable_args(self) -> t.Any:
        """The `(this, quoted)` pair."""
        return self.this, self.quoted

    @property
    def output_name(self) -> str:
        """Same value as `name`."""
        return self.name
2025-02-13 06:15:54 +01:00
2025-02-13 21:04:58 +01:00
# https://www.postgresql.org/docs/current/indexes-opclass.html
class Opclass(Expression):
    """AST node for an index operator class (see the PostgreSQL link above)."""

    arg_types = {
        "this": True,
        "expression": True,
    }
2025-02-13 06:15:54 +01:00
class Index(Expression):
    """AST node for an index."""

    arg_types = {
        "this": False,
        "table": False,
        "unique": False,
        "primary": False,
        "amp": False,  # teradata
        "params": False,
    }
class IndexParameters(Expression):
    """AST node grouping the parameters of an index (see the `params` arg of `Index`)."""

    arg_types = {
        "using": False,
        "include": False,
        "columns": False,
        "with_storage": False,
        "partition_by": False,
        "tablespace": False,
        "where": False,
        "on": False,
    }
2025-02-13 06:15:54 +01:00
2025-02-13 21:17:09 +01:00
class Insert(DDL, DML):
    """AST node for an INSERT statement, with a builder helper for attaching CTEs."""

    arg_types = {
        "hint": False,
        "with": False,
        "is_function": False,
        "this": False,
        "expression": False,
        "conflict": False,
        "returning": False,
        "overwrite": False,
        "exists": False,
        "alternative": False,
        "where": False,
        "ignore": False,
        "by_name": False,
        "stored": False,
        "partition": False,
        "settings": False,
        "source": False,
    }

    def with_(
        self,
        alias: ExpOrStr,
        as_: ExpOrStr,
        recursive: t.Optional[bool] = None,
        materialized: t.Optional[bool] = None,
        append: bool = True,
        dialect: DialectType = None,
        copy: bool = True,
        **opts,
    ) -> Insert:
        """
        Add to, or overwrite, the common table expressions of this INSERT.

        Example:
            >>> insert("SELECT x FROM cte", "t").with_("cte", as_="SELECT * FROM tbl").sql()
            'WITH cte AS (SELECT * FROM tbl) INSERT INTO t SELECT x FROM cte'

        Args:
            alias: the SQL code string to parse as the table name.
                `Expression` instances are used as-is.
            as_: the SQL code string to parse as the table expression.
                `Expression` instances are used as-is.
            recursive: set the RECURSIVE part of the expression.
            materialized: set the MATERIALIZED part of the expression.
            append: if `True`, add to any existing expressions.
                Otherwise the existing expressions are replaced.
            dialect: the dialect used to parse the input expression.
            copy: if `False`, modify this expression instance in-place.
            opts: other options to use to parse the input expressions.

        Returns:
            The modified expression.
        """
        # Everything is forwarded unchanged to the shared CTE builder.
        return _apply_cte_builder(
            self,
            alias,
            as_,
            recursive=recursive,
            materialized=materialized,
            append=append,
            dialect=dialect,
            copy=copy,
            **opts,
        )
2025-02-13 06:15:54 +01:00
2025-02-13 21:54:13 +01:00
class ConditionalInsert(Expression):
    """AST node for one conditional branch of an insert; `else_` presumably marks ELSE."""

    arg_types = {
        "this": True,
        "expression": False,
        "else_": False,
    }
class MultitableInserts(Expression):
    """AST node for a multi-table insert: the inserts, their `kind` and the `source`."""

    arg_types = {
        "expressions": True,
        "kind": True,
        "source": True,
    }
2025-02-13 15:52:09 +01:00
class OnConflict(Expression):
    """AST node for conflict handling on insert (see the `conflict` arg of `Insert`)."""

    arg_types = {
        "duplicate": False,
        "expressions": False,
        "action": False,
        "conflict_keys": False,
        "constraint": False,
    }
2025-02-13 21:54:13 +01:00
class OnCondition(Expression):
    """AST node for ON ERROR / ON EMPTY / ON NULL style handlers (inferred from arg names)."""

    arg_types = {
        "error": False,
        "empty": False,
        "null": False,
    }
2025-02-13 15:43:32 +01:00
class Returning(Expression):
    """AST node for a RETURNING clause, with an `into` arg."""

    arg_types = {
        "expressions": True,
        "into": False,
    }
2025-02-13 15:43:32 +01:00
2025-02-13 06:15:54 +01:00
# https://dev.mysql.com/doc/refman/8.0/en/charset-introducer.html
class Introducer(Expression):
    """AST node for a character set introducer (see the MySQL link above)."""

    arg_types = {
        "this": True,
        "expression": True,
    }
2025-02-13 15:01:55 +01:00
# national char, like n'utf8'
class National(Expression):
    """AST node for a national character string literal."""
2025-02-13 14:46:58 +01:00
class LoadData(Expression):
    """AST node for a LOAD DATA statement."""

    arg_types = {
        "this": True,
        "local": False,
        "overwrite": False,
        "inpath": True,
        "partition": False,
        "input_format": False,
        "serde": False,
    }
2025-02-13 06:15:54 +01:00
class Partition(Expression):
    """AST node for a partition specification made of `expressions`."""

    arg_types = {"expressions": True}
2025-02-13 06:15:54 +01:00
2025-02-13 21:29:39 +01:00
class PartitionRange(Expression):
    """AST node for a partition range with two bounds (`this`, `expression`)."""

    arg_types = {
        "this": True,
        "expression": True,
    }
2025-02-13 21:35:32 +01:00
# https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression
class PartitionId(Expression):
    """AST node for a partition referenced by ID (see the ClickHouse link above)."""
2025-02-13 06:15:54 +01:00
class Fetch(Expression):
    """AST node for a FETCH clause."""

    arg_types = {"direction": False, "count": False, "percent": False, "with_ties": False}
2025-02-13 06:15:54 +01:00
2025-02-13 21:55:19 +01:00
class Grant(Expression):
    """AST node for a GRANT statement."""

    arg_types = {
        "privileges": True,
        "kind": False,
        "securable": True,
        "principals": True,
        "grant_option": False,
    }
2025-02-13 06:15:54 +01:00
class Group(Expression):
    """AST node for a GROUP BY clause and its grouping modifiers."""

    arg_types = {
        "expressions": False,
        "grouping_sets": False,
        "cube": False,
        "rollup": False,
        "totals": False,
        "all": False,
    }
2025-02-13 21:52:55 +01:00
class Cube(Expression):
    """AST node for a CUBE grouping."""

    arg_types = {"expressions": False}
class Rollup(Expression):
    """AST node for a ROLLUP grouping."""

    arg_types = {"expressions": False}
class GroupingSets(Expression):
    """AST node for GROUPING SETS."""

    arg_types = {"expressions": True}
2025-02-13 06:15:54 +01:00
class Lambda(Expression):
    """AST node for a lambda: a body (`this`) plus its `expressions`."""

    arg_types = {
        "this": True,
        "expressions": True,
    }
class Limit(Expression):
    """AST node for a LIMIT clause; the limit itself is the `expression` arg."""

    arg_types = {
        "this": False,
        "expression": True,
        "offset": False,
        "expressions": False,
    }
2025-02-13 06:15:54 +01:00
class Literal(Condition):
    """AST node for a literal; `is_string` separates string literals from the others."""

    arg_types = {
        "this": True,
        "is_string": True,
    }

    @property
    def hashable_args(self) -> t.Any:
        """The `(this, is_string)` pair."""
        return self.this, self.args.get("is_string")

    @classmethod
    def number(cls, number) -> Literal:
        """Build a literal from `str(number)` with `is_string=False`."""
        text = str(number)
        return cls(this=text, is_string=False)

    @classmethod
    def string(cls, string) -> Literal:
        """Build a literal from `str(string)` with `is_string=True`."""
        text = str(string)
        return cls(this=text, is_string=True)

    @property
    def output_name(self) -> str:
        """Same value as `name`."""
        return self.name

    def to_py(self) -> int | str | Decimal:
        """
        Convert this literal into a Python value.

        Returns:
            `this` unchanged when `is_number` is falsy. Otherwise `int(this)`, falling
            back to `Decimal(this)` when `int` raises `ValueError`.
        """
        if not self.is_number:
            return self.this

        try:
            return int(self.this)
        except ValueError:
            return Decimal(self.this)
2025-02-13 06:15:54 +01:00
class Join(Expression):
    """AST node for a JOIN, with builder helpers for its ON and USING parts."""

    arg_types = {
        "this": True,
        "on": False,
        "side": False,
        "kind": False,
        "using": False,
        "method": False,
        "global": False,
        "hint": False,
        "match_condition": False,  # Snowflake
        "expressions": False,
    }

    @property
    def method(self) -> str:
        """Upper-cased text of the `method` arg."""
        raw = self.text("method")
        return raw.upper()

    @property
    def kind(self) -> str:
        """Upper-cased text of the `kind` arg."""
        raw = self.text("kind")
        return raw.upper()

    @property
    def side(self) -> str:
        """Upper-cased text of the `side` arg."""
        raw = self.text("side")
        return raw.upper()

    @property
    def hint(self) -> str:
        """Upper-cased text of the `hint` arg."""
        raw = self.text("hint")
        return raw.upper()

    @property
    def alias_or_name(self) -> str:
        """The `alias_or_name` of the joined `this` expression."""
        return self.this.alias_or_name

    def on(
        self,
        *expressions: t.Optional[ExpOrStr],
        append: bool = True,
        dialect: DialectType = None,
        copy: bool = True,
        **opts,
    ) -> Join:
        """
        Add to, or overwrite, the ON condition of this join.

        Example:
            >>> import sqlglot
            >>> sqlglot.parse_one("JOIN x", into=Join).on("y = 1").sql()
            'JOIN x ON y = 1'

        Args:
            *expressions: the SQL code strings to parse.
                `Expression` instances are used as-is.
                Several expressions are joined with AND.
            append: if `True`, AND the new expressions onto any existing condition.
                Otherwise the existing condition is replaced.
            dialect: the dialect used to parse the input expressions.
            copy: if `False`, modify this expression instance in-place.
            opts: other options to use to parse the input expressions.

        Returns:
            The modified Join expression.
        """
        updated = _apply_conjunction_builder(
            *expressions,
            instance=self,
            arg="on",
            append=append,
            dialect=dialect,
            copy=copy,
            **opts,
        )

        # A join whose kind is CROSS has that kind cleared once ON is set.
        if updated.kind == "CROSS":
            updated.set("kind", None)

        return updated

    def using(
        self,
        *expressions: t.Optional[ExpOrStr],
        append: bool = True,
        dialect: DialectType = None,
        copy: bool = True,
        **opts,
    ) -> Join:
        """
        Add to, or overwrite, the USING expressions of this join.

        Example:
            >>> import sqlglot
            >>> sqlglot.parse_one("JOIN x", into=Join).using("foo", "bla").sql()
            'JOIN x USING (foo, bla)'

        Args:
            *expressions: the SQL code strings to parse.
                `Expression` instances are used as-is.
            append: if `True`, extend the existing "using" list with the new expressions.
                Otherwise the existing list is replaced.
            dialect: the dialect used to parse the input expressions.
            copy: if `False`, modify this expression instance in-place.
            opts: other options to use to parse the input expressions.

        Returns:
            The modified Join expression.
        """
        updated = _apply_list_builder(
            *expressions,
            instance=self,
            arg="using",
            append=append,
            dialect=dialect,
            copy=copy,
            **opts,
        )

        # A join whose kind is CROSS has that kind cleared once USING is set.
        if updated.kind == "CROSS":
            updated.set("kind", None)

        return updated
2025-02-13 06:15:54 +01:00
2025-02-13 14:43:32 +01:00
class Lateral(UDTF):
    """AST node for a LATERAL / APPLY construct."""

    arg_types = {
        "this": True,
        "view": False,
        "outer": False,
        "alias": False,
        "cross_apply": False,  # True -> CROSS APPLY, False -> OUTER APPLY
    }
2025-02-13 06:15:54 +01:00
2025-02-13 21:31:23 +01:00
class MatchRecognizeMeasure(Expression):
    """AST node for one measure of a MATCH_RECOGNIZE clause."""

    arg_types = {"this": True, "window_frame": False}
2025-02-13 15:08:15 +01:00
class MatchRecognize(Expression):
    """AST node for a MATCH_RECOGNIZE clause."""

    arg_types = {
        "partition_by": False,
        "order": False,
        "measures": False,
        "rows": False,
        "after": False,
        "pattern": False,
        "define": False,
        "alias": False,
    }
2025-02-13 06:15:54 +01:00
# Clickhouse FROM FINAL modifier
# https://clickhouse.com/docs/en/sql-reference/statements/select/from/#final-modifier
class Final(Expression):
    """AST node for the FINAL modifier of a FROM clause."""
class Offset(Expression):
    """AST node for an OFFSET clause; the offset itself is the `expression` arg."""

    arg_types = {
        "this": False,
        "expression": True,
        "expressions": False,
    }
2025-02-13 06:15:54 +01:00
class Order(Expression):
    """AST node for an ORDER BY clause; `siblings` is an additional flag arg."""

    arg_types = {
        "this": False,
        "expressions": True,
        "siblings": False,
    }
2025-02-13 21:17:51 +01:00
# https://clickhouse.com/docs/en/sql-reference/statements/select/order-by#order-by-expr-with-fill-modifier
class WithFill(Expression):
    """AST node for the WITH FILL modifier of ORDER BY (see the ClickHouse link above)."""

    arg_types = {"from": False, "to": False, "step": False, "interpolate": False}
2025-02-13 06:15:54 +01:00
# hive specific sorts
# https://cwiki.apache.org/confluence/display/Hive/LanguageManual+SortBy
class Cluster(Order):
    """`Order` variant for CLUSTER BY."""
class Distribute(Order):
    """`Order` variant for DISTRIBUTE BY."""
class Sort(Order):
    """`Order` variant for SORT BY."""
class Ordered(Expression):
    """AST node for a single ordered term: expression, direction, null placement, fill."""

    arg_types = {
        "this": True,
        "desc": False,
        "nulls_first": True,
        "with_fill": False,
    }
2025-02-13 06:15:54 +01:00
class Property(Expression):
    """Base AST node for properties: a name (`this`) and its `value`."""

    arg_types = {
        "this": True,
        "value": True,
    }
2025-02-13 21:55:19 +01:00
class GrantPrivilege(Expression):
    """AST node for one privilege of a GRANT statement."""

    arg_types = {
        "this": True,
        "expressions": False,
    }
class GrantPrincipal(Expression):
    """AST node for one principal of a GRANT statement."""

    arg_types = {
        "this": True,
        "kind": False,
    }
2025-02-13 21:35:32 +01:00
class AllowedValuesProperty(Expression):
    """Allowed-values node; note that it derives from `Expression`, not `Property`."""

    arg_types = {"expressions": True}
2025-02-13 15:40:23 +01:00
class AlgorithmProperty(Property):
    """Property node for ALGORITHM."""

    arg_types = {"this": True}
2025-02-13 15:40:23 +01:00
class AutoIncrementProperty(Property):
    """Property node for AUTO_INCREMENT."""

    arg_types = {"this": True}
2025-02-13 15:09:58 +01:00
2025-02-13 21:19:14 +01:00
# https://docs.aws.amazon.com/prescriptive-guidance/latest/materialized-views-redshift/refreshing-materialized-views.html
class AutoRefreshProperty(Property):
    """Property node for AUTO REFRESH (see the Redshift link above)."""

    arg_types = {"this": True}
2025-02-13 21:30:28 +01:00
class BackupProperty(Property):
    """Property node for BACKUP."""

    arg_types = {"this": True}
2025-02-13 15:40:23 +01:00
class BlockCompressionProperty(Property):
    """Property node for BLOCKCOMPRESSION; each arg names one possible setting."""

    arg_types = {
        "autotemp": False,
        "always": False,
        "default": False,
        "manual": False,
        "never": False,
    }
2025-02-13 06:15:54 +01:00
2025-02-13 15:40:23 +01:00
class CharacterSetProperty(Property):
    """Property node for CHARACTER SET, with a `default` arg."""

    arg_types = {
        "this": True,
        "default": True,
    }
2025-02-13 06:15:54 +01:00
2025-02-13 15:40:23 +01:00
class ChecksumProperty(Property):
    """Property node for CHECKSUM."""

    arg_types = {
        "on": False,
        "default": False,
    }
2025-02-13 06:15:54 +01:00
2025-02-13 15:40:23 +01:00
class CollateProperty(Property):
    """Property node for COLLATE, with a `default` arg."""

    arg_types = {
        "this": True,
        "default": False,
    }
2025-02-13 14:54:32 +01:00
2025-02-13 20:04:59 +01:00
class CopyGrantsProperty(Property):
    """Property node for COPY GRANTS; declares an empty `arg_types`."""

    arg_types = {}
2025-02-13 15:40:23 +01:00
class DataBlocksizeProperty(Property):
    """Property node for DATABLOCKSIZE."""

    arg_types = {
        "size": False,
        "units": False,
        "minimum": False,
        "maximum": False,
        "default": False,
    }
2025-02-13 14:54:32 +01:00
2025-02-13 21:35:32 +01:00
class DataDeletionProperty(Property):
    """Property node for DATA_DELETION."""

    arg_types = {
        "on": True,
        "filter_col": False,
        "retention_period": False,
    }
2025-02-13 15:40:23 +01:00
class DefinerProperty(Property):
    """Property node for DEFINER."""

    arg_types = {"this": True}
2025-02-13 15:40:23 +01:00
class DistKeyProperty(Property):
    """Property node for DISTKEY."""

    arg_types = {"this": True}
2025-02-13 14:54:32 +01:00
2025-02-13 21:54:13 +01:00
# https://docs.starrocks.io/docs/sql-reference/sql-statements/data-definition/CREATE_TABLE/#distribution_desc
# https://doris.apache.org/docs/sql-manual/sql-statements/Data-Definition-Statements/Create/CREATE-TABLE?_highlight=create&_highlight=table#distribution_desc
class DistributedByProperty(Property):
    """Property node for DISTRIBUTED BY (see the StarRocks / Doris links above)."""

    arg_types = {
        "expressions": False,
        "kind": True,
        "buckets": False,
        "order": False,
    }
2025-02-13 15:40:23 +01:00
class DistStyleProperty(Property):
    """Property node for DISTSTYLE."""

    arg_types = {"this": True}
2025-02-13 06:15:54 +01:00
2025-02-13 21:54:13 +01:00
class DuplicateKeyProperty(Property):
    """Property node for DUPLICATE KEY."""

    arg_types = {"expressions": True}
2025-02-13 06:15:54 +01:00
class EngineProperty(Property):
    """Property node for ENGINE."""

    arg_types = {"this": True}
2025-02-13 06:15:54 +01:00
2025-02-13 20:48:36 +01:00
class HeapProperty(Property):
    """Property node for HEAP; declares an empty `arg_types`."""

    arg_types = {}
2025-02-13 16:00:51 +01:00
class ToTableProperty(Property):
    """Property node for a TO <table> target."""

    arg_types = {"this": True}
2025-02-13 15:40:23 +01:00
class ExecuteAsProperty(Property):
    """Property node for EXECUTE AS."""

    arg_types = {"this": True}
2025-02-13 06:15:54 +01:00
2025-02-13 15:40:23 +01:00
class ExternalProperty(Property):
    """Property node for EXTERNAL."""

    arg_types = {"this": False}
2025-02-13 06:15:54 +01:00
2025-02-13 15:40:23 +01:00
class FallbackProperty(Property):
    """Property node for FALLBACK; `no` presumably marks the NO FALLBACK form."""

    arg_types = {
        "no": True,
        "protection": False,
    }
2025-02-13 06:15:54 +01:00
2025-02-13 15:40:23 +01:00
class FileFormatProperty(Property):
    """Property node for a file format."""

    arg_types = {"this": True}
2025-02-13 06:15:54 +01:00
2025-02-13 15:40:23 +01:00
class FreespaceProperty(Property):
    """Property node for FREESPACE, with a `percent` arg."""

    arg_types = {
        "this": True,
        "percent": False,
    }
2025-02-13 21:30:28 +01:00
class GlobalProperty(Property):
    """Property node for GLOBAL; declares an empty `arg_types`."""

    arg_types = {}
class IcebergProperty(Property):
    """Property node for ICEBERG; declares an empty `arg_types`."""

    arg_types = {}
2025-02-13 21:19:58 +01:00
class InheritsProperty(Property):
    """Property node for INHERITS."""

    arg_types = {"expressions": True}
2025-02-13 21:06:11 +01:00
class InputModelProperty(Property):
    """Property node for an input model."""

    arg_types = {"this": True}
class OutputModelProperty(Property):
    """Property node for an output model."""

    arg_types = {"this": True}
2025-02-13 15:52:09 +01:00
2025-02-13 15:40:23 +01:00
class IsolatedLoadingProperty(Property):
    """Property node for ISOLATED LOADING."""

    arg_types = {
        "no": False,
        "concurrent": False,
        "target": False,
    }
2025-02-13 15:40:23 +01:00
class JournalProperty(Property):
    # Five args, every one of them flagged False.
    arg_types = {"no": False, "dual": False, "before": False, "local": False, "after": False}
2025-02-13 14:40:43 +01:00
class LanguageProperty(Property):
    # Reachable through the "LANGUAGE" key of Properties.NAME_TO_PROPERTY.
    arg_types = {"this": True}
2025-02-13 14:40:43 +01:00
2025-02-13 20:21:40 +01:00
# spark ddl
class ClusteredByProperty(Property):
    # Reachable through the "CLUSTERED_BY" key of Properties.NAME_TO_PROPERTY.
    # "expressions" and "buckets" are flagged True; "sorted_by" is flagged False.
    arg_types = {
        "expressions": True,
        "sorted_by": False,
        "buckets": True,
    }
2025-02-13 15:58:40 +01:00
class DictProperty(Property):
    # "this" and "kind" are flagged True; "settings" is flagged False.
    arg_types = {
        "this": True,
        "kind": True,
        "settings": False,
    }
class DictSubProperty(Property):
    # Adds nothing of its own; arg_types is whatever Property provides.
    pass
class DictRange(Property):
    # All three args ("this", "min", "max") are flagged True.
    arg_types = {
        "this": True,
        "min": True,
        "max": True,
    }
2025-02-13 21:41:14 +01:00
class DynamicProperty(Property):
    # Argument-less property: arg_types is deliberately empty.
    arg_types = {}
2025-02-13 15:58:40 +01:00
# Clickhouse CREATE ... ON CLUSTER modifier
# https://clickhouse.com/docs/en/sql-reference/distributed-ddl
class OnCluster(Property):
    # Carries a single arg, "this" (flagged True).
    arg_types = {"this": True}
2025-02-13 21:52:32 +01:00
# Clickhouse EMPTY table "property"
class EmptyProperty(Property):
    # Argument-less property: arg_types is deliberately empty.
    arg_types = {}
2025-02-13 15:40:23 +01:00
class LikeProperty(Property):
    # "this" is flagged True; "expressions" is flagged False.
    arg_types = {
        "this": True,
        "expressions": False,
    }
class LocationProperty(Property):
    # Reachable through the "LOCATION" key of Properties.NAME_TO_PROPERTY.
    arg_types = {"this": True}
2025-02-13 14:43:32 +01:00
2025-02-13 21:29:39 +01:00
class LockProperty(Property):
    # Reachable through the "LOCK" key of Properties.NAME_TO_PROPERTY.
    arg_types = {"this": True}
2025-02-13 15:40:23 +01:00
class LockingProperty(Property):
    # "kind" and "lock_type" are flagged True; the remaining args are flagged False.
    arg_types = {
        "this": False,
        "kind": True,
        "for_or_in": False,
        "lock_type": True,
        "override": False,
    }
class LogProperty(Property):
    # Carries a single arg, "no" (flagged True).
    arg_types = {"no": True}
class MaterializedProperty(Property):
    # Single "this" arg, flagged False.
    arg_types = {"this": False}
class MergeBlockRatioProperty(Property):
    # Four args, every one of them flagged False.
    arg_types = {
        "this": False,
        "no": False,
        "default": False,
        "percent": False,
    }
class NoPrimaryIndexProperty(Property):
    # Argument-less property: arg_types is deliberately empty.
    arg_types = {}
2025-02-13 15:40:23 +01:00
2025-02-13 20:58:22 +01:00
class OnProperty(Property):
    # Carries a single arg, "this" (flagged True).
    arg_types = {"this": True}
2025-02-13 15:40:23 +01:00
class OnCommitProperty(Property):
    # Single "delete" arg, flagged False.
    arg_types = {"delete": False}
2025-02-13 15:40:23 +01:00
class PartitionedByProperty(Property):
    # Reachable through the "PARTITIONED_BY" key of Properties.NAME_TO_PROPERTY.
    arg_types = {"this": True}
2025-02-13 21:16:09 +01:00
# https://www.postgresql.org/docs/current/sql-createtable.html
class PartitionBoundSpec(Expression):
    # Arg meaning:
    #   this             -> IN / MODULUS
    #   expression       -> REMAINDER
    #   from_expressions -> FROM (...)
    #   to_expressions   -> TO (...)
    # All four are flagged False.
    arg_types = {
        "this": False,
        "expression": False,
        "from_expressions": False,
        "to_expressions": False,
    }
class PartitionedOfProperty(Property):
    # Arg meaning:
    #   this       -> parent_table (schema)
    #   expression -> FOR VALUES ... / DEFAULT
    arg_types = {
        "this": True,
        "expression": True,
    }
2025-02-13 21:52:32 +01:00
class StreamingTableProperty(Property):
    # Argument-less property: arg_types is deliberately empty.
    arg_types = {}
2025-02-13 21:06:11 +01:00
class RemoteWithConnectionModelProperty(Property):
    # Carries a single arg, "this" (flagged True).
    arg_types = {"this": True}
2025-02-13 15:40:23 +01:00
class ReturnsProperty(Property):
    # Reachable through the "RETURNS" key of Properties.NAME_TO_PROPERTY.
    # Every arg is flagged False.
    arg_types = {
        "this": False,
        "is_table": False,
        "table": False,
        "null": False,
    }
class StrictProperty(Property):
    # Argument-less property: arg_types is deliberately empty.
    arg_types = {}
2025-02-13 15:40:23 +01:00
2025-02-13 15:52:09 +01:00
class RowFormatProperty(Property):
    # Reachable through the "ROW_FORMAT" key of Properties.NAME_TO_PROPERTY.
    arg_types = {"this": True}
2025-02-13 14:58:37 +01:00
class RowFormatDelimitedProperty(Property):
    # https://cwiki.apache.org/confluence/display/hive/languagemanual+dml
    # Every arg below is flagged False.
    arg_types = {
        "fields": False,
        "escaped": False,
        "collection_items": False,
        "map_keys": False,
        "lines": False,
        "null": False,
        "serde": False,
    }
class RowFormatSerdeProperty(Property):
    # "this" is flagged True; "serde_properties" is flagged False.
    arg_types = {
        "this": True,
        "serde_properties": False,
    }
# https://spark.apache.org/docs/3.1.2/sql-ref-syntax-qry-select-transform.html
class QueryTransform(Expression):
    # Only "expressions" and "command_script" are flagged True.
    arg_types = {
        "expressions": True,
        "command_script": True,
        "schema": False,
        "row_format_before": False,
        "record_writer": False,
        "row_format_after": False,
        "record_reader": False,
    }
2025-02-13 14:58:37 +01:00
2025-02-13 21:04:58 +01:00
class SampleProperty(Property):
    # Carries a single arg, "this" (flagged True).
    arg_types = {"this": True}
2025-02-13 21:54:13 +01:00
# https://prestodb.io/docs/current/sql/create-view.html#synopsis
class SecurityProperty(Property):
    # Carries a single arg, "this" (flagged True).
    arg_types = {"this": True}
2025-02-13 15:40:23 +01:00
class SchemaCommentProperty(Property):
    # Reachable through the "COMMENT" key of Properties.NAME_TO_PROPERTY.
    arg_types = {"this": True}
2025-02-13 15:40:23 +01:00
class SerdeProperties(Property):
    # "expressions" is flagged True; "with" is flagged False.
    arg_types = {
        "expressions": True,
        "with": False,
    }
2025-02-13 15:08:15 +01:00
2025-02-13 15:40:23 +01:00
class SetProperty(Property):
    # Carries a single arg, "multi" (flagged True).
    arg_types = {"multi": True}
2025-02-13 15:08:15 +01:00
2025-02-13 21:30:28 +01:00
class SharingProperty(Property):
    # Single "this" arg, flagged False.
    arg_types = {"this": False}
2025-02-13 21:19:58 +01:00
class SetConfigProperty(Property):
    # Carries a single arg, "this" (flagged True).
    arg_types = {"this": True}
2025-02-13 15:57:23 +01:00
class SettingsProperty(Property):
    # Carries a single arg, "expressions" (flagged True).
    arg_types = {"expressions": True}
2025-02-13 15:40:23 +01:00
class SortKeyProperty(Property):
    # Reachable through the "SORTKEY" key of Properties.NAME_TO_PROPERTY.
    # "this" is flagged True; "compound" is flagged False.
    arg_types = {
        "this": True,
        "compound": False,
    }
2025-02-13 15:08:15 +01:00
2025-02-13 21:19:14 +01:00
class SqlReadWriteProperty(Property):
    # Carries a single arg, "this" (flagged True).
    arg_types = {"this": True}
2025-02-13 15:40:23 +01:00
class SqlSecurityProperty(Property):
    # Carries a single arg, "definer" (flagged True).
    arg_types = {"definer": True}
2025-02-13 15:08:15 +01:00
2025-02-13 15:52:09 +01:00
class StabilityProperty(Property):
    # Carries a single arg, "this" (flagged True).
    arg_types = {"this": True}
2025-02-13 15:40:23 +01:00
class TemporaryProperty(Property):
    # Single "this" arg, flagged False.
    arg_types = {"this": False}
2025-02-13 15:08:15 +01:00
2025-02-13 21:41:14 +01:00
class SecureProperty(Property):
    # Argument-less property: arg_types is deliberately empty.
    arg_types = {}
2025-02-13 21:57:37 +01:00
# https://docs.snowflake.com/en/sql-reference/sql/create-table
class Tags(ColumnConstraintKind, Property):
    # Inherits from both ColumnConstraintKind and Property; single "expressions" arg.
    arg_types = {"expressions": True}
2025-02-13 21:06:11 +01:00
class TransformModelProperty(Property):
    # Carries a single arg, "expressions" (flagged True).
    arg_types = {"expressions": True}
2025-02-13 15:40:23 +01:00
class TransientProperty(Property):
    # Single "this" arg, flagged False.
    arg_types = {"this": False}
2025-02-13 15:08:15 +01:00
2025-02-13 21:30:28 +01:00
class UnloggedProperty(Property):
    # Argument-less property: arg_types is deliberately empty.
    arg_types = {}
# https://learn.microsoft.com/en-us/sql/t-sql/statements/create-view-transact-sql?view=sql-server-ver16
class ViewAttributeProperty(Property):
    # Carries a single arg, "this" (flagged True).
    arg_types = {"this": True}
2025-02-13 15:52:09 +01:00
class VolatileProperty(Property):
    # Single "this" arg, flagged False.
    arg_types = {"this": False}
2025-02-13 15:08:15 +01:00
2025-02-13 15:40:23 +01:00
class WithDataProperty(Property):
    # "no" is flagged True; "statistics" is flagged False.
    arg_types = {
        "no": True,
        "statistics": False,
    }
2025-02-13 15:08:15 +01:00
2025-02-13 15:40:23 +01:00
class WithJournalTableProperty(Property):
    # Carries a single arg, "this" (flagged True).
    arg_types = {"this": True}
2025-02-13 15:26:26 +01:00
2025-02-13 21:41:14 +01:00
class WithSchemaBindingProperty(Property):
    # Carries a single arg, "this" (flagged True).
    arg_types = {"this": True}
2025-02-13 21:17:09 +01:00
class WithSystemVersioningProperty(Property):
    # "with" is the only arg flagged True; the others are flagged False.
    arg_types = {
        "on": False,
        "this": False,
        "data_consistency": False,
        "retention_period": False,
        "with": True,
    }
2025-02-13 21:17:09 +01:00
2025-02-13 21:56:02 +01:00
class WithProcedureOptions(Property):
    # Carries a single arg, "expressions" (flagged True).
    arg_types = {"expressions": True}
2025-02-13 21:57:37 +01:00
class EncodeProperty(Property):
    # Reachable through the "ENCODE" key of Properties.NAME_TO_PROPERTY.
    # "this" is flagged True; "properties" and "key" are flagged False.
    arg_types = {
        "this": True,
        "properties": False,
        "key": False,
    }
class IncludeProperty(Property):
    # Reachable through the "INCLUDE" key of Properties.NAME_TO_PROPERTY.
    # "this" is flagged True; "alias" and "column_def" are flagged False.
    arg_types = {
        "this": True,
        "alias": False,
        "column_def": False,
    }
2025-02-13 14:31:47 +01:00
class Properties(Expression):
    # The individual property nodes live under "expressions".
    arg_types = {"expressions": True}

    # Upper-case property name -> dedicated Property subclass; consulted by `from_dict`.
    NAME_TO_PROPERTY = {
        "ALGORITHM": AlgorithmProperty,
        "AUTO_INCREMENT": AutoIncrementProperty,
        "CHARACTER SET": CharacterSetProperty,
        "CLUSTERED_BY": ClusteredByProperty,
        "COLLATE": CollateProperty,
        "COMMENT": SchemaCommentProperty,
        "DEFINER": DefinerProperty,
        "DISTKEY": DistKeyProperty,
        "DISTRIBUTED_BY": DistributedByProperty,
        "DISTSTYLE": DistStyleProperty,
        "ENGINE": EngineProperty,
        "EXECUTE AS": ExecuteAsProperty,
        "FORMAT": FileFormatProperty,
        "LANGUAGE": LanguageProperty,
        "LOCATION": LocationProperty,
        "LOCK": LockProperty,
        "PARTITIONED_BY": PartitionedByProperty,
        "RETURNS": ReturnsProperty,
        "ROW_FORMAT": RowFormatProperty,
        "SORTKEY": SortKeyProperty,
        "ENCODE": EncodeProperty,
        "INCLUDE": IncludeProperty,
    }

    # Inverse of NAME_TO_PROPERTY (each class appears once above, so nothing is lost).
    PROPERTY_TO_NAME = {prop_cls: name for name, prop_cls in NAME_TO_PROPERTY.items()}

    # CREATE property locations
    # Form: schema specified
    #   create [POST_CREATE]
    #     table a [POST_NAME]
    #     (b int) [POST_SCHEMA]
    #     with ([POST_WITH])
    #     index (b) [POST_INDEX]
    #
    # Form: alias selection
    #   create [POST_CREATE]
    #     table a [POST_NAME]
    #     as [POST_ALIAS] (select * from b) [POST_EXPRESSION]
    #     index (c) [POST_INDEX]
    class Location(AutoName):
        POST_CREATE = auto()
        POST_NAME = auto()
        POST_SCHEMA = auto()
        POST_WITH = auto()
        POST_ALIAS = auto()
        POST_EXPRESSION = auto()
        POST_INDEX = auto()
        UNSUPPORTED = auto()

    @classmethod
    def from_dict(cls, properties_dict: t.Dict) -> Properties:
        """
        Build a `Properties` node out of a plain mapping.

        Args:
            properties_dict: mapping of property name to value. Names are upper-cased
                before being looked up in `NAME_TO_PROPERTY`; values go through `convert`.

        Returns:
            A new instance of `cls` with one property expression per mapping entry,
            in the mapping's iteration order.
        """

        def _to_property(name, raw_value):
            dedicated_cls = cls.NAME_TO_PROPERTY.get(name.upper())
            if not dedicated_cls:
                # Unknown name: fall back to a generic Property keyed by the original name.
                return Property(this=Literal.string(name), value=convert(raw_value))
            return dedicated_cls(this=convert(raw_value))

        return cls(
            expressions=[_to_property(name, value) for name, value in properties_dict.items()]
        )
2025-02-13 06:15:54 +01:00
class Qualify(Expression):
    # No overrides: arg_types is whatever Expression provides.
    pass
2025-02-13 21:06:11 +01:00
class InputOutputFormat(Expression):
    # Both args are flagged False.
    arg_types = {
        "input_format": False,
        "output_format": False,
    }
2025-02-13 15:07:05 +01:00
# https://www.ibm.com/docs/en/ias?topic=procedures-return-statement-in-sql
class Return(Expression):
    # No overrides: arg_types is whatever Expression provides.
    pass
2025-02-13 06:15:54 +01:00
class Reference(Expression):
    # "this" is flagged True; "expressions" and "options" are flagged False.
    arg_types = {
        "this": True,
        "expressions": False,
        "options": False,
    }
2025-02-13 06:15:54 +01:00
class Tuple(Expression):
    arg_types = {"expressions": False}

    def isin(
        self,
        *expressions: t.Any,
        query: t.Optional[ExpOrStr] = None,
        unnest: t.Optional[ExpOrStr] | t.Collection[ExpOrStr] = None,
        copy: bool = True,
        **opts,
    ) -> In:
        """
        Build an `In` node that has this tuple as its left-hand side.

        Args:
            *expressions: candidate values; each one is passed through `convert`.
            query: optional query, handed to `maybe_parse`; ignored when falsy.
            unnest: one item or a collection of items, each handed to `maybe_parse`
                and gathered into a single `Unnest`; ignored when falsy.
            copy: forwarded to `maybe_copy`, `convert` and `maybe_parse`.
            opts: extra options forwarded to `maybe_parse`.

        Returns:
            The newly built `In` node.
        """
        # Intermediates are computed in the same order as the keyword arguments below.
        subject = maybe_copy(self, copy)
        candidates = [convert(candidate, copy=copy) for candidate in expressions]
        subquery = maybe_parse(query, copy=copy, **opts) if query else None

        unnested = None
        if unnest:
            unnested = Unnest(
                expressions=[
                    maybe_parse(t.cast(ExpOrStr, item), copy=copy, **opts)
                    for item in ensure_list(unnest)
                ]
            )

        return In(this=subject, expressions=candidates, query=subquery, unnest=unnested)
2025-02-13 06:15:54 +01:00
# Arg names shared by query-like nodes (spliced into their arg_types via `**QUERY_MODIFIERS`).
# Every entry maps to False; insertion order follows the tuple below.
QUERY_MODIFIERS = dict.fromkeys(
    (
        "match",
        "laterals",
        "joins",
        "connect",
        "pivots",
        "prewhere",
        "where",
        "group",
        "having",
        "qualify",
        "windows",
        "distribute",
        "sort",
        "cluster",
        "order",
        "limit",
        "offset",
        "locks",
        "sample",
        "settings",
        "format",
        "options",
    ),
    False,
)
2025-02-13 21:29:39 +01:00
# https://learn.microsoft.com/en-us/sql/t-sql/queries/option-clause-transact-sql?view=sql-server-ver16
# https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-query?view=sql-server-ver16
class QueryOption(Expression):
    # "this" is flagged True; "expression" is flagged False.
    arg_types = {
        "this": True,
        "expression": False,
    }
2025-02-13 20:04:59 +01:00
# https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
class WithTableHint(Expression):
    # Carries a single arg, "expressions" (flagged True).
    arg_types = {"expressions": True}
# https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
class IndexTableHint(Expression):
    # "this" is flagged True; "expressions" and "target" are flagged False.
    arg_types = {
        "this": True,
        "expressions": False,
        "target": False,
    }
2025-02-13 21:17:51 +01:00
# https://docs.snowflake.com/en/sql-reference/constructs/at-before
class HistoricalData(Expression):
    # All three args ("this", "kind", "expression") are flagged True.
    arg_types = {
        "this": True,
        "kind": True,
        "expression": True,
    }
2025-02-13 08:04:41 +01:00
class Table(Expression):
    # Every arg, including "this", is flagged False, so accessors below must not
    # assume that any of them is set.
    arg_types = {
        "this": False,
        "alias": False,
        "db": False,
        "catalog": False,
        "laterals": False,
        "joins": False,
        "pivots": False,
        "hints": False,
        "system_time": False,
        "version": False,
        "format": False,
        "pattern": False,
        "ordinality": False,
        "when": False,
        "only": False,
        "partition": False,
        "changes": False,
        "rows_from": False,
        "sample": False,
    }

    @property
    def name(self) -> str:
        """Return the name of the wrapped `this` node, or "" when there is none.

        An empty string is also returned when `this` is a `Func`.
        """
        this = self.this
        # "this" is flagged False in arg_types, so it can be missing; guard against
        # dereferencing None instead of raising AttributeError.
        if this is None or isinstance(this, Func):
            return ""
        return this.name

    @property
    def db(self) -> str:
        """Return the text of the "db" arg."""
        return self.text("db")

    @property
    def catalog(self) -> str:
        """Return the text of the "catalog" arg."""
        return self.text("catalog")

    @property
    def selects(self) -> t.List[Expression]:
        """Always an empty list for a table."""
        return []

    @property
    def named_selects(self) -> t.List[str]:
        """Always an empty list for a table."""
        return []

    @property
    def parts(self) -> t.List[Expression]:
        """Return the parts of a table in order catalog, db, table."""
        parts: t.List[Expression] = []

        for arg in ("catalog", "db", "this"):
            part = self.args.get(arg)

            if isinstance(part, Dot):
                # A dotted part contributes each of its flattened components.
                parts.extend(part.flatten())
            elif isinstance(part, Expression):
                parts.append(part)

        return parts

    def to_column(self, copy: bool = True) -> Alias | Column | Dot:
        """
        Convert this table reference into a column-like expression.

        Args:
            copy: forwarded to `column` and `alias_`.

        Returns:
            The expression built from `parts`, wrapped with `alias_` when the table
            has an "alias" arg.
        """
        parts = self.parts
        last_part = parts[-1]

        if isinstance(last_part, Identifier):
            col = column(*reversed(parts[0:4]), fields=parts[4:], copy=copy)  # type: ignore
        else:
            # This branch will be reached if a function or array is wrapped in a `Table`
            col = last_part

        alias = self.args.get("alias")
        if alias:
            col = alias_(col, alias.this, copy=copy)

        return col
2025-02-13 15:07:05 +01:00
2025-02-13 21:41:14 +01:00
class SetOperation(Query):
    # Own args first, then every shared query modifier.
    arg_types = {
        "with": False,
        "this": True,
        "expression": True,
        "distinct": False,
        "by_name": False,
        **QUERY_MODIFIERS,
    }

    def select(
        self: S,
        *expressions: t.Optional[ExpOrStr],
        append: bool = True,
        dialect: DialectType = None,
        copy: bool = True,
        **opts,
    ) -> S:
        # The same select() call is applied to both operands. They are mutated in place
        # (copy=False) because `target` is already the copy whenever one was requested.
        target = maybe_copy(self, copy)
        forwarded = dict(append=append, dialect=dialect, copy=False, **opts)

        left_operand = target.this.unnest()
        left_operand.select(*expressions, **forwarded)

        right_operand = target.expression.unnest()
        right_operand.select(*expressions, **forwarded)

        return target

    @property
    def named_selects(self) -> t.List[str]:
        # Delegates to the unnested left operand.
        left_operand = self.this.unnest()
        return left_operand.named_selects

    @property
    def is_star(self) -> bool:
        # Either operand being a star is enough.
        return self.this.is_star or self.expression.is_star

    @property
    def selects(self) -> t.List[Expression]:
        # Delegates to the unnested left operand.
        left_operand = self.this.unnest()
        return left_operand.selects

    @property
    def left(self) -> Query:
        # Alias for the "this" operand.
        return self.this

    @property
    def right(self) -> Query:
        # Alias for the "expression" operand.
        return self.expression
2025-02-13 21:41:14 +01:00
class Union(SetOperation):
    # Inherits everything from SetOperation; only the node type differs.
    pass
2025-02-13 21:41:14 +01:00
class Except(SetOperation):
    # Inherits everything from SetOperation; only the node type differs.
    pass
class Intersect(SetOperation):
    # Inherits everything from SetOperation; only the node type differs.
    pass
2025-02-13 21:56:02 +01:00
class Update(DML):
    arg_types = {
        "with": False,
        "this": False,
        "expressions": True,
        "from": False,
        "where": False,
        "returning": False,
        "order": False,
        "limit": False,
    }

    def table(
        self, expression: ExpOrStr, dialect: DialectType = None, copy: bool = True, **opts
    ) -> Update:
        """
        Set the table to update.

        Example:
            >>> Update().table("my_table").set_("x = 1").sql()
            'UPDATE my_table SET x = 1'

        Args:
            expression : the SQL code strings to parse.
                If a `Table` instance is passed, this is used as-is.
                If another `Expression` instance is passed, it will be wrapped in a `Table`.
            dialect: the dialect used to parse the input expression.
            copy: if `False`, modify this expression instance in-place.
            opts: other options to use to parse the input expressions.

        Returns:
            The modified Update expression.
        """
        return _apply_builder(
            expression=expression,
            instance=self,
            arg="this",
            into=Table,
            prefix=None,
            dialect=dialect,
            copy=copy,
            **opts,
        )

    def set_(
        self,
        *expressions: ExpOrStr,
        append: bool = True,
        dialect: DialectType = None,
        copy: bool = True,
        **opts,
    ) -> Update:
        """
        Append to or set the SET expressions.

        Example:
            >>> Update().table("my_table").set_("x = 1").sql()
            'UPDATE my_table SET x = 1'

        Args:
            *expressions: the SQL code strings to parse.
                If `Expression` instance(s) are passed, they will be used as-is.
                Multiple expressions are combined with a comma.
            append: if `True`, add the new expressions to any existing SET expressions.
                Otherwise, this resets the expressions.
            dialect: the dialect used to parse the input expressions.
            copy: if `False`, modify this expression instance in-place.
            opts: other options to use to parse the input expressions.

        Returns:
            The modified Update expression.
        """
        return _apply_list_builder(
            *expressions,
            instance=self,
            arg="expressions",
            append=append,
            into=Expression,
            prefix=None,
            dialect=dialect,
            copy=copy,
            **opts,
        )

    def where(
        self,
        *expressions: t.Optional[ExpOrStr],
        append: bool = True,
        dialect: DialectType = None,
        copy: bool = True,
        **opts,
    ) -> Update:
        """
        Append to or set the WHERE expressions.

        Example:
            >>> Update().table("tbl").set_("x = 1").where("x = 'a' OR x < 'b'").sql()
            "UPDATE tbl SET x = 1 WHERE x = 'a' OR x < 'b'"

        Args:
            *expressions: the SQL code strings to parse.
                If an `Expression` instance is passed, it will be used as-is.
                Multiple expressions are combined with an AND operator.
            append: if `True`, AND the new expressions to any existing expression.
                Otherwise, this resets the expression.
            dialect: the dialect used to parse the input expressions.
            copy: if `False`, modify this expression instance in-place.
            opts: other options to use to parse the input expressions.

        Returns:
            The modified Update expression.
        """
        # The builder is applied to `self` (an Update), exactly like the sibling
        # builder methods of this class, hence the `Update` return annotation.
        return _apply_conjunction_builder(
            *expressions,
            instance=self,
            arg="where",
            append=append,
            into=Where,
            dialect=dialect,
            copy=copy,
            **opts,
        )

    def from_(
        self,
        expression: t.Optional[ExpOrStr] = None,
        dialect: DialectType = None,
        copy: bool = True,
        **opts,
    ) -> Update:
        """
        Set the FROM expression.

        Example:
            >>> Update().table("my_table").set_("x = 1").from_("baz").sql()
            'UPDATE my_table SET x = 1 FROM baz'

        Args:
            expression : the SQL code strings to parse.
                If a `From` instance is passed, this is used as-is.
                If another `Expression` instance is passed, it will be wrapped in a `From`.
                If nothing is passed in then a from is not applied to the expression
            dialect: the dialect used to parse the input expression.
            copy: if `False`, modify this expression instance in-place.
            opts: other options to use to parse the input expressions.

        Returns:
            The modified Update expression.
        """
        if not expression:
            # Nothing to apply; still honour the copy contract.
            return maybe_copy(self, copy)

        return _apply_builder(
            expression=expression,
            instance=self,
            arg="from",
            into=From,
            prefix="FROM",
            dialect=dialect,
            copy=copy,
            **opts,
        )

    def with_(
        self,
        alias: ExpOrStr,
        as_: ExpOrStr,
        recursive: t.Optional[bool] = None,
        materialized: t.Optional[bool] = None,
        append: bool = True,
        dialect: DialectType = None,
        copy: bool = True,
        **opts,
    ) -> Update:
        """
        Append to or set the common table expressions.

        Example:
            >>> Update().table("my_table").set_("x = 1").from_("baz").with_("baz", "SELECT id FROM foo").sql()
            'WITH baz AS (SELECT id FROM foo) UPDATE my_table SET x = 1 FROM baz'

        Args:
            alias: the SQL code string to parse as the table name.
                If an `Expression` instance is passed, this is used as-is.
            as_: the SQL code string to parse as the table expression.
                If an `Expression` instance is passed, it will be used as-is.
            recursive: set the RECURSIVE part of the expression. Defaults to `False`.
            materialized: set the MATERIALIZED part of the expression.
            append: if `True`, add to any existing expressions.
                Otherwise, this resets the expressions.
            dialect: the dialect used to parse the input expression.
            copy: if `False`, modify this expression instance in-place.
            opts: other options to use to parse the input expressions.

        Returns:
            The modified expression.
        """
        return _apply_cte_builder(
            self,
            alias,
            as_,
            recursive=recursive,
            materialized=materialized,
            append=append,
            dialect=dialect,
            copy=copy,
            **opts,
        )
2025-02-13 06:15:54 +01:00
2025-02-13 14:43:32 +01:00
class Values(UDTF):
    # "expressions" is flagged True; "alias" is flagged False.
    arg_types = {
        "expressions": True,
        "alias": False,
    }
2025-02-13 06:15:54 +01:00
class Var(Expression):
    # No overrides: arg_types is whatever Expression provides.
    pass
2025-02-13 20:58:22 +01:00
class Version(Expression):
    """
    Time travel, iceberg, bigquery etc
    https://trino.io/docs/current/connector/iceberg.html?highlight=snapshot#using-snapshots
    https://www.databricks.com/blog/2019/02/04/introducing-delta-time-travel-for-large-scale-data-lakes.html
    https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#for_system_time_as_of
    https://learn.microsoft.com/en-us/sql/relational-databases/tables/querying-data-in-a-system-versioned-temporal-table?view=sql-server-ver16
    this is either TIMESTAMP or VERSION
    kind is ("AS OF", "BETWEEN")
    """

    # "this" and "kind" are flagged True; "expression" is flagged False.
    arg_types = {
        "this": True,
        "kind": True,
        "expression": False,
    }
2025-02-13 06:15:54 +01:00
class Schema(Expression):
    # Both args are flagged False.
    arg_types = {
        "this": False,
        "expressions": False,
    }
2025-02-13 06:15:54 +01:00
2025-02-13 15:57:23 +01:00
# https://dev.mysql.com/doc/refman/8.0/en/select.html
# https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/SELECT.html
class Lock(Expression):
    # "update" is flagged True; "expressions" and "wait" are flagged False.
    arg_types = {
        "update": True,
        "expressions": False,
        "wait": False,
    }
2025-02-13 15:09:58 +01:00
2025-02-13 21:29:39 +01:00
class Select ( Query ) :
2025-02-13 06:15:54 +01:00
# Select-specific args first, then every shared query modifier; all are flagged False.
arg_types = {
    "with": False,
    "kind": False,
    "expressions": False,
    "hint": False,
    "distinct": False,
    "into": False,
    "from": False,
    "operation_modifiers": False,
    **QUERY_MODIFIERS,
}
2025-02-13 15:57:23 +01:00
def from_(
    self, expression: ExpOrStr, dialect: DialectType = None, copy: bool = True, **opts
) -> Select:
    """
    Set the FROM expression.

    Example:
        >>> Select().from_("tbl").select("x").sql()
        'SELECT x FROM tbl'

    Args:
        expression : the SQL code strings to parse.
            If a `From` instance is passed, this is used as-is.
            If another `Expression` instance is passed, it will be wrapped in a `From`.
        dialect: the dialect used to parse the input expression.
        copy: if `False`, modify this expression instance in-place.
        opts: other options to use to parse the input expressions.

    Returns:
        The modified Select expression.
    """
    # Stored under the "from" arg; plain strings are parsed with the "FROM" prefix.
    return _apply_builder(
        instance=self,
        expression=expression,
        arg="from",
        prefix="FROM",
        into=From,
        dialect=dialect,
        copy=copy,
        **opts,
    )
2025-02-13 15:57:23 +01:00
def group_by(
    self,
    *expressions: t.Optional[ExpOrStr],
    append: bool = True,
    dialect: DialectType = None,
    copy: bool = True,
    **opts,
) -> Select:
    """
    Set the GROUP BY expression.

    Example:
        >>> Select().from_("tbl").select("x", "COUNT(1)").group_by("x").sql()
        'SELECT x, COUNT(1) FROM tbl GROUP BY x'

    Args:
        *expressions: the SQL code strings to parse.
            If a `Group` instance is passed, this is used as-is.
            If another `Expression` instance is passed, it will be wrapped in a `Group`.
            If nothing is passed in then a group by is not applied to the expression
        append: if `True`, add to any existing expressions.
            Otherwise, this flattens all the `Group` expression into a single expression.
        dialect: the dialect used to parse the input expression.
        copy: if `False`, modify this expression instance in-place.
        opts: other options to use to parse the input expressions.

    Returns:
        The modified Select expression.
    """
    if expressions:
        return _apply_child_list_builder(
            *expressions,
            instance=self,
            arg="group",
            append=append,
            copy=copy,
            prefix="GROUP BY",
            into=Group,
            dialect=dialect,
            **opts,
        )

    # Nothing to group by: hand back this node (or a copy of it) untouched.
    return self.copy() if copy else self
2025-02-13 15:57:23 +01:00
def sort_by(
    self,
    *expressions: t.Optional[ExpOrStr],
    append: bool = True,
    dialect: DialectType = None,
    copy: bool = True,
    **opts,
) -> Select:
    """
    Set the SORT BY expression.

    Example:
        >>> Select().from_("tbl").select("x").sort_by("x DESC").sql(dialect="hive")
        'SELECT x FROM tbl SORT BY x DESC'

    Args:
        *expressions: the SQL code strings to parse.
            If a `Sort` instance is passed, this is used as-is.
            If another `Expression` instance is passed, it will be wrapped in a `Sort`.
        append: if `True`, add to any existing expressions.
            Otherwise, this flattens all the `Sort` expression into a single expression.
        dialect: the dialect used to parse the input expression.
        copy: if `False`, modify this expression instance in-place.
        opts: other options to use to parse the input expressions.

    Returns:
        The modified Select expression.
    """
    # Stored under the "sort" arg; plain strings are parsed with the "SORT BY" prefix.
    return _apply_child_list_builder(
        *expressions,
        instance=self,
        arg="sort",
        append=append,
        copy=copy,
        prefix="SORT BY",
        into=Sort,
        dialect=dialect,
        **opts,
    )
2025-02-13 15:57:23 +01:00
def cluster_by(
    self,
    *expressions: t.Optional[ExpOrStr],
    append: bool = True,
    dialect: DialectType = None,
    copy: bool = True,
    **opts,
) -> Select:
    """
    Set the CLUSTER BY expression.

    Example:
        >>> Select().from_("tbl").select("x").cluster_by("x DESC").sql(dialect="hive")
        'SELECT x FROM tbl CLUSTER BY x DESC'

    Args:
        *expressions: the SQL code strings to parse.
            If a `Cluster` instance is passed, this is used as-is.
            If another `Expression` instance is passed, it will be wrapped in a `Cluster`.
        append: if `True`, add to any existing expressions.
            Otherwise, this flattens all the `Cluster` expression into a single expression.
        dialect: the dialect used to parse the input expression.
        copy: if `False`, modify this expression instance in-place.
        opts: other options to use to parse the input expressions.

    Returns:
        The modified Select expression.
    """
    # Stored under the "cluster" arg; plain strings are parsed with the "CLUSTER BY" prefix.
    return _apply_child_list_builder(
        *expressions,
        instance=self,
        arg="cluster",
        append=append,
        copy=copy,
        prefix="CLUSTER BY",
        into=Cluster,
        dialect=dialect,
        **opts,
    )
2025-02-13 15:26:26 +01:00
def select(
    self,
    *expressions: t.Optional[ExpOrStr],
    append: bool = True,
    dialect: DialectType = None,
    copy: bool = True,
    **opts,
) -> Select:
    """
    Append to or set the SELECT expressions stored under the `expressions` arg.

    Args:
        *expressions: the SQL code strings to parse (they are parsed into `Expression`).
        append: forwarded to the list builder together with the new expressions.
        dialect: the dialect used to parse the input expressions.
        copy: if `False`, modify this expression instance in-place.
        opts: other options to use to parse the input expressions.

    Returns:
        The modified Select expression.
    """
    return _apply_list_builder(
        *expressions,
        instance=self,
        arg="expressions",
        into=Expression,
        append=append,
        dialect=dialect,
        copy=copy,
        **opts,
    )
2025-02-13 15:57:23 +01:00
def lateral(
    self,
    *expressions: t.Optional[ExpOrStr],
    append: bool = True,
    dialect: DialectType = None,
    copy: bool = True,
    **opts,
) -> Select:
    """
    Append to or set the LATERAL expressions.

    Example:
        >>> Select().select("x").lateral("OUTER explode(y) tbl2 AS z").from_("tbl").sql()
        'SELECT x FROM tbl LATERAL VIEW OUTER EXPLODE(y) tbl2 AS z'

    Args:
        *expressions: the SQL code strings to parse.
            If an `Expression` instance is passed, it will be used as-is.
        append: if `True`, add to any existing expressions.
            Otherwise, this resets the expressions.
        dialect: the dialect used to parse the input expressions.
        copy: if `False`, modify this expression instance in-place.
        opts: other options to use to parse the input expressions.

    Returns:
        The modified Select expression.
    """
    # Strings are parsed with a "LATERAL VIEW" prefix into `Lateral` nodes and
    # stored under the "laterals" arg.
    return _apply_list_builder(
        *expressions,
        instance=self,
        arg="laterals",
        into=Lateral,
        prefix="LATERAL VIEW",
        append=append,
        dialect=dialect,
        copy=copy,
        **opts,
    )
def join(
    self,
    expression: ExpOrStr,
    on: t.Optional[ExpOrStr] = None,
    using: t.Optional[ExpOrStr | t.Collection[ExpOrStr]] = None,
    append: bool = True,
    join_type: t.Optional[str] = None,
    join_alias: t.Optional[Identifier | str] = None,
    dialect: DialectType = None,
    copy: bool = True,
    **opts,
) -> Select:
    """
    Append to or set the JOIN expressions.

    Example:
        >>> Select().select("*").from_("tbl").join("tbl2", on="tbl1.y = tbl2.y").sql()
        'SELECT * FROM tbl JOIN tbl2 ON tbl1.y = tbl2.y'

        >>> Select().select("1").from_("a").join("b", using=["x", "y", "z"]).sql()
        'SELECT 1 FROM a JOIN b USING (x, y, z)'

        Use `join_type` to change the type of join:

        >>> Select().select("*").from_("tbl").join("tbl2", on="tbl1.y = tbl2.y", join_type="left outer").sql()
        'SELECT * FROM tbl LEFT OUTER JOIN tbl2 ON tbl1.y = tbl2.y'

    Args:
        expression: the SQL code string to parse.
            If an `Expression` instance is passed, it will be used as-is.
        on: optionally specify the join "on" criteria as a SQL string.
            If an `Expression` instance is passed, it will be used as-is.
            Multiple criteria are combined with `and_`.
        using: optionally specify the join "using" criteria as a SQL string
            (or a collection of them).
            If an `Expression` instance is passed, it will be used as-is.
        append: if `True`, add to any existing expressions.
            Otherwise, this resets the expressions.
        join_type: if set, alter the parsed join type.
        join_alias: an optional alias for the joined source.
        dialect: the dialect used to parse the input expressions.
        copy: if `False`, modify this expression instance in-place.
        opts: other options to use to parse the input expressions.

    Returns:
        Select: the modified expression.
    """
    parse_args: t.Dict[str, t.Any] = {"dialect": dialect, **opts}

    # First try to parse the input as a full JOIN clause; if that fails, fall back to
    # parsing it as either a Join or any other expression (e.g. a bare table name).
    try:
        expression = maybe_parse(expression, into=Join, prefix="JOIN", **parse_args)
    except ParseError:
        expression = maybe_parse(expression, into=(Join, Expression), **parse_args)

    join = expression if isinstance(expression, Join) else Join(this=expression)

    # A bare SELECT cannot be a join source, so it is replaced by its subquery form.
    if isinstance(join.this, Select):
        join.this.replace(join.this.subquery())

    if join_type:
        method: t.Optional[Token]
        side: t.Optional[Token]
        kind: t.Optional[Token]

        method, side, kind = maybe_parse(join_type, into="JOIN_TYPE", **parse_args)  # type: ignore

        if method:
            join.set("method", method.text)
        if side:
            join.set("side", side.text)
        if kind:
            join.set("kind", kind.text)

    if on:
        on = and_(*ensure_list(on), dialect=dialect, copy=copy, **opts)
        join.set("on", on)

    if using:
        # `dialect` is forwarded so that USING columns are parsed with the same dialect
        # as the rest of the join (previously they were parsed with the default one).
        join = _apply_list_builder(
            *ensure_list(using),
            instance=join,
            arg="using",
            append=append,
            copy=copy,
            into=Identifier,
            dialect=dialect,
            **opts,
        )

    if join_alias:
        join.set("this", alias_(join.this, join_alias, table=True))

    return _apply_list_builder(
        join,
        instance=self,
        arg="joins",
        append=append,
        copy=copy,
        **opts,
    )
2025-02-13 15:57:23 +01:00
def where(
    self,
    *expressions: t.Optional[ExpOrStr],
    append: bool = True,
    dialect: DialectType = None,
    copy: bool = True,
    **opts,
) -> Select:
    """
    Append to or set the WHERE expressions.

    Example:
        >>> Select().select("x").from_("tbl").where("x = 'a' OR x < 'b'").sql()
        "SELECT x FROM tbl WHERE x = 'a' OR x < 'b'"

    Args:
        *expressions: the SQL code strings to parse.
            If an `Expression` instance is passed, it will be used as-is.
            Multiple expressions are combined with an AND operator.
        append: if `True`, AND the new expressions to any existing expression.
            Otherwise, this resets the expression.
        dialect: the dialect used to parse the input expressions.
        copy: if `False`, modify this expression instance in-place.
        opts: other options to use to parse the input expressions.

    Returns:
        Select: the modified expression.
    """
    # The conjunction builder stores the result under "where" as a `Where` node.
    return _apply_conjunction_builder(
        *expressions,
        instance=self,
        arg="where",
        into=Where,
        append=append,
        dialect=dialect,
        copy=copy,
        **opts,
    )
2025-02-13 15:57:23 +01:00
def having(
    self,
    *expressions: t.Optional[ExpOrStr],
    append: bool = True,
    dialect: DialectType = None,
    copy: bool = True,
    **opts,
) -> Select:
    """
    Append to or set the HAVING expressions.

    Example:
        >>> Select().select("x", "COUNT(y)").from_("tbl").group_by("x").having("COUNT(y) > 3").sql()
        'SELECT x, COUNT(y) FROM tbl GROUP BY x HAVING COUNT(y) > 3'

    Args:
        *expressions: the SQL code strings to parse.
            If an `Expression` instance is passed, it will be used as-is.
            Multiple expressions are combined with an AND operator.
        append: if `True`, AND the new expressions to any existing expression.
            Otherwise, this resets the expression.
        dialect: the dialect used to parse the input expressions.
        copy: if `False`, modify this expression instance in-place.
        opts: other options to use to parse the input expressions.

    Returns:
        The modified Select expression.
    """
    # The conjunction builder stores the result under "having" as a `Having` node.
    return _apply_conjunction_builder(
        *expressions,
        instance=self,
        arg="having",
        into=Having,
        append=append,
        dialect=dialect,
        copy=copy,
        **opts,
    )
2025-02-13 15:57:23 +01:00
def window(
    self,
    *expressions: t.Optional[ExpOrStr],
    append: bool = True,
    dialect: DialectType = None,
    copy: bool = True,
    **opts,
) -> Select:
    """
    Append to or set the expressions stored under the `windows` arg.

    Args:
        *expressions: the SQL code strings to parse (they are parsed into `Window`).
        append: forwarded to the list builder together with the new expressions.
        dialect: the dialect used to parse the input expressions.
        copy: if `False`, modify this expression instance in-place.
        opts: other options to use to parse the input expressions.

    Returns:
        The modified Select expression.
    """
    return _apply_list_builder(
        *expressions,
        instance=self,
        arg="windows",
        into=Window,
        append=append,
        dialect=dialect,
        copy=copy,
        **opts,
    )
2025-02-13 15:57:23 +01:00
def qualify(
    self,
    *expressions: t.Optional[ExpOrStr],
    append: bool = True,
    dialect: DialectType = None,
    copy: bool = True,
    **opts,
) -> Select:
    """
    Append to or set the expression stored under the `qualify` arg.

    Args:
        *expressions: the SQL code strings to parse; the conjunction builder wraps the
            result in a `Qualify` node.
        append: forwarded to the conjunction builder together with the new expressions.
        dialect: the dialect used to parse the input expressions.
        copy: if `False`, modify this expression instance in-place.
        opts: other options to use to parse the input expressions.

    Returns:
        The modified Select expression.
    """
    return _apply_conjunction_builder(
        *expressions,
        instance=self,
        arg="qualify",
        into=Qualify,
        append=append,
        dialect=dialect,
        copy=copy,
        **opts,
    )
2025-02-13 15:57:23 +01:00
def distinct(
    self, *ons: t.Optional[ExpOrStr], distinct: bool = True, copy: bool = True
) -> Select:
    """
    Set or remove the DISTINCT modifier, optionally with DISTINCT ON expressions.

    Example:
        >>> Select().from_("tbl").select("x").distinct().sql()
        'SELECT DISTINCT x FROM tbl'

    Args:
        ons: the expressions to distinct on. Falsy entries (e.g. `None`) are ignored.
        distinct: whether the Select should be distinct. If `False`, the `distinct`
            arg is cleared.
        copy: if `False`, modify this expression instance in-place.

    Returns:
        Select: the modified expression.
    """
    instance = maybe_copy(self, copy)

    # Filter first and only then decide whether an ON clause exists: if every entry in
    # `ons` is falsy, no (empty) Tuple must be attached to the Distinct node.
    on_expressions = [maybe_parse(on, copy=copy) for on in ons if on]
    on = Tuple(expressions=on_expressions) if on_expressions else None

    instance.set("distinct", Distinct(on=on) if distinct else None)
    return instance
2025-02-13 15:57:23 +01:00
def ctas(
    self,
    table: ExpOrStr,
    properties: t.Optional[t.Dict] = None,
    dialect: DialectType = None,
    copy: bool = True,
    **opts,
) -> Create:
    """
    Convert this expression to a CREATE TABLE AS statement.

    Example:
        >>> Select().select("*").from_("tbl").ctas("x").sql()
        'CREATE TABLE x AS SELECT * FROM tbl'

    Args:
        table: the SQL code string to parse as the table name.
            If another `Expression` instance is passed, it will be used as-is.
        properties: an optional mapping of table properties
        dialect: the dialect used to parse the input table.
        copy: if `False`, modify this expression instance in-place.
        opts: other options to use to parse the input table.

    Returns:
        The new Create expression.
    """
    query = maybe_copy(self, copy)
    target = maybe_parse(table, into=Table, dialect=dialect, **opts)

    # An empty/None mapping means "no properties" rather than an empty Properties node.
    table_properties = Properties.from_dict(properties) if properties else None

    return Create(
        this=target,
        kind="TABLE",
        expression=query,
        properties=table_properties,
    )
2025-02-13 15:09:58 +01:00
def lock(self, update: bool = True, copy: bool = True) -> Select:
    """
    Set the locking read mode for this expression.

    Examples:
        >>> Select().select("x").from_("tbl").where("x = 'a'").lock().sql("mysql")
        "SELECT x FROM tbl WHERE x = 'a' FOR UPDATE"

        >>> Select().select("x").from_("tbl").where("x = 'a'").lock(update=False).sql("mysql")
        "SELECT x FROM tbl WHERE x = 'a' FOR SHARE"

    Args:
        update: if `True`, the locking type will be `FOR UPDATE`, else it will be `FOR SHARE`.
        copy: if `False`, modify this expression instance in-place.

    Returns:
        The modified expression.
    """
    target = maybe_copy(self, copy)
    # The "locks" arg is overwritten with a single-element list.
    locks = [Lock(update=update)]
    target.set("locks", locks)
    return target
2025-02-13 16:00:51 +01:00
def hint(self, *hints: ExpOrStr, dialect: DialectType = None, copy: bool = True) -> Select:
    """
    Set hints for this expression.

    Examples:
        >>> Select().select("x").from_("tbl").hint("BROADCAST(y)").sql(dialect="spark")
        'SELECT /*+ BROADCAST(y) */ x FROM tbl'

    Args:
        hints: The SQL code strings to parse as the hints.
            If an `Expression` instance is passed, it will be used as-is.
        dialect: The dialect used to parse the hints.
        copy: If `False`, modify this expression instance in-place.

    Returns:
        The modified expression.
    """
    target = maybe_copy(self, copy)
    parsed_hints = [maybe_parse(h, copy=copy, dialect=dialect) for h in hints]
    # The "hint" arg is overwritten with a single Hint node holding all parsed hints.
    target.set("hint", Hint(expressions=parsed_hints))
    return target
2025-02-13 06:15:54 +01:00
@property
def named_selects(self) -> t.List[str]:
    """Return the `output_name` of every projection whose `alias_or_name` is truthy."""
    names = []
    for projection in self.expressions:
        if projection.alias_or_name:
            names.append(projection.output_name)
    return names
2025-02-13 06:15:54 +01:00
2025-02-13 15:40:23 +01:00
@property
def is_star(self) -> bool:
    """Whether at least one of the selected expressions reports `is_star`."""
    for projection in self.expressions:
        if projection.is_star:
            return True
    return False
2025-02-13 06:15:54 +01:00
@property
def selects(self) -> t.List[Expression]:
    """Return this Select's `expressions` (the same object, not a copy)."""
    return self.expressions
2025-02-13 21:41:14 +01:00
# Tuple of query node classes, suitable for `isinstance` checks.
UNWRAPPED_QUERIES = (Select, SetOperation)
2025-02-13 21:29:39 +01:00
class Subquery(DerivedTable, Query):
    arg_types = {
        "this": True,
        "alias": False,
        "with": False,
        **QUERY_MODIFIERS,
    }

    def unnest(self):
        """Returns the first non subquery."""
        node = self
        # Walk down through `this` until something other than a Subquery is found.
        while isinstance(node, Subquery):
            node = node.this
        return node

    def unwrap(self) -> Subquery:
        """Climb through parent nodes for as long as the current node has `same_parent` and is a wrapper."""
        node = self
        while node.same_parent and node.is_wrapper:
            node = t.cast(Subquery, node.parent)
        return node

    def select(
        self,
        *expressions: t.Optional[ExpOrStr],
        append: bool = True,
        dialect: DialectType = None,
        copy: bool = True,
        **opts,
    ) -> Subquery:
        """Apply `select` to the innermost non-subquery node and return this (maybe copied) Subquery."""
        subquery = maybe_copy(self, copy)
        # `copy=False`: the copy (if requested) has already been made above.
        inner = subquery.unnest()
        inner.select(*expressions, append=append, dialect=dialect, copy=False, **opts)
        return subquery

    @property
    def is_wrapper(self) -> bool:
        """
        Whether this Subquery acts as a simple wrapper around another expression.

        SELECT * FROM (((SELECT * FROM t)))
                      ^
                      This corresponds to a "wrapper" Subquery node
        """
        for key, value in self.args.items():
            if key != "this" and value is not None:
                return False
        return True

    @property
    def is_star(self) -> bool:
        """Delegates to the wrapped expression's `is_star`."""
        return self.this.is_star

    @property
    def output_name(self) -> str:
        """The output name of a subquery is its alias."""
        return self.alias
2025-02-13 06:15:54 +01:00
class TableSample(Expression):
    # Arguments accepted by this node; all of them are flagged `False`.
    arg_types = {
        "expressions": False,
        "method": False,
        "bucket_numerator": False,
        "bucket_denominator": False,
        "bucket_field": False,
        "percent": False,
        "rows": False,
        "size": False,
        "seed": False,
    }
2025-02-13 15:07:05 +01:00
class Tag(Expression):
    """Tags are used for generating arbitrary sql like SELECT <span>x</span>."""

    # Arguments accepted by this node; all of them are flagged `False`.
    arg_types = {
        "this": False,
        "prefix": False,
        "postfix": False,
    }
2025-02-13 15:57:23 +01:00
# Represents both the standard SQL PIVOT operator and DuckDB's "simplified" PIVOT syntax
# https://duckdb.org/docs/sql/statements/pivot
class Pivot(Expression):
    arg_types = {
        "this": False,
        "alias": False,
        "expressions": False,
        "field": False,
        "unpivot": False,
        "using": False,
        "group": False,
        "columns": False,
        "include_nulls": False,
        "default_on_null": False,
    }

    @property
    def unpivot(self) -> bool:
        """The `unpivot` arg coerced to a bool (`False` when it is unset)."""
        flag = self.args.get("unpivot")
        return bool(flag)
2025-02-13 06:15:54 +01:00
2025-02-13 20:58:22 +01:00
class Window(Condition):
    # Only `this` is flagged `True`; every other argument is flagged `False`.
    arg_types = {
        "this": True,
        "partition_by": False,
        "order": False,
        "spec": False,
        "alias": False,
        "over": False,
        "first": False,
    }
class WindowSpec(Expression):
    # Arguments accepted by this node; all of them are flagged `False`.
    arg_types = {
        "kind": False,
        "start": False,
        "start_side": False,
        "end": False,
        "end_side": False,
    }
2025-02-13 21:29:39 +01:00
class PreWhere(Expression):
    """Expression subclass that adds nothing of its own to `Expression`."""
2025-02-13 06:15:54 +01:00
class Where(Expression):
    """Expression subclass that adds nothing of its own to `Expression`."""
class Star(Expression):
    arg_types = {"except": False, "replace": False, "rename": False}

    @property
    def name(self) -> str:
        """A star is always named `*`."""
        return "*"

    @property
    def output_name(self) -> str:
        """Same as `name`."""
        return self.name
2025-02-13 06:15:54 +01:00
2025-02-13 20:04:59 +01:00
class Parameter(Condition):
    # `this` is flagged `True`, `expression` is flagged `False`.
    arg_types = {"this": True, "expression": False}
2025-02-13 14:31:47 +01:00
2025-02-13 20:04:59 +01:00
class SessionParameter(Condition):
    # `this` is flagged `True`, `kind` is flagged `False`.
    arg_types = {"this": True, "kind": False}
2025-02-13 20:04:59 +01:00
class Placeholder(Condition):
    arg_types = {"this": False, "kind": False}

    @property
    def name(self) -> str:
        """The `this` arg if it is truthy, otherwise `?`."""
        return self.this or "?"
2025-02-13 06:15:54 +01:00
class Null(Condition):
    # This node takes no arguments.
    arg_types: t.Dict[str, t.Any] = {}

    @property
    def name(self) -> str:
        """Always `NULL`."""
        return "NULL"

    def to_py(self) -> Lit[None]:
        """The Python counterpart of this node is `None`."""
        return None
2025-02-13 06:15:54 +01:00
class Boolean(Condition):
    def to_py(self) -> bool:
        """Return the value stored in `this`."""
        return self.this
2025-02-13 06:15:54 +01:00
2025-02-13 20:58:22 +01:00
class DataTypeParam(Expression):
    arg_types = {"this": True, "expression": False}

    @property
    def name(self) -> str:
        """Delegates to the `name` of the wrapped `this` node."""
        return self.this.name
2025-02-13 15:57:23 +01:00
2025-02-13 21:52:32 +01:00
# The `nullable` arg is helpful when transpiling types from other dialects to ClickHouse, which
# assumes non-nullable types by default. Values `None` and `True` mean the type is nullable.
class DataType(Expression):
    arg_types = {
        "this": True,
        "expressions": False,
        "nested": False,
        "values": False,
        "prefix": False,
        "kind": False,
        "nullable": False,
    }

    class Type(AutoName):
        ARRAY = auto()
        AGGREGATEFUNCTION = auto()
        SIMPLEAGGREGATEFUNCTION = auto()
        BIGDECIMAL = auto()
        BIGINT = auto()
        BIGSERIAL = auto()
        BINARY = auto()
        BIT = auto()
        BOOLEAN = auto()
        BPCHAR = auto()
        CHAR = auto()
        DATE = auto()
        DATE32 = auto()
        DATEMULTIRANGE = auto()
        DATERANGE = auto()
        DATETIME = auto()
        DATETIME64 = auto()
        DECIMAL = auto()
        DECIMAL32 = auto()
        DECIMAL64 = auto()
        DECIMAL128 = auto()
        DECIMAL256 = auto()
        DOUBLE = auto()
        ENUM = auto()
        ENUM8 = auto()
        ENUM16 = auto()
        FIXEDSTRING = auto()
        FLOAT = auto()
        GEOGRAPHY = auto()
        GEOMETRY = auto()
        POINT = auto()
        RING = auto()
        LINESTRING = auto()
        MULTILINESTRING = auto()
        POLYGON = auto()
        MULTIPOLYGON = auto()
        HLLSKETCH = auto()
        HSTORE = auto()
        IMAGE = auto()
        INET = auto()
        INT = auto()
        INT128 = auto()
        INT256 = auto()
        INT4MULTIRANGE = auto()
        INT4RANGE = auto()
        INT8MULTIRANGE = auto()
        INT8RANGE = auto()
        INTERVAL = auto()
        IPADDRESS = auto()
        IPPREFIX = auto()
        IPV4 = auto()
        IPV6 = auto()
        JSON = auto()
        JSONB = auto()
        LIST = auto()
        LONGBLOB = auto()
        LONGTEXT = auto()
        LOWCARDINALITY = auto()
        MAP = auto()
        MEDIUMBLOB = auto()
        MEDIUMINT = auto()
        MEDIUMTEXT = auto()
        MONEY = auto()
        NAME = auto()
        NCHAR = auto()
        NESTED = auto()
        NULL = auto()
        NUMMULTIRANGE = auto()
        NUMRANGE = auto()
        NVARCHAR = auto()
        OBJECT = auto()
        RANGE = auto()
        ROWVERSION = auto()
        SERIAL = auto()
        SET = auto()
        SMALLINT = auto()
        SMALLMONEY = auto()
        SMALLSERIAL = auto()
        STRUCT = auto()
        SUPER = auto()
        TEXT = auto()
        TINYBLOB = auto()
        TINYTEXT = auto()
        TIME = auto()
        TIMETZ = auto()
        TIMESTAMP = auto()
        TIMESTAMPNTZ = auto()
        TIMESTAMPLTZ = auto()
        TIMESTAMPTZ = auto()
        TIMESTAMP_S = auto()
        TIMESTAMP_MS = auto()
        TIMESTAMP_NS = auto()
        TINYINT = auto()
        TSMULTIRANGE = auto()
        TSRANGE = auto()
        TSTZMULTIRANGE = auto()
        TSTZRANGE = auto()
        UBIGINT = auto()
        UINT = auto()
        UINT128 = auto()
        UINT256 = auto()
        UMEDIUMINT = auto()
        UDECIMAL = auto()
        UNION = auto()
        UNIQUEIDENTIFIER = auto()
        UNKNOWN = auto()  # Sentinel value, useful for type annotation
        USERDEFINED = "USER-DEFINED"
        USMALLINT = auto()
        UTINYINT = auto()
        UUID = auto()
        VARBINARY = auto()
        VARCHAR = auto()
        VARIANT = auto()
        VECTOR = auto()
        XML = auto()
        YEAR = auto()
        TDIGEST = auto()

    # Groupings of `Type` members. The composite sets below are built by unpacking the
    # smaller ones, so the definition order matters.
    STRUCT_TYPES = {
        Type.NESTED,
        Type.OBJECT,
        Type.STRUCT,
        Type.UNION,
    }

    ARRAY_TYPES = {
        Type.ARRAY,
        Type.LIST,
    }

    NESTED_TYPES = {
        *STRUCT_TYPES,
        *ARRAY_TYPES,
        Type.MAP,
    }

    TEXT_TYPES = {
        Type.CHAR,
        Type.NCHAR,
        Type.NVARCHAR,
        Type.TEXT,
        Type.VARCHAR,
        Type.NAME,
    }

    SIGNED_INTEGER_TYPES = {
        Type.BIGINT,
        Type.INT,
        Type.INT128,
        Type.INT256,
        Type.MEDIUMINT,
        Type.SMALLINT,
        Type.TINYINT,
    }

    UNSIGNED_INTEGER_TYPES = {
        Type.UBIGINT,
        Type.UINT,
        Type.UINT128,
        Type.UINT256,
        Type.UMEDIUMINT,
        Type.USMALLINT,
        Type.UTINYINT,
    }

    INTEGER_TYPES = {
        *SIGNED_INTEGER_TYPES,
        *UNSIGNED_INTEGER_TYPES,
        Type.BIT,
    }

    FLOAT_TYPES = {
        Type.DOUBLE,
        Type.FLOAT,
    }

    REAL_TYPES = {
        *FLOAT_TYPES,
        Type.BIGDECIMAL,
        Type.DECIMAL,
        Type.DECIMAL32,
        Type.DECIMAL64,
        Type.DECIMAL128,
        Type.DECIMAL256,
        Type.MONEY,
        Type.SMALLMONEY,
        Type.UDECIMAL,
    }

    NUMERIC_TYPES = {
        *INTEGER_TYPES,
        *REAL_TYPES,
    }

    TEMPORAL_TYPES = {
        Type.DATE,
        Type.DATE32,
        Type.DATETIME,
        Type.DATETIME64,
        Type.TIME,
        Type.TIMESTAMP,
        Type.TIMESTAMPNTZ,
        Type.TIMESTAMPLTZ,
        Type.TIMESTAMPTZ,
        Type.TIMESTAMP_MS,
        Type.TIMESTAMP_NS,
        Type.TIMESTAMP_S,
        Type.TIMETZ,
    }

    @classmethod
    def build(
        cls,
        dtype: DATA_TYPE,
        dialect: DialectType = None,
        udt: bool = False,
        copy: bool = True,
        **kwargs,
    ) -> DataType:
        """
        Constructs a DataType object.

        Args:
            dtype: the data type of interest.
            dialect: the dialect to use for parsing `dtype`, in case it's a string.
            udt: when set to True, `dtype` will be used as-is if it can't be parsed into a
                DataType, thus creating a user-defined type.
            copy: whether to copy the data type.
            kwargs: additional arguments to pass in the constructor of DataType.

        Returns:
            The constructed DataType object.

        Raises:
            ParseError: if `dtype` is a string that can't be parsed and `udt` is False.
            ValueError: if `dtype` is not a str, a DataType or a DataType.Type.
        """
        # Imported here rather than at module level.
        from sqlglot import parse_one

        if isinstance(dtype, str):
            if dtype.upper() == "UNKNOWN":
                return DataType(this=DataType.Type.UNKNOWN, **kwargs)

            try:
                data_type_exp = parse_one(
                    dtype, read=dialect, into=DataType, error_level=ErrorLevel.IGNORE
                )
            except ParseError:
                if udt:
                    return DataType(this=DataType.Type.USERDEFINED, kind=dtype, **kwargs)
                raise
        elif isinstance(dtype, DataType.Type):
            data_type_exp = DataType(this=dtype)
        elif isinstance(dtype, DataType):
            # NOTE: `kwargs` are not applied when an existing DataType is passed in.
            return maybe_copy(dtype, copy)
        else:
            raise ValueError(
                f"Invalid data type: {type(dtype)}. Expected str, DataType or DataType.Type"
            )

        # `kwargs` take precedence over the args of the parsed/constructed type.
        return DataType(**{**data_type_exp.args, **kwargs})

    def is_type(self, *dtypes: DATA_TYPE, check_nullable: bool = False) -> bool:
        """
        Checks whether this DataType matches one of the provided data types. Nested types or precision
        will be compared using "structural equivalence" semantics, so e.g. array<int> != array<float>.

        Args:
            dtypes: the data types to compare this DataType to.
            check_nullable: whether to take the NULLABLE type constructor into account for the comparison.
                If false, it means that NULLABLE<INT> is equivalent to INT.

        Returns:
            True, if and only if there is a type in `dtypes` which is equal to this DataType.
        """
        self_is_nullable = self.args.get("nullable")
        for dtype in dtypes:
            other_type = DataType.build(dtype, copy=False, udt=True)
            other_is_nullable = other_type.args.get("nullable")

            # A full (structural) comparison is needed when the other type carries
            # expressions, when nullability matters, or when either side is user-defined;
            # otherwise comparing the `this` type is enough.
            if (
                other_type.expressions
                or (check_nullable and (self_is_nullable or other_is_nullable))
                or self.this == DataType.Type.USERDEFINED
                or other_type.this == DataType.Type.USERDEFINED
            ):
                matches = self == other_type
            else:
                matches = self.this == other_type.this

            if matches:
                return True
        return False
2025-02-13 15:09:58 +01:00
2025-02-13 06:15:54 +01:00
2025-02-13 21:17:09 +01:00
# Anything accepted as a data type: a string, a DataType node or a DataType.Type member.
DATA_TYPE = t.Union[str, DataType, DataType.Type]
2025-02-13 15:01:55 +01:00
# https://www.postgresql.org/docs/15/datatype-pseudo.html
class PseudoType(DataType):
    # Overrides DataType's arg_types: only `this` is declared.
    arg_types = {"this": True}
2025-02-13 15:01:55 +01:00
2025-02-13 20:58:22 +01:00
# https://www.postgresql.org/docs/15/datatype-oid.html
class ObjectIdentifier(DataType):
    # Overrides DataType's arg_types: only `this` is declared.
    arg_types = {"this": True}
2025-02-13 20:58:22 +01:00
2025-02-13 06:15:54 +01:00
# WHERE x <OP> EXISTS|ALL|ANY|SOME(SELECT ...)
class SubqueryPredicate(Predicate):
    """Common base class for the subquery predicates below; adds nothing to `Predicate`."""
class All(SubqueryPredicate):
    """Subquery predicate; presumably the ALL form (inferred from the name)."""
class Any(SubqueryPredicate):
    """Subquery predicate; presumably the ANY form (inferred from the name)."""
2025-02-13 14:54:32 +01:00
# Commands to interact with the databases or engines. For most of the command
# expressions we parse whatever comes after the command's name as a string.
class Command(Expression):
    # `this` is flagged `True`, `expression` is flagged `False`.
    arg_types = {"this": True, "expression": False}
2025-02-13 15:01:55 +01:00
class Transaction(Expression):
    # Arguments accepted by this node; all of them are flagged `False`.
    arg_types = {"this": False, "modes": False, "mark": False}
2025-02-13 14:54:32 +01:00
2025-02-13 15:01:55 +01:00
class Commit(Expression):
    # Arguments accepted by this node; all of them are flagged `False`.
    arg_types = {"chain": False, "this": False, "durability": False}
2025-02-13 14:54:32 +01:00
2025-02-13 15:01:55 +01:00
class Rollback(Expression):
    # Arguments accepted by this node; all of them are flagged `False`.
    arg_types = {"savepoint": False, "this": False}
2025-02-13 14:54:32 +01:00
2025-02-13 21:52:32 +01:00
class Alter(Expression):
    """Node carrying the target (`this`), its `kind`, and the list of `actions` to apply."""

    arg_types = {
        "this": True,
        "kind": True,
        "actions": True,
        "exists": False,
        "only": False,
        "options": False,
        "cluster": False,
        "not_valid": False,
    }

    @property
    def kind(self) -> t.Optional[str]:
        """Return the `kind` arg upper-cased; a falsy value (e.g. None) is returned unchanged."""
        raw_kind = self.args.get("kind")
        if not raw_kind:
            return raw_kind
        return raw_kind.upper()

    @property
    def actions(self) -> t.List[Expression]:
        """Return the `actions` arg, or a new empty list when it is missing or falsy."""
        stored = self.args.get("actions")
        return stored if stored else []
2025-02-13 15:07:05 +01:00
# Declaration-only nodes: each class just fixes its argument table.
class AddConstraint(Expression):
    arg_types = {"expressions": True}


class AttachOption(Expression):
    arg_types = {"this": True, "expression": False}


class DropPartition(Expression):
    arg_types = {"expressions": True, "exists": False}


# https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#replace-partition
class ReplacePartition(Expression):
    arg_types = {"expression": True, "source": True}
2025-02-13 14:54:32 +01:00
# Binary expressions like (ADD a b)
class Binary(Condition):
    """Two-operand node: the operands are stored under `this` and `expression`."""

    arg_types = {"this": True, "expression": True}

    @property
    def left(self) -> Expression:
        """The first operand (alias for `this`)."""
        return self.this

    @property
    def right(self) -> Expression:
        """The second operand (alias for `expression`)."""
        return self.expression
# Binary operator nodes. Unless an `arg_types` table is given, a class adds
# nothing to its base and exists only to tag the operator.
class Add(Binary):
    pass


class Connector(Binary):
    pass


class And(Connector):
    pass


class Or(Connector):
    pass


class BitwiseAnd(Binary):
    pass


class BitwiseLeftShift(Binary):
    pass


class BitwiseOr(Binary):
    pass


class BitwiseRightShift(Binary):
    pass


class BitwiseXor(Binary):
    pass


class Div(Binary):
    arg_types = {"this": True, "expression": True, "typed": False, "safe": False}


class Overlaps(Binary):
    pass
2025-02-13 06:15:54 +01:00
class Dot(Binary):
    """Binary node whose naming-related properties are delegated to its right-hand operand."""

    @property
    def is_star(self) -> bool:
        """Whether the right-hand operand is a star."""
        return self.expression.is_star

    @property
    def name(self) -> str:
        """Name of the right-hand operand."""
        return self.expression.name

    @property
    def output_name(self) -> str:
        """Same as `name`."""
        return self.name

    @classmethod
    def build(cls, expressions: t.Sequence[Expression]) -> Dot:
        """
        Build a Dot object with a sequence of expressions.

        The expressions are folded left to right, so `[a, b, c]` becomes
        `Dot(Dot(a, b), c)`.

        Args:
            expressions: the operands to chain; at least two are required.

        Returns:
            The outermost Dot of the resulting left-nested chain.

        Raises:
            ValueError: if fewer than two expressions are given.
        """
        if len(expressions) < 2:
            raise ValueError("Dot requires >= 2 expressions.")

        # `Dot` (not `cls`) is used on purpose so the produced node type is unchanged
        # regardless of which subclass the method is invoked on.
        return t.cast(Dot, reduce(lambda x, y: Dot(this=x, expression=y), expressions))

    @property
    def parts(self) -> t.List[Expression]:
        """Return the parts of a table / column in order catalog, db, table."""
        this, *parts = self.flatten()

        # Work on the reversed list so the leading operand's own parts can be appended
        # in COLUMN_PARTS order; the final reverse restores the left-to-right order.
        parts.reverse()

        for arg in COLUMN_PARTS:
            part = this.args.get(arg)

            # Only args that are actually set to an Expression contribute a part.
            if isinstance(part, Expression):
                parts.append(part)

        parts.reverse()
        return parts
# More binary operator nodes; those also deriving from Predicate are the
# comparison-style operators.
class DPipe(Binary):
    arg_types = {"this": True, "expression": True, "safe": False}


class EQ(Binary, Predicate):
    pass


class NullSafeEQ(Binary, Predicate):
    pass


class NullSafeNEQ(Binary, Predicate):
    pass


# Represents e.g. := in DuckDB which is mostly used for setting parameters
class PropertyEQ(Binary):
    pass


class Distance(Binary):
    pass


class Escape(Binary):
    pass


class Glob(Binary, Predicate):
    pass


class GT(Binary, Predicate):
    pass


class GTE(Binary, Predicate):
    pass


class ILike(Binary, Predicate):
    pass


class ILikeAny(Binary, Predicate):
    pass


class IntDiv(Binary):
    pass


class Is(Binary, Predicate):
    pass
2025-02-13 14:53:05 +01:00
class Kwarg(Binary):
    """Kwarg in special functions like func(kwarg => y)."""


class Like(Binary, Predicate):
    pass


class LikeAny(Binary, Predicate):
    pass


class LT(Binary, Predicate):
    pass


class LTE(Binary, Predicate):
    pass


class Mod(Binary):
    pass


class Mul(Binary):
    pass


class NEQ(Binary, Predicate):
    pass


# https://www.postgresql.org/docs/current/ddl-schemas.html#DDL-SCHEMAS-PATH
class Operator(Binary):
    # Besides the two operands, the operator itself is stored as an arg.
    arg_types = {"this": True, "operator": True, "expression": True}


class SimilarTo(Binary, Predicate):
    pass


class Slice(Binary):
    # Unlike the Binary default, both operands are declared with False here.
    arg_types = {"this": False, "expression": False}


class Sub(Binary):
    pass
# Unary Expressions
# (NOT a)
class Unary(Condition):
    """Base class of the single-operand nodes declared below."""


class BitwiseNot(Unary):
    pass


class Not(Unary):
    pass
2025-02-13 15:52:09 +01:00
class Paren(Unary):
    """Unary node whose output name is taken from the wrapped expression."""

    @property
    def output_name(self) -> str:
        """Name of the wrapped (`this`) expression."""
        wrapped = self.this
        return wrapped.name
2025-02-13 06:15:54 +01:00
class Neg(Unary):
    """Unary negation node."""

    def to_py(self) -> int | Decimal:
        """Return the operand's Python value times -1 when this node is a number, else defer to the parent."""
        if not self.is_number:
            return super().to_py()

        # Multiply (rather than apply unary minus) to keep the exact original arithmetic.
        return self.this.to_py() * -1
2025-02-13 06:15:54 +01:00
class Alias(Expression):
    """Node pairing an expression (`this`) with an `alias`."""

    arg_types = {"this": True, "alias": False}

    @property
    def output_name(self) -> str:
        """The output name of an aliased expression is its alias."""
        return self.alias
2025-02-13 06:15:54 +01:00
2025-02-13 21:19:14 +01:00
# BigQuery requires the UNPIVOT column list aliases to be either strings or ints, but
# other dialects require identifiers. This enables us to transpile between them easily.
class PivotAlias(Alias):
    pass


# Represents Snowflake's ANY [ ORDER BY ... ] syntax
# https://docs.snowflake.com/en/sql-reference/constructs/pivot
class PivotAny(Expression):
    arg_types = {"this": False}


class Aliases(Expression):
    """Node pairing an expression (`this`) with several aliases (`expressions`)."""

    arg_types = {"this": True, "expressions": True}

    @property
    def aliases(self):
        """The alias list; simply exposes `expressions` under a clearer name."""
        return self.expressions
2025-02-13 21:19:14 +01:00
# https://docs.aws.amazon.com/redshift/latest/dg/query-super.html
class AtIndex(Expression):
    arg_types = {"this": True, "expression": True}


class AtTimeZone(Expression):
    arg_types = {"this": True, "zone": True}


class FromTimeZone(Expression):
    arg_types = {"this": True, "zone": True}


class Between(Predicate):
    # The tested value plus its two bounds.
    arg_types = {"this": True, "low": True, "high": True}
class Bracket(Condition):
    """Subscript node: `this` indexed by `expressions`."""

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/operators#array_subscript_operator
    arg_types = {
        "this": True,
        "expressions": True,
        "offset": False,
        "safe": False,
        "returns_list_for_maps": False,
    }

    @property
    def output_name(self) -> str:
        """Output name of the sole subscript when there is exactly one, else the inherited value."""
        subscripts = self.expressions
        if len(subscripts) != 1:
            return super().output_name
        return subscripts[0].output_name
2025-02-13 06:15:54 +01:00
class Distinct(Expression):
    arg_types = {"expressions": False, "on": False}


class In(Predicate):
    # Only `this` is declared with True; the remaining keys name the alternative
    # right-hand sides and flags this node can carry.
    arg_types = {
        "this": True,
        "expressions": False,
        "query": False,
        "unnest": False,
        "field": False,
        "is_global": False,
    }


# https://cloud.google.com/bigquery/docs/reference/standard-sql/procedural-language#for-in
class ForIn(Expression):
    arg_types = {"this": True, "expression": True}
2025-02-13 06:15:54 +01:00
class TimeUnit(Expression):
    """Automatically converts unit arg into a var."""

    arg_types = {"unit": False}

    # Abbreviation -> full unit name. Lookups are case-sensitive (keys are upper case).
    UNABBREVIATED_UNIT_NAME = {
        "D": "DAY",
        "H": "HOUR",
        "M": "MINUTE",
        "MS": "MILLISECOND",
        "NS": "NANOSECOND",
        "Q": "QUARTER",
        "S": "SECOND",
        "US": "MICROSECOND",
        "W": "WEEK",
        "Y": "YEAR",
    }

    # Node types whose `name` is read as the unit name in `__init__`.
    VAR_LIKE = (Column, Literal, Var)

    def __init__(self, **args):
        """
        Normalize the `unit` arg before handing all args to the parent constructor.

        A VAR_LIKE unit is replaced by a `Var` holding the (possibly unabbreviated)
        upper-cased name; a `Week` unit is updated in place so its `this` becomes an
        upper-cased `Var`. Any other unit value is passed through untouched.
        """
        unit = args.get("unit")

        if isinstance(unit, self.VAR_LIKE):
            raw_name = unit.name
            # Fall back to the raw name when it isn't a known abbreviation.
            full_name = self.UNABBREVIATED_UNIT_NAME.get(raw_name) or raw_name
            args["unit"] = Var(this=full_name.upper())
        elif isinstance(unit, Week):
            unit.set("this", Var(this=unit.this.name.upper()))

        super().__init__(**args)

    @property
    def unit(self) -> t.Optional[Var | IntervalSpan]:
        """The stored `unit` arg, or None when absent."""
        return self.args.get("unit")
2025-02-13 06:15:54 +01:00
2025-02-13 21:04:58 +01:00
class IntervalOp(TimeUnit):
    """TimeUnit node that also carries an `expression` and can be turned into an Interval."""

    arg_types = {"unit": False, "expression": True}

    def interval(self):
        """Build an Interval from copies of this node's `expression` and (if set) `unit`."""
        amount = self.expression.copy()
        unit = self.unit
        return Interval(this=amount, unit=unit.copy() if unit else None)
2025-02-13 20:55:29 +01:00
# https://www.oracletutorial.com/oracle-basics/oracle-interval/
# https://trino.io/docs/current/language/types.html#interval-day-to-second
# https://docs.databricks.com/en/sql/language-manual/data-types/interval-type.html
class IntervalSpan(DataType):
    arg_types = {"this": True, "expression": True}


class Interval(TimeUnit):
    arg_types = {"this": False, "unit": False}


class IgnoreNulls(Expression):
    pass


class RespectNulls(Expression):
    pass


# https://cloud.google.com/bigquery/docs/reference/standard-sql/aggregate-function-calls#max_min_clause
class HavingMax(Expression):
    arg_types = {"this": True, "expression": True, "max": True}
2025-02-13 06:15:54 +01:00
# Functions
class Func(Condition):
    """
    The base class for all function expressions.

    Attributes:
        is_var_len_args (bool): if set to True the last argument defined in arg_types will be
            treated as a variable length argument and the argument's value will be stored as a list.
        _sql_names (list): the SQL name (1st item in the list) and aliases (subsequent items) for this
            function expression. These values are used to map this node to a name during parsing as
            well as to provide the function's name during SQL string generation. By default the SQL
            name is set to the expression's class name transformed to snake case.
    """

    is_var_len_args = False

    @classmethod
    def from_arg_list(cls, args):
        """
        Instantiate this function from a positional argument list.

        Positional values are matched to the keys of `arg_types` in declaration order.
        When `is_var_len_args` is set, the last key receives every remaining value as a list.

        Args:
            args: the positional argument values.

        Returns:
            A new instance of `cls` built from the resulting keyword arguments.
        """
        if cls.is_var_len_args:
            all_arg_keys = list(cls.arg_types)
            # The last declared key is the variable-length one; the rest are matched positionally.
            non_var_len_arg_keys = all_arg_keys[:-1]
            num_non_var = len(non_var_len_arg_keys)

            args_dict = dict(zip(non_var_len_arg_keys, args))
            args_dict[all_arg_keys[-1]] = args[num_non_var:]
        else:
            # zip stops at the shorter input, so surplus args or keys are ignored.
            args_dict = dict(zip(cls.arg_types, args))

        return cls(**args_dict)

    @classmethod
    def sql_names(cls):
        """
        Return the SQL names for this function, computing the default lazily.

        Raises:
            NotImplementedError: if called on `Func` itself rather than a subclass.
        """
        if cls is Func:
            raise NotImplementedError(
                "SQL name is only supported by concrete function implementations"
            )
        # Check the class' own __dict__ so that names inherited from a parent are not reused.
        if "_sql_names" not in cls.__dict__:
            cls._sql_names = [camel_to_snake_case(cls.__name__)]
        return cls._sql_names

    @classmethod
    def sql_name(cls):
        """Return the primary SQL name, i.e. the first entry of `sql_names()`."""
        return cls.sql_names()[0]

    @classmethod
    def default_parser_mappings(cls):
        """Map each SQL name of this function to its `from_arg_list` builder."""
        return {name: cls.from_arg_list for name in cls.sql_names()}
class AggFunc(Func):
    """Base class of the aggregate function nodes."""


class ParameterizedAgg(AggFunc):
    arg_types = {"this": True, "expressions": True, "params": True}


class Abs(Func):
    pass


class ArgMax(AggFunc):
    arg_types = {"this": True, "expression": True, "count": False}
    _sql_names = ["ARG_MAX", "ARGMAX", "MAX_BY"]


class ArgMin(AggFunc):
    arg_types = {"this": True, "expression": True, "count": False}
    _sql_names = ["ARG_MIN", "ARGMIN", "MIN_BY"]


class ApproxTopK(AggFunc):
    arg_types = {"this": True, "expression": False, "counters": False}


class Flatten(Func):
    pass


# https://spark.apache.org/docs/latest/api/sql/index.html#transform
class Transform(Func):
    arg_types = {"this": True, "expression": True}
2025-02-13 06:15:54 +01:00
class Anonymous(Func):
    """Function node whose name is carried in `this` rather than by the class."""

    arg_types = {"this": True, "expressions": False}
    is_var_len_args = True

    @property
    def name(self) -> str:
        """`this` itself when it is a plain string, otherwise the `name` of `this`."""
        target = self.this
        if isinstance(target, str):
            return target
        return target.name
2025-02-13 06:15:54 +01:00
2025-02-13 21:19:14 +01:00
class AnonymousAggFunc(AggFunc):
    arg_types = {"this": True, "expressions": False}
    is_var_len_args = True


# https://clickhouse.com/docs/en/sql-reference/aggregate-functions/combinators
class CombinedAggFunc(AnonymousAggFunc):
    arg_types = {"this": True, "expressions": False, "parts": True}


class CombinedParameterizedAgg(ParameterizedAgg):
    arg_types = {"this": True, "expressions": True, "params": True, "parts": True}


# https://docs.snowflake.com/en/sql-reference/functions/hll
# https://docs.aws.amazon.com/redshift/latest/dg/r_HLL_function.html
class Hll(AggFunc):
    arg_types = {"this": True, "expressions": False}
    is_var_len_args = True


class ApproxDistinct(AggFunc):
    arg_types = {"this": True, "accuracy": False}
    _sql_names = ["APPROX_DISTINCT", "APPROX_COUNT_DISTINCT"]
2025-02-13 06:15:54 +01:00
2025-02-13 21:55:19 +01:00
class Apply(Func):
    arg_types = {"this": True, "expression": True}


class Array(Func):
    arg_types = {"expressions": False, "bracket_notation": False}
    is_var_len_args = True


# https://docs.snowflake.com/en/sql-reference/functions/to_array
class ToArray(Func):
    pass


# https://materialize.com/docs/sql/types/list/
class List(Func):
    arg_types = {"expressions": False}
    is_var_len_args = True
2025-02-13 21:43:00 +01:00
# String pad: is_left True -> LPAD, False -> RPAD
class Pad(Func):
    arg_types = {"this": True, "expression": True, "fill_pattern": False, "is_left": True}


# https://docs.snowflake.com/en/sql-reference/functions/to_char
# https://docs.oracle.com/en/database/oracle/oracle-database/23/sqlrf/TO_CHAR-number.html
class ToChar(Func):
    arg_types = {"this": True, "format": False, "nlsparam": False}


# https://docs.snowflake.com/en/sql-reference/functions/to_decimal
# https://docs.oracle.com/en/database/oracle/oracle-database/23/sqlrf/TO_NUMBER.html
class ToNumber(Func):
    arg_types = {
        "this": True,
        "format": False,
        "nlsparam": False,
        "precision": False,
        "scale": False,
    }


# https://docs.snowflake.com/en/sql-reference/functions/to_double
class ToDouble(Func):
    arg_types = {
        "this": True,
        "format": False,
    }


class Columns(Func):
    arg_types = {"this": True, "unpack": False}


# https://learn.microsoft.com/en-us/sql/t-sql/functions/cast-and-convert-transact-sql?view=sql-server-ver16#syntax
class Convert(Func):
    arg_types = {"this": True, "expression": True, "style": False}


class ConvertTimezone(Func):
    arg_types = {"source_tz": False, "target_tz": True, "timestamp": True}


class GenerateSeries(Func):
    arg_types = {"start": True, "end": True, "step": False, "is_end_exclusive": False}


# Postgres' GENERATE_SERIES function returns a row set, i.e. it implicitly explodes when it's
# used in a projection, so this expression is a helper that facilitates transpilation to other
# dialects. For example, we'd generate UNNEST(GENERATE_SERIES(...)) in DuckDB
class ExplodingGenerateSeries(GenerateSeries):
    pass
2025-02-13 06:15:54 +01:00
# Array-related function nodes (plus String / StringToArray, which sit among them).
class ArrayAgg(AggFunc):
    arg_types = {"this": True, "nulls_excluded": False}


class ArrayUniqueAgg(AggFunc):
    pass


class ArrayAll(Func):
    arg_types = {"this": True, "expression": True}


# Represents Python's `any(f(x) for x in array)`, where `array` is `this` and `f` is `expression`
class ArrayAny(Func):
    arg_types = {"this": True, "expression": True}


class ArrayConcat(Func):
    _sql_names = ["ARRAY_CONCAT", "ARRAY_CAT"]
    arg_types = {"this": True, "expressions": False}
    is_var_len_args = True


class ArrayConstructCompact(Func):
    arg_types = {"expressions": True}
    is_var_len_args = True


class ArrayContains(Binary, Func):
    _sql_names = ["ARRAY_CONTAINS", "ARRAY_HAS"]


class ArrayContainsAll(Binary, Func):
    _sql_names = ["ARRAY_CONTAINS_ALL", "ARRAY_HAS_ALL"]


class ArrayFilter(Func):
    arg_types = {"this": True, "expression": True}
    _sql_names = ["FILTER", "ARRAY_FILTER"]


class ArrayToString(Func):
    arg_types = {"this": True, "expression": True, "null": False}
    _sql_names = ["ARRAY_TO_STRING", "ARRAY_JOIN"]


# https://cloud.google.com/bigquery/docs/reference/standard-sql/timestamp_functions#string
class String(Func):
    arg_types = {"this": True, "zone": False}


class StringToArray(Func):
    arg_types = {"this": True, "expression": True, "null": False}
    _sql_names = ["STRING_TO_ARRAY", "SPLIT_BY_STRING"]


class ArrayOverlaps(Binary, Func):
    pass


class ArraySize(Func):
    arg_types = {"this": True, "expression": False}
    _sql_names = ["ARRAY_SIZE", "ARRAY_LENGTH"]


class ArraySort(Func):
    arg_types = {"this": True, "expression": False}


class ArraySum(Func):
    arg_types = {"this": True, "expression": False}


class ArrayUnionAgg(AggFunc):
    pass
class Avg(AggFunc):
    pass


class AnyValue(AggFunc):
    pass


class Lag(AggFunc):
    arg_types = {"this": True, "offset": False, "default": False}


class Lead(AggFunc):
    arg_types = {"this": True, "offset": False, "default": False}


# some dialects have a distinction between first and first_value, usually first is an aggregate func
# and first_value is a window func
class First(AggFunc):
    pass


class Last(AggFunc):
    pass


class FirstValue(AggFunc):
    pass


class LastValue(AggFunc):
    pass


class NthValue(AggFunc):
    arg_types = {"this": True, "offset": True}
2025-02-13 06:15:54 +01:00
class Case(Func):
    """CASE node with builder helpers for adding branches and a default."""

    arg_types = {"this": False, "ifs": True, "default": False}

    def when(self, condition: ExpOrStr, then: ExpOrStr, copy: bool = True, **opts) -> Case:
        """
        Append a branch to `ifs`.

        Args:
            condition: the branch condition, as an expression or a string to parse.
            then: the branch result, as an expression or a string to parse.
            copy: forwarded to `maybe_copy` for this node and to `maybe_parse` for both operands.
            opts: extra options forwarded to `maybe_parse`.

        Returns:
            The node the branch was appended to (the result of `maybe_copy`).
        """
        target = maybe_copy(self, copy)
        branch = If(
            this=maybe_parse(condition, copy=copy, **opts),
            true=maybe_parse(then, copy=copy, **opts),
        )
        target.append("ifs", branch)
        return target

    def else_(self, condition: ExpOrStr, copy: bool = True, **opts) -> Case:
        """
        Set the `default` arg.

        Args:
            condition: the default result, as an expression or a string to parse.
            copy: forwarded to `maybe_copy` for this node and to `maybe_parse` for the operand.
            opts: extra options forwarded to `maybe_parse`.

        Returns:
            The node the default was set on (the result of `maybe_copy`).
        """
        target = maybe_copy(self, copy)
        fallback = maybe_parse(condition, copy=copy, **opts)
        target.set("default", fallback)
        return target
2025-02-13 15:52:09 +01:00
2025-02-13 06:15:54 +01:00
class Cast(Func):
    """Cast of `this` to the DataType stored under `to`."""

    arg_types = {
        "this": True,
        "to": True,
        "format": False,
        "safe": False,
        "action": False,
    }

    @property
    def name(self) -> str:
        """Name of the expression being cast."""
        return self.this.name

    @property
    def to(self) -> DataType:
        """The target type; indexes `args` directly, so a missing `to` raises KeyError."""
        return self.args["to"]

    @property
    def output_name(self) -> str:
        """Same as `name`."""
        return self.name

    def is_type(self, *dtypes: DATA_TYPE) -> bool:
        """
        Checks whether this Cast's DataType matches one of the provided data types. Nested types
        like arrays or structs will be compared using "structural equivalence" semantics, so e.g.
        array<int> != array<float>.

        Args:
            dtypes: the data types to compare this Cast's DataType to.

        Returns:
            True, if and only if there is a type in `dtypes` which is equal to this Cast's DataType.
        """
        target_type = self.to
        return target_type.is_type(*dtypes)
2025-02-13 15:57:23 +01:00
2025-02-13 20:58:22 +01:00
class TryCast(Cast):
    pass


class Try(Func):
    pass


class CastToStrType(Func):
    arg_types = {"this": True, "to": True}


class Collate(Binary, Func):
    pass
class Ceil(Func):
    arg_types = {"this": True, "decimals": False}
    _sql_names = ["CEIL", "CEILING"]


class Coalesce(Func):
    arg_types = {"this": True, "expressions": False, "is_nvl": False}
    is_var_len_args = True
    _sql_names = ["COALESCE", "IFNULL", "NVL"]


class Chr(Func):
    arg_types = {"expressions": True, "charset": False}
    is_var_len_args = True
    _sql_names = ["CHR", "CHAR"]


class Concat(Func):
    arg_types = {"expressions": True, "safe": False, "coalesce": False}
    is_var_len_args = True


class ConcatWs(Concat):
    _sql_names = ["CONCAT_WS"]


class Contains(Func):
    arg_types = {"this": True, "expression": True}


# https://docs.oracle.com/cd/B13789_01/server.101/b10759/operators004.htm#i1035022
class ConnectByRoot(Func):
    pass


class Count(AggFunc):
    arg_types = {"this": False, "expressions": False, "big_int": False}
    is_var_len_args = True


class CountIf(AggFunc):
    _sql_names = ["COUNT_IF", "COUNTIF"]


# cube root
class Cbrt(Func):
    pass
2025-02-13 06:15:54 +01:00
class CurrentDate(Func):
    arg_types = {"this": False}


class CurrentDatetime(Func):
    arg_types = {"this": False}


class CurrentTime(Func):
    arg_types = {"this": False}


class CurrentTimestamp(Func):
    arg_types = {"this": False, "sysdate": False}


class CurrentUser(Func):
    arg_types = {"this": False}


class DateAdd(Func, IntervalOp):
    arg_types = {"this": True, "expression": True, "unit": False}


class DateSub(Func, IntervalOp):
    arg_types = {"this": True, "expression": True, "unit": False}


class DateDiff(Func, TimeUnit):
    _sql_names = ["DATEDIFF", "DATE_DIFF"]
    arg_types = {"this": True, "expression": True, "unit": False}
2025-02-13 14:53:05 +01:00
class DateTrunc(Func):
    """Truncation function node whose `unit` is normalized to a string literal on construction."""

    arg_types = {"unit": True, "this": True, "zone": False}

    def __init__(self, **args):
        """
        Normalize the `unit` arg before handing all args to the parent constructor.

        The constructor-only `unabbreviate` keyword (default True) is removed from `args`
        and controls whether abbreviated unit names are expanded.
        """
        # Across most dialects it's safe to unabbreviate the unit (e.g. 'Q' -> 'QUARTER') except Oracle
        # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ROUND-and-TRUNC-Date-Functions.html
        unabbreviate = args.pop("unabbreviate", True)

        unit = args.get("unit")
        if isinstance(unit, TimeUnit.VAR_LIKE):
            unit_name = unit.name.upper()
            if unabbreviate:
                # Unknown names map to themselves, i.e. they are left as-is.
                unit_name = TimeUnit.UNABBREVIATED_UNIT_NAME.get(unit_name, unit_name)

            args["unit"] = Literal.string(unit_name)
        elif isinstance(unit, Week):
            unit.set("this", Literal.string(unit.this.name.upper()))

        super().__init__(**args)

    @property
    def unit(self) -> Expression:
        """The `unit` arg; indexes `args` directly, so a missing unit raises KeyError."""
        return self.args["unit"]
2025-02-13 06:15:54 +01:00
2025-02-13 21:41:14 +01:00
# https://cloud.google.com/bigquery/docs/reference/standard-sql/datetime_functions#datetime
# expression can either be time_expr or time_zone
class Datetime(Func):
    arg_types = {"this": True, "expression": False}


class DatetimeAdd(Func, IntervalOp):
    arg_types = {"this": True, "expression": True, "unit": False}


class DatetimeSub(Func, IntervalOp):
    arg_types = {"this": True, "expression": True, "unit": False}


class DatetimeDiff(Func, TimeUnit):
    arg_types = {"this": True, "expression": True, "unit": False}


class DatetimeTrunc(Func, TimeUnit):
    arg_types = {"this": True, "unit": True, "zone": False}


class DayOfWeek(Func):
    _sql_names = ["DAY_OF_WEEK", "DAYOFWEEK"]


# https://duckdb.org/docs/sql/functions/datepart.html#part-specifiers-only-usable-as-date-part-specifiers
# ISO day of week function in duckdb is ISODOW
class DayOfWeekIso(Func):
    _sql_names = ["DAYOFWEEK_ISO", "ISODOW"]


class DayOfMonth(Func):
    _sql_names = ["DAY_OF_MONTH", "DAYOFMONTH"]


class DayOfYear(Func):
    _sql_names = ["DAY_OF_YEAR", "DAYOFYEAR"]


class ToDays(Func):
    pass


class WeekOfYear(Func):
    _sql_names = ["WEEK_OF_YEAR", "WEEKOFYEAR"]


class MonthsBetween(Func):
    arg_types = {"this": True, "expression": True, "roundoff": False}


class MakeInterval(Func):
    arg_types = {
        "year": False,
        "month": False,
        "day": False,
        "hour": False,
        "minute": False,
        "second": False,
    }


class LastDay(Func, TimeUnit):
    _sql_names = ["LAST_DAY", "LAST_DAY_OF_MONTH"]
    arg_types = {"this": True, "unit": False}
2025-02-13 15:03:38 +01:00
2025-02-13 06:15:54 +01:00
class Extract(Func):
    arg_types = {"this": True, "expression": True}


class Exists(Func, SubqueryPredicate):
    arg_types = {"this": True, "expression": False}


class Timestamp(Func):
    arg_types = {"this": False, "zone": False, "with_tz": False}


class TimestampAdd(Func, TimeUnit):
    arg_types = {"this": True, "expression": True, "unit": False}


class TimestampSub(Func, TimeUnit):
    arg_types = {"this": True, "expression": True, "unit": False}


class TimestampDiff(Func, TimeUnit):
    _sql_names = ["TIMESTAMPDIFF", "TIMESTAMP_DIFF"]
    arg_types = {"this": True, "expression": True, "unit": False}


class TimestampTrunc(Func, TimeUnit):
    arg_types = {"this": True, "unit": True, "zone": False}


class TimeAdd(Func, TimeUnit):
    arg_types = {"this": True, "expression": True, "unit": False}


class TimeSub(Func, TimeUnit):
    arg_types = {"this": True, "expression": True, "unit": False}


class TimeDiff(Func, TimeUnit):
    arg_types = {"this": True, "expression": True, "unit": False}


class TimeTrunc(Func, TimeUnit):
    arg_types = {"this": True, "unit": True, "zone": False}
2025-02-13 14:50:31 +01:00
class DateFromParts(Func):
    """Function node declaring ``DATE_FROM_PARTS`` / ``DATEFROMPARTS``; args are year, month, day."""

    _sql_names = ["DATE_FROM_PARTS", "DATEFROMPARTS"]
    arg_types = {
        "year": True,
        "month": True,
        "day": True,
    }


class TimeFromParts(Func):
    """Function node declaring ``TIME_FROM_PARTS`` / ``TIMEFROMPARTS``."""

    _sql_names = ["TIME_FROM_PARTS", "TIMEFROMPARTS"]
    arg_types = {
        "hour": True,
        "min": True,
        "sec": True,
        "nano": False,
        "fractions": False,
        "precision": False,
    }
2025-02-13 06:15:54 +01:00
class DateStrToDate(Func):
    """Function node; inherits its argument spec from ``Func``."""


class DateToDateStr(Func):
    """Function node; inherits its argument spec from ``Func``."""


class DateToDi(Func):
    """Function node; inherits its argument spec from ``Func``."""
2025-02-13 20:43:05 +01:00
# https://cloud.google.com/bigquery/docs/reference/standard-sql/date_functions#date
class Date(Func):
    """Variable-length function node with the args ``this``, ``zone`` and ``expressions``."""

    is_var_len_args = True
    arg_types = {
        "this": False,
        "zone": False,
        "expressions": False,
    }
2025-02-13 16:00:51 +01:00
2025-02-13 06:15:54 +01:00
class Day(Func):
    """Function node; inherits its argument spec from ``Func``."""


class Decode(Func):
    """Function node with the args ``this``, ``charset`` and ``replace``."""

    arg_types = {
        "this": True,
        "charset": True,
        "replace": False,
    }


class DiToDate(Func):
    """Function node; inherits its argument spec from ``Func``."""


class Encode(Func):
    """Function node with the args ``this`` and ``charset``."""

    arg_types = {
        "this": True,
        "charset": True,
    }


class Exp(Func):
    """Function node; inherits its argument spec from ``Func``."""
2025-02-13 21:09:41 +01:00
# https://docs.snowflake.com/en/sql-reference/functions/flatten
class Explode(Func, UDTF):
    """Variable-length function node that is also a ``UDTF``."""

    is_var_len_args = True
    arg_types = {
        "this": True,
        "expressions": False,
    }


# https://spark.apache.org/docs/latest/api/sql/#inline
class Inline(Func):
    """Function node; inherits its argument spec from ``Func``."""


class ExplodeOuter(Explode):
    """Subclass of ``Explode`` that adds no attributes of its own."""


class Posexplode(Explode):
    """Subclass of ``Explode`` that adds no attributes of its own."""


class PosexplodeOuter(Posexplode, ExplodeOuter):
    """Combines ``Posexplode`` and ``ExplodeOuter``; adds no attributes of its own."""
2025-02-13 21:39:30 +01:00
class Unnest(Func, UDTF):
    """Function node that is also a ``UDTF``; may carry an ``offset`` column."""

    arg_types = {
        "expressions": True,
        "alias": False,
        "offset": False,
        "explode_array": False,
    }

    @property
    def selects(self) -> t.List[Expression]:
        """Return the parent's ``selects``, followed by the offset column when one is set.

        When the ``offset`` arg is literally ``True`` an identifier named ``offset``
        is appended; any other truthy value is appended as-is.
        """
        base_selects = super().selects
        offset_arg = self.args.get("offset")
        if not offset_arg:
            return base_selects

        offset_column = to_identifier("offset") if offset_arg is True else offset_arg
        # Build a new list so the parent's list is not mutated.
        return base_selects + [offset_column]
2025-02-13 06:15:54 +01:00
class Floor(Func):
    """Function node with the args ``this`` and ``decimals``."""

    arg_types = {
        "this": True,
        "decimals": False,
    }


class FromBase64(Func):
    """Function node; inherits its argument spec from ``Func``."""


class FeaturesAtTime(Func):
    """Function node with the args ``this``, ``time``, ``num_rows`` and ``ignore_feature_nulls``."""

    arg_types = {
        "this": True,
        "time": False,
        "num_rows": False,
        "ignore_feature_nulls": False,
    }


class ToBase64(Func):
    """Function node; inherits its argument spec from ``Func``."""


# https://trino.io/docs/current/functions/datetime.html#from_iso8601_timestamp
class FromISO8601Timestamp(Func):
    """Function node declaring the SQL name ``FROM_ISO8601_TIMESTAMP``."""

    _sql_names = ["FROM_ISO8601_TIMESTAMP"]
2025-02-13 21:37:40 +01:00
class GapFill(Func):
    """Function node; ``this``, ``ts_column`` and ``bucket_width`` are flagged ``True``."""

    arg_types = {
        "this": True,
        "ts_column": True,
        "bucket_width": True,
        "partitioning_columns": False,
        "value_columns": False,
        "origin": False,
        "ignore_nulls": False,
    }
2025-02-13 21:52:32 +01:00
# https://cloud.google.com/bigquery/docs/reference/standard-sql/array_functions#generate_date_array
class GenerateDateArray(Func):
    """Function node with ``start``, ``end`` and a ``step`` flagged ``False``."""

    arg_types = {
        "start": True,
        "end": True,
        "step": False,
    }


# https://cloud.google.com/bigquery/docs/reference/standard-sql/array_functions#generate_timestamp_array
class GenerateTimestampArray(Func):
    """Function node with ``start``, ``end`` and ``step``, all flagged ``True``."""

    arg_types = {
        "start": True,
        "end": True,
        "step": True,
    }
2025-02-13 21:30:28 +01:00
2025-02-13 06:15:54 +01:00
class Greatest(Func):
    """Variable-length function node with the args ``this`` and ``expressions``."""

    is_var_len_args = True
    arg_types = {
        "this": True,
        "expressions": False,
    }


# Trino's `ON OVERFLOW TRUNCATE [filler_string] {WITH | WITHOUT} COUNT`
# https://trino.io/docs/current/functions/aggregate.html#listagg
class OverflowTruncateBehavior(Expression):
    """Expression node with the args ``this`` and ``with_count``."""

    arg_types = {
        "this": False,
        "with_count": True,
    }


class GroupConcat(AggFunc):
    """Aggregate function node with the args ``this``, ``separator`` and ``on_overflow``."""

    arg_types = {
        "this": True,
        "separator": False,
        "on_overflow": False,
    }
2025-02-13 08:04:41 +01:00
2025-02-13 14:54:32 +01:00
class Hex(Func):
    """Function node; inherits its argument spec from ``Func``."""


class LowerHex(Hex):
    """Subclass of ``Hex`` that adds no attributes of its own."""


class Xor(Connector, Func):
    """Node that is both a ``Connector`` and a ``Func``; every arg is flagged ``False``."""

    arg_types = {
        "this": False,
        "expression": False,
        "expressions": False,
    }
2025-02-13 06:15:54 +01:00
class If(Func):
    """Function node declaring the SQL names ``IF`` and ``IIF``."""

    _sql_names = ["IF", "IIF"]
    arg_types = {
        "this": True,
        "true": True,
        "false": False,
    }


class Nullif(Func):
    """Function node with the args ``this`` and ``expression``."""

    arg_types = {
        "this": True,
        "expression": True,
    }


class Initcap(Func):
    """Function node with the args ``this`` and ``expression``."""

    arg_types = {
        "this": True,
        "expression": False,
    }
2025-02-13 06:15:54 +01:00
2025-02-13 20:48:36 +01:00
class IsNan(Func):
    """Function node declaring the SQL names ``IS_NAN`` and ``ISNAN``."""

    _sql_names = ["IS_NAN", "ISNAN"]


# https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#int64_for_json
class Int64(Func):
    """Function node; inherits its argument spec from ``Func``."""


class IsInf(Func):
    """Function node declaring the SQL names ``IS_INF`` and ``ISINF``."""

    _sql_names = ["IS_INF", "ISINF"]
2025-02-13 21:52:55 +01:00
# https://www.postgresql.org/docs/current/functions-json.html
class JSON(Expression):
    """Expression node with the args ``this``, ``with`` and ``unique``."""

    arg_types = {
        "this": False,
        "with": False,
        "unique": False,
    }
2025-02-13 21:20:36 +01:00
class JSONPath(Expression):
    """Expression node holding path segments in ``expressions``."""

    arg_types = {
        "expressions": True,
        "escape": False,
    }

    @property
    def output_name(self) -> str:
        """Return the final segment's ``this`` when it is a string, otherwise ``""``."""
        tail = self.expressions[-1].this
        if isinstance(tail, str):
            return tail
        return ""
class JSONPathPart(Expression):
    """Base class of the JSON path segment nodes; declares no args."""

    arg_types = {}


class JSONPathFilter(JSONPathPart):
    """Path segment with a single ``this`` arg."""

    arg_types = {"this": True}


class JSONPathKey(JSONPathPart):
    """Path segment with a single ``this`` arg."""

    arg_types = {"this": True}


class JSONPathRecursive(JSONPathPart):
    """Path segment whose ``this`` arg is flagged ``False``."""

    arg_types = {"this": False}


class JSONPathRoot(JSONPathPart):
    """Path segment that adds nothing to ``JSONPathPart``."""


class JSONPathScript(JSONPathPart):
    """Path segment with a single ``this`` arg."""

    arg_types = {"this": True}


class JSONPathSlice(JSONPathPart):
    """Path segment with the args ``start``, ``end`` and ``step``."""

    arg_types = {
        "start": False,
        "end": False,
        "step": False,
    }


class JSONPathSelector(JSONPathPart):
    """Path segment with a single ``this`` arg."""

    arg_types = {"this": True}


class JSONPathSubscript(JSONPathPart):
    """Path segment with a single ``this`` arg."""

    arg_types = {"this": True}


class JSONPathUnion(JSONPathPart):
    """Path segment holding several members in ``expressions``."""

    arg_types = {"expressions": True}


class JSONPathWildcard(JSONPathPart):
    """Path segment that adds nothing to ``JSONPathPart``."""
2025-02-13 21:01:12 +01:00
class FormatJson(Expression):
    """Expression node; inherits its argument spec from ``Expression``."""


class JSONKeyValue(Expression):
    """Expression node pairing ``this`` with ``expression``."""

    arg_types = {
        "this": True,
        "expression": True,
    }


class JSONObject(Func):
    """Function node; shares its arg keys with ``JSONObjectAgg``."""

    arg_types = {
        "expressions": False,
        "null_handling": False,
        "unique_keys": False,
        "return_type": False,
        "encoding": False,
    }


class JSONObjectAgg(AggFunc):
    """Aggregate function node; shares its arg keys with ``JSONObject``."""

    arg_types = {
        "expressions": False,
        "null_handling": False,
        "unique_keys": False,
        "return_type": False,
        "encoding": False,
    }
2025-02-13 21:01:12 +01:00
# https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/JSON_ARRAY.html
class JSONArray(Func):
    """Function node whose members live in ``expressions``."""

    arg_types = {
        "expressions": True,
        "null_handling": False,
        "return_type": False,
        "strict": False,
    }


# https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/JSON_ARRAYAGG.html
class JSONArrayAgg(Func):
    """Function node with ``this`` plus ``order``, ``null_handling``, ``return_type`` and ``strict``."""

    arg_types = {
        "this": True,
        "order": False,
        "null_handling": False,
        "return_type": False,
        "strict": False,
    }


class JSONExists(Func):
    """Function node with the args ``this``, ``path``, ``passing`` and ``on_condition``."""

    arg_types = {
        "this": True,
        "path": True,
        "passing": False,
        "on_condition": False,
    }
2025-02-13 21:01:12 +01:00
# https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/JSON_TABLE.html
# Note: parsing of JSON column definitions is currently incomplete.
class JSONColumnDef(Expression):
    """Expression node for a JSON column definition; every arg is flagged ``False``."""

    arg_types = {
        "this": False,
        "kind": False,
        "path": False,
        "nested_schema": False,
    }


class JSONSchema(Expression):
    """Expression node whose entries live in ``expressions``."""

    arg_types = {"expressions": True}
2025-02-13 21:01:12 +01:00
2025-02-13 21:52:55 +01:00
# https://dev.mysql.com/doc/refman/8.4/en/json-search-functions.html#function_json-value
class JSONValue(Expression):
    """Expression node with the args ``this``, ``path``, ``returning`` and ``on_condition``."""

    arg_types = {
        "this": True,
        "path": True,
        "returning": False,
        "on_condition": False,
    }


class JSONValueArray(Func):
    """Function node with the args ``this`` and ``expression``."""

    arg_types = {
        "this": True,
        "expression": False,
    }
2025-02-13 21:01:12 +01:00
# https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/JSON_TABLE.html
class JSONTable(Func):
    """Function node with ``this`` and ``schema`` plus path and error/empty handling args."""

    arg_types = {
        "this": True,
        "schema": True,
        "path": False,
        "error_handling": False,
        "empty_handling": False,
    }
2025-02-13 21:51:42 +01:00
# https://docs.snowflake.com/en/sql-reference/functions/object_insert
class ObjectInsert(Func):
    """Function node with the args ``this``, ``key``, ``value`` and ``update_flag``."""

    arg_types = {
        "this": True,
        "key": True,
        "value": True,
        "update_flag": False,
    }


class OpenJSONColumnDef(Expression):
    """Expression node with the args ``this``, ``kind``, ``path`` and ``as_json``."""

    arg_types = {
        "this": True,
        "kind": True,
        "path": False,
        "as_json": False,
    }


class OpenJSON(Func):
    """Function node with the args ``this``, ``path`` and ``expressions``."""

    arg_types = {
        "this": True,
        "path": False,
        "expressions": False,
    }
2025-02-13 21:41:14 +01:00
class JSONBContains(Binary, Func):
    """Binary function node declaring the SQL name ``JSONB_CONTAINS``."""

    _sql_names = ["JSONB_CONTAINS"]


class JSONBExists(Func):
    """Function node declaring the SQL name ``JSONB_EXISTS``; args are ``this`` and ``path``."""

    _sql_names = ["JSONB_EXISTS"]
    arg_types = {
        "this": True,
        "path": True,
    }
2025-02-13 15:01:55 +01:00
class JSONExtract(Binary, Func):
    """Variable-length binary function node declaring the SQL name ``JSON_EXTRACT``."""

    _sql_names = ["JSON_EXTRACT"]
    is_var_len_args = True
    arg_types = {
        "this": True,
        "expression": True,
        "only_json_types": False,
        "expressions": False,
        "variant_extract": False,
        "json_query": False,
        "option": False,
    }

    @property
    def output_name(self) -> str:
        """Return the ``expression`` arg's output name, or ``""`` if ``expressions`` is non-empty."""
        if self.expressions:
            return ""
        return self.expression.output_name
2025-02-13 06:15:54 +01:00
2025-02-13 21:56:38 +01:00
class JSONExtractArray(Func):
    """Function node declaring the SQL name ``JSON_EXTRACT_ARRAY``."""

    _sql_names = ["JSON_EXTRACT_ARRAY"]
    arg_types = {
        "this": True,
        "expression": False,
    }
2025-02-13 21:56:38 +01:00
2025-02-13 21:20:36 +01:00
class JSONExtractScalar(Binary, Func):
    """Variable-length binary function node declaring the SQL name ``JSON_EXTRACT_SCALAR``."""

    _sql_names = ["JSON_EXTRACT_SCALAR"]
    is_var_len_args = True
    arg_types = {
        "this": True,
        "expression": True,
        "only_json_types": False,
        "expressions": False,
    }

    @property
    def output_name(self) -> str:
        """Return the output name of the ``expression`` arg."""
        path = self.expression
        return path.output_name
2025-02-13 06:15:54 +01:00
2025-02-13 21:20:36 +01:00
class JSONBExtract(Binary, Func):
    """Binary function node declaring the SQL name ``JSONB_EXTRACT``."""

    _sql_names = ["JSONB_EXTRACT"]


class JSONBExtractScalar(Binary, Func):
    """Binary function node declaring the SQL name ``JSONB_EXTRACT_SCALAR``."""

    _sql_names = ["JSONB_EXTRACT_SCALAR"]


class JSONFormat(Func):
    """Function node declaring the SQL name ``JSON_FORMAT``; args are ``this`` and ``options``."""

    _sql_names = ["JSON_FORMAT"]
    arg_types = {
        "this": False,
        "options": False,
    }


# https://dev.mysql.com/doc/refman/8.0/en/json-search-functions.html#operator_member-of
class JSONArrayContains(Binary, Predicate, Func):
    """Binary predicate function node declaring the SQL name ``JSON_ARRAY_CONTAINS``."""

    _sql_names = ["JSON_ARRAY_CONTAINS"]
2025-02-13 21:02:36 +01:00
class ParseJSON(Func):
    """Function node declaring the SQL names ``PARSE_JSON`` and ``JSON_PARSE``."""

    # BigQuery, Snowflake have PARSE_JSON, Presto has JSON_PARSE
    # Snowflake also has TRY_PARSE_JSON, which is represented using `safe`
    _sql_names = ["PARSE_JSON", "JSON_PARSE"]
    arg_types = {
        "this": True,
        "expression": False,
        "safe": False,
    }
2025-02-13 21:02:36 +01:00
2025-02-13 06:15:54 +01:00
class Least(Func):
    """Variable-length function node with the args ``this`` and ``expressions``."""

    is_var_len_args = True
    arg_types = {
        "this": True,
        "expressions": False,
    }


class Left(Func):
    """Function node with the args ``this`` and ``expression``."""

    arg_types = {
        "this": True,
        "expression": True,
    }


class Right(Func):
    """Function node with the args ``this`` and ``expression``."""

    arg_types = {
        "this": True,
        "expression": True,
    }


class Length(Func):
    """Function node declaring the SQL names ``LENGTH`` and ``LEN``."""

    _sql_names = ["LENGTH", "LEN"]
    arg_types = {
        "this": True,
        "binary": False,
    }
2025-02-13 06:15:54 +01:00
class Levenshtein(Func):
    """Function node with ``this``, ``expression``, three cost args and ``max_dist``."""

    arg_types = {
        "this": True,
        "expression": False,
        "ins_cost": False,
        "del_cost": False,
        "sub_cost": False,
        "max_dist": False,
    }
2025-02-13 06:15:54 +01:00
class Ln(Func):
    """Function node; inherits its argument spec from ``Func``."""


class Log(Func):
    """Function node with the args ``this`` and ``expression``."""

    arg_types = {
        "this": True,
        "expression": False,
    }


class LogicalOr(AggFunc):
    """Aggregate node declaring ``LOGICAL_OR``, ``BOOL_OR`` and ``BOOLOR_AGG``."""

    _sql_names = ["LOGICAL_OR", "BOOL_OR", "BOOLOR_AGG"]


class LogicalAnd(AggFunc):
    """Aggregate node declaring ``LOGICAL_AND``, ``BOOL_AND`` and ``BOOLAND_AGG``."""

    _sql_names = ["LOGICAL_AND", "BOOL_AND", "BOOLAND_AGG"]


class Lower(Func):
    """Function node declaring the SQL names ``LOWER`` and ``LCASE``."""

    _sql_names = ["LOWER", "LCASE"]
2025-02-13 06:15:54 +01:00
class Map(Func):
    """Function node whose ``keys`` and ``values`` args each wrap a list of expressions."""

    arg_types = {
        "keys": False,
        "values": False,
    }

    @property
    def keys(self) -> t.List[Expression]:
        """Return the expressions under the ``keys`` arg, or ``[]`` when it is unset."""
        key_holder = self.args.get("keys")
        if not key_holder:
            return []
        return key_holder.expressions

    @property
    def values(self) -> t.List[Expression]:
        """Return the expressions under the ``values`` arg, or ``[]`` when it is unset."""
        value_holder = self.args.get("values")
        if not value_holder:
            return []
        return value_holder.expressions
2025-02-13 06:15:54 +01:00
2025-02-13 21:30:28 +01:00
# Represents the MAP {...} syntax in DuckDB - basically convert a struct to a MAP
class ToMap(Func):
    """Function node; inherits its argument spec from ``Func``."""


class MapFromEntries(Func):
    """Function node; inherits its argument spec from ``Func``."""


# https://learn.microsoft.com/en-us/sql/t-sql/language-elements/scope-resolution-operator-transact-sql?view=sql-server-ver16
class ScopeResolution(Expression):
    """Expression node with the args ``this`` and ``expression``."""

    arg_types = {
        "this": False,
        "expression": True,
    }


class Stream(Expression):
    """Expression node; inherits its argument spec from ``Expression``."""


class StarMap(Func):
    """Function node; inherits its argument spec from ``Func``."""
2025-02-13 14:45:11 +01:00
class VarMap(Func):
    """Variable-length function node; both ``keys`` and ``values`` are flagged ``True``."""

    is_var_len_args = True
    arg_types = {
        "keys": True,
        "values": True,
    }

    @property
    def keys(self) -> t.List[Expression]:
        """Return the expressions under the ``keys`` arg (``KeyError`` if it is missing)."""
        key_holder = self.args["keys"]
        return key_holder.expressions

    @property
    def values(self) -> t.List[Expression]:
        """Return the expressions under the ``values`` arg (``KeyError`` if it is missing)."""
        value_holder = self.args["values"]
        return value_holder.expressions
2025-02-13 14:45:11 +01:00
2025-02-13 15:50:57 +01:00
# https://dev.mysql.com/doc/refman/8.0/en/fulltext-search.html
class MatchAgainst(Func):
    """Function node with the args ``this``, ``expressions`` and ``modifier``."""

    arg_types = {
        "this": True,
        "expressions": True,
        "modifier": False,
    }
2025-02-13 14:56:25 +01:00
2025-02-13 06:15:54 +01:00
class Max(AggFunc):
    """Variable-length aggregate node with the args ``this`` and ``expressions``."""

    is_var_len_args = True
    arg_types = {
        "this": True,
        "expressions": False,
    }


class MD5(Func):
    """Function node declaring the SQL name ``MD5``."""

    _sql_names = ["MD5"]


# Represents the variant of the MD5 function that returns a binary value
class MD5Digest(Func):
    """Function node declaring the SQL name ``MD5_DIGEST``."""

    _sql_names = ["MD5_DIGEST"]


class Median(AggFunc):
    """Aggregate node; inherits its argument spec from ``AggFunc``."""


class Min(AggFunc):
    """Variable-length aggregate node with the args ``this`` and ``expressions``."""

    is_var_len_args = True
    arg_types = {
        "this": True,
        "expressions": False,
    }


class Month(Func):
    """Function node; inherits its argument spec from ``Func``."""
2025-02-13 21:29:39 +01:00
class AddMonths(Func):
    """Function node with the args ``this`` and ``expression``."""

    arg_types = {
        "this": True,
        "expression": True,
    }


class Nvl2(Func):
    """Function node with the args ``this``, ``true`` and ``false``."""

    arg_types = {
        "this": True,
        "true": True,
        "false": False,
    }


class Normalize(Func):
    """Function node with the args ``this`` and ``form``."""

    arg_types = {
        "this": True,
        "form": False,
    }


class Overlay(Func):
    """Function node with the args ``this``, ``expression``, ``from`` and ``for``."""

    arg_types = {
        "this": True,
        "expression": True,
        "from": True,
        "for": False,
    }


# https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-predict#mlpredict_function
class Predict(Func):
    """Function node with the args ``this``, ``expression`` and ``params_struct``."""

    arg_types = {
        "this": True,
        "expression": True,
        "params_struct": False,
    }


class Pow(Binary, Func):
    """Binary function node declaring the SQL names ``POWER`` and ``POW``."""

    _sql_names = ["POWER", "POW"]
2025-02-13 15:07:05 +01:00
class PercentileCont(AggFunc):
    """Aggregate node with the args ``this`` and ``expression``."""

    arg_types = {
        "this": True,
        "expression": False,
    }


class PercentileDisc(AggFunc):
    """Aggregate node with the args ``this`` and ``expression``."""

    arg_types = {
        "this": True,
        "expression": False,
    }


class Quantile(AggFunc):
    """Aggregate node with the args ``this`` and ``quantile``."""

    arg_types = {
        "this": True,
        "quantile": True,
    }


class ApproxQuantile(Quantile):
    """``Quantile`` subclass that adds the args ``accuracy`` and ``weight``."""

    arg_types = {
        "this": True,
        "quantile": True,
        "accuracy": False,
        "weight": False,
    }


class Quarter(Func):
    """Function node; inherits its argument spec from ``Func``."""
2025-02-13 21:41:14 +01:00
# https://docs.teradata.com/r/Enterprise_IntelliFlex_VMware/SQL-Functions-Expressions-and-Predicates/Arithmetic-Trigonometric-Hyperbolic-Operators/Functions/RANDOM/RANDOM-Function-Syntax
# teradata lower and upper bounds
class Rand(Func):
    """Function node declaring the SQL names ``RAND`` and ``RANDOM``."""

    _sql_names = ["RAND", "RANDOM"]
    arg_types = {
        "this": False,
        "lower": False,
        "upper": False,
    }


class Randn(Func):
    """Function node whose only arg, ``this``, is flagged ``False``."""

    arg_types = {"this": False}


class RangeN(Func):
    """Function node with the args ``this``, ``expressions`` and ``each``."""

    arg_types = {
        "this": True,
        "expressions": True,
        "each": False,
    }


class ReadCSV(Func):
    """Variable-length function node declaring the SQL name ``READ_CSV``."""

    _sql_names = ["READ_CSV"]
    is_var_len_args = True
    arg_types = {
        "this": True,
        "expressions": False,
    }


class Reduce(Func):
    """Function node with the args ``this``, ``initial``, ``merge`` and ``finish``."""

    arg_types = {
        "this": True,
        "initial": True,
        "merge": True,
        "finish": False,
    }
2025-02-13 06:15:54 +01:00
2025-02-13 15:26:26 +01:00
class RegexpExtract(Func):
    """Function node; shares its arg keys with ``RegexpExtractAll``."""

    arg_types = {
        "this": True,
        "expression": True,
        "position": False,
        "occurrence": False,
        "parameters": False,
        "group": False,
    }


class RegexpExtractAll(Func):
    """Function node; shares its arg keys with ``RegexpExtract``."""

    arg_types = {
        "this": True,
        "expression": True,
        "position": False,
        "occurrence": False,
        "parameters": False,
        "group": False,
    }


class RegexpReplace(Func):
    """Function node with ``this``, ``expression`` and a ``replacement`` flagged ``False``."""

    arg_types = {
        "this": True,
        "expression": True,
        "replacement": False,
        "position": False,
        "occurrence": False,
        "modifiers": False,
    }


class RegexpLike(Binary, Func):
    """Binary function node with an extra ``flag`` arg."""

    arg_types = {
        "this": True,
        "expression": True,
        "flag": False,
    }


class RegexpILike(Binary, Func):
    """Binary function node with an extra ``flag`` arg."""

    arg_types = {
        "this": True,
        "expression": True,
        "flag": False,
    }


# https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.functions.split.html
# limit is the number of times a pattern is applied
class RegexpSplit(Func):
    """Function node with the args ``this``, ``expression`` and ``limit``."""

    arg_types = {
        "this": True,
        "expression": True,
        "limit": False,
    }
2025-02-13 06:15:54 +01:00
2025-02-13 14:50:31 +01:00
class Repeat(Func):
    """Function node with the args ``this`` and ``times``."""

    arg_types = {
        "this": True,
        "times": True,
    }
2025-02-13 21:19:14 +01:00
# https://learn.microsoft.com/en-us/sql/t-sql/functions/round-transact-sql?view=sql-server-ver16
# tsql third argument function == truncation if not 0
2025-02-13 06:15:54 +01:00
class Round(Func):
    """Function node with the args ``this``, ``decimals`` and ``truncate``."""

    arg_types = {
        "this": True,
        "decimals": False,
        "truncate": False,
    }
2025-02-13 06:15:54 +01:00
2025-02-13 14:56:25 +01:00
class RowNumber(Func):
    """Function node whose only arg, ``this``, is flagged ``False``."""

    arg_types = {"this": False}


class SafeDivide(Func):
    """Function node with the args ``this`` and ``expression``."""

    arg_types = {
        "this": True,
        "expression": True,
    }


class SHA(Func):
    """Function node declaring the SQL names ``SHA`` and ``SHA1``."""

    _sql_names = ["SHA", "SHA1"]


class SHA2(Func):
    """Function node declaring the SQL name ``SHA2``; args are ``this`` and ``length``."""

    _sql_names = ["SHA2"]
    arg_types = {
        "this": True,
        "length": False,
    }


class Sign(Func):
    """Function node declaring the SQL names ``SIGN`` and ``SIGNUM``."""

    _sql_names = ["SIGN", "SIGNUM"]
2025-02-13 06:15:54 +01:00
class SortArray(Func):
    """Function node with the args ``this`` and ``asc``."""

    arg_types = {
        "this": True,
        "asc": False,
    }


class Split(Func):
    """Function node with the args ``this``, ``expression`` and ``limit``."""

    arg_types = {
        "this": True,
        "expression": True,
        "limit": False,
    }


# https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.functions.split_part.html
class SplitPart(Func):
    """Function node with the args ``this``, ``delimiter`` and ``part_index``."""

    arg_types = {
        "this": True,
        "delimiter": True,
        "part_index": True,
    }
2025-02-13 08:04:41 +01:00
# Start may be omitted in the case of postgres
# https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6
class Substring(Func):
    """Function node declaring the SQL names ``SUBSTRING`` and ``SUBSTR``."""

    _sql_names = ["SUBSTRING", "SUBSTR"]
    arg_types = {
        "this": True,
        "start": False,
        "length": False,
    }


class StandardHash(Func):
    """Function node with the args ``this`` and ``expression``."""

    arg_types = {
        "this": True,
        "expression": False,
    }


class StartsWith(Func):
    """Function node declaring the SQL names ``STARTS_WITH`` and ``STARTSWITH``."""

    _sql_names = ["STARTS_WITH", "STARTSWITH"]
    arg_types = {
        "this": True,
        "expression": True,
    }
2025-02-13 06:15:54 +01:00
class StrPosition(Func):
    """Function node with the args ``this``, ``substr``, ``position`` and ``instance``."""

    arg_types = {
        "this": True,
        "substr": True,
        "position": False,
        "instance": False,
    }
2025-02-13 06:15:54 +01:00
class StrToDate(Func):
    """Function node with the args ``this``, ``format`` and ``safe``."""

    arg_types = {
        "this": True,
        "format": False,
        "safe": False,
    }


class StrToTime(Func):
    """Function node with the args ``this``, ``format``, ``zone`` and ``safe``."""

    arg_types = {
        "this": True,
        "format": True,
        "zone": False,
        "safe": False,
    }


# Spark allows unix_timestamp()
# https://spark.apache.org/docs/3.1.3/api/python/reference/api/pyspark.sql.functions.unix_timestamp.html
class StrToUnix(Func):
    """Function node whose ``this`` and ``format`` args are both flagged ``False``."""

    arg_types = {
        "this": False,
        "format": False,
    }


# https://prestodb.io/docs/current/functions/string.html
# https://spark.apache.org/docs/latest/api/sql/index.html#str_to_map
class StrToMap(Func):
    """Function node with ``this``, two delimiter args and a duplicate-resolution callback arg."""

    arg_types = {
        "this": True,
        "pair_delim": False,
        "key_value_delim": False,
        "duplicate_resolution_callback": False,
    }


class NumberToStr(Func):
    """Function node with the args ``this``, ``format`` and ``culture``."""

    arg_types = {
        "this": True,
        "format": True,
        "culture": False,
    }
2025-02-13 14:51:47 +01:00
2025-02-13 15:58:40 +01:00
class FromBase(Func):
    """Function node with the args ``this`` and ``expression``."""

    arg_types = {
        "this": True,
        "expression": True,
    }


class Struct(Func):
    """Variable-length function node whose fields live in ``expressions``."""

    is_var_len_args = True
    arg_types = {"expressions": False}


class StructExtract(Func):
    """Function node with the args ``this`` and ``expression``."""

    arg_types = {
        "this": True,
        "expression": True,
    }


# https://learn.microsoft.com/en-us/sql/t-sql/functions/stuff-transact-sql?view=sql-server-ver16
# https://docs.snowflake.com/en/sql-reference/functions/insert
class Stuff(Func):
    """Function node declaring the SQL names ``STUFF`` and ``INSERT``."""

    _sql_names = ["STUFF", "INSERT"]
    arg_types = {
        "this": True,
        "start": True,
        "length": True,
        "expression": True,
    }
2025-02-13 06:15:54 +01:00
class Sum(AggFunc):
    """Aggregate node; inherits its argument spec from ``AggFunc``."""


class Sqrt(Func):
    """Function node; inherits its argument spec from ``Func``."""


class Stddev(AggFunc):
    """Aggregate node declaring the SQL names ``STDDEV`` and ``STDEV``."""

    _sql_names = ["STDDEV", "STDEV"]


class StddevPop(AggFunc):
    """Aggregate node; inherits its argument spec from ``AggFunc``."""


class StddevSamp(AggFunc):
    """Aggregate node; inherits its argument spec from ``AggFunc``."""
2025-02-13 21:41:14 +01:00
# https://cloud.google.com/bigquery/docs/reference/standard-sql/time_functions#time
class Time(Func):
    """Function node whose ``this`` and ``zone`` args are both flagged ``False``."""

    arg_types = {
        "this": False,
        "zone": False,
    }


class TimeToStr(Func):
    """Function node with the args ``this``, ``format``, ``culture`` and ``zone``."""

    arg_types = {
        "this": True,
        "format": True,
        "culture": False,
        "zone": False,
    }


class TimeToTimeStr(Func):
    """Function node; inherits its argument spec from ``Func``."""


class TimeToUnix(Func):
    """Function node; inherits its argument spec from ``Func``."""


class TimeStrToDate(Func):
    """Function node; inherits its argument spec from ``Func``."""


class TimeStrToTime(Func):
    """Function node with the args ``this`` and ``zone``."""

    arg_types = {
        "this": True,
        "zone": False,
    }


class TimeStrToUnix(Func):
    """Function node; inherits its argument spec from ``Func``."""
2025-02-13 08:04:41 +01:00
class Trim(Func):
    """Function node with the args ``this``, ``expression``, ``position`` and ``collation``."""

    arg_types = {
        "this": True,
        "expression": False,
        "position": False,
        "collation": False,
    }
2025-02-13 06:15:54 +01:00
class TsOrDsAdd(Func, TimeUnit):
    """Function node with ``this``, ``expression``, ``unit`` and a ``return_type`` arg."""

    # return_type is used to correctly cast the arguments of this expression when transpiling it
    arg_types = {
        "this": True,
        "expression": True,
        "unit": False,
        "return_type": False,
    }

    @property
    def return_type(self) -> DataType:
        """Build a ``DataType`` from the ``return_type`` arg, using ``DATE`` when it is falsy."""
        declared = self.args.get("return_type")
        return DataType.build(declared or DataType.Type.DATE)
class TsOrDsDiff(Func, TimeUnit):
    """Function node with the args ``this``, ``expression`` and ``unit``."""

    arg_types = {
        "this": True,
        "expression": True,
        "unit": False,
    }


class TsOrDsToDateStr(Func):
    """Function node; inherits its argument spec from ``Func``."""


class TsOrDsToDate(Func):
    """Function node with the args ``this``, ``format`` and ``safe``."""

    arg_types = {
        "this": True,
        "format": False,
        "safe": False,
    }


class TsOrDsToDatetime(Func):
    """Function node; inherits its argument spec from ``Func``."""


class TsOrDsToTime(Func):
    """Function node; inherits its argument spec from ``Func``."""


class TsOrDsToTimestamp(Func):
    """Function node; inherits its argument spec from ``Func``."""


class TsOrDiToDi(Func):
    """Function node; inherits its argument spec from ``Func``."""
2025-02-13 14:54:32 +01:00
class Unhex(Func):
    """Function node; inherits its argument spec from ``Func``."""


# https://cloud.google.com/bigquery/docs/reference/standard-sql/date_functions#unix_date
class UnixDate(Func):
    """Function node; inherits its argument spec from ``Func``."""


class UnixToStr(Func):
    """Function node with the args ``this`` and ``format``."""

    arg_types = {
        "this": True,
        "format": False,
    }
2025-02-13 06:15:54 +01:00
2025-02-13 15:05:06 +01:00
# https://prestodb.io/docs/current/functions/datetime.html
# presto has weird zone/hours/minutes
class UnixToTime(Func):
    """Function node with ``this`` plus scale, zone, hours, minutes and format args.

    The class also exposes ten named number literals (0 through 9).
    NOTE(review): these look like power-of-ten exponents intended for the
    ``scale`` arg -- confirm against the callers.
    """

    arg_types = {
        "this": True,
        "scale": False,
        "zone": False,
        "hours": False,
        "minutes": False,
        "format": False,
    }

    SECONDS = Literal.number(0)
    DECIS = Literal.number(1)
    CENTIS = Literal.number(2)
    MILLIS = Literal.number(3)
    DECIMILLIS = Literal.number(4)
    CENTIMILLIS = Literal.number(5)
    MICROS = Literal.number(6)
    DECIMICROS = Literal.number(7)
    CENTIMICROS = Literal.number(8)
    NANOS = Literal.number(9)
2025-02-13 06:15:54 +01:00
class UnixToTimeStr(Func):
    """Function node; inherits its argument spec from ``Func``."""


class UnixSeconds(Func):
    """Function node; inherits its argument spec from ``Func``."""


class Uuid(Func):
    """Function node declaring ``UUID``, ``GEN_RANDOM_UUID``, ``GENERATE_UUID`` and ``UUID_STRING``."""

    _sql_names = ["UUID", "GEN_RANDOM_UUID", "GENERATE_UUID", "UUID_STRING"]
    arg_types = {
        "this": False,
        "name": False,
    }
2025-02-13 21:17:51 +01:00
class TimestampFromParts(Func):
    """Function node declaring ``TIMESTAMP_FROM_PARTS`` / ``TIMESTAMPFROMPARTS``."""

    _sql_names = ["TIMESTAMP_FROM_PARTS", "TIMESTAMPFROMPARTS"]
    arg_types = {
        "year": True,
        "month": True,
        "day": True,
        "hour": True,
        "min": True,
        "sec": True,
        "nano": False,
        "zone": False,
        "milli": False,
    }
2025-02-13 06:15:54 +01:00
class Upper(Func):
    """Function node declaring the SQL names ``UPPER`` and ``UCASE``."""

    _sql_names = ["UPPER", "UCASE"]


class Corr(Binary, AggFunc):
    """Binary aggregate node that adds no attributes of its own."""


class Variance(AggFunc):
    """Aggregate node declaring ``VARIANCE``, ``VARIANCE_SAMP`` and ``VAR_SAMP``."""

    _sql_names = ["VARIANCE", "VARIANCE_SAMP", "VAR_SAMP"]


class VariancePop(AggFunc):
    """Aggregate node declaring ``VARIANCE_POP`` and ``VAR_POP``."""

    _sql_names = ["VARIANCE_POP", "VAR_POP"]


class CovarSamp(Binary, AggFunc):
    """Binary aggregate node that adds no attributes of its own."""


class CovarPop(Binary, AggFunc):
    """Binary aggregate node that adds no attributes of its own."""


class Week(Func):
    """Function node with the args ``this`` and ``mode``."""

    arg_types = {
        "this": True,
        "mode": False,
    }


class XMLTable(Func):
    """Function node with the args ``this``, ``passing``, ``columns`` and ``by_ref``."""

    arg_types = {
        "this": True,
        "passing": False,
        "columns": False,
        "by_ref": False,
    }


class Year(Func):
    """Function node; inherits its argument spec from ``Func``."""
2025-02-13 14:53:05 +01:00
class Use(Expression):
    """Expression node with the args ``this`` and ``kind``."""

    arg_types = {
        "this": True,
        "kind": False,
    }


class Merge(DML):
    """DML node with ``this``, ``using``, ``on`` and ``expressions`` plus ``with`` / ``returning``."""

    arg_types = {
        "this": True,
        "using": True,
        "on": True,
        "expressions": True,
        "with": False,
        "returning": False,
    }


class When(Func):
    """Function node with the args ``matched``, ``source``, ``condition`` and ``then``."""

    arg_types = {
        "matched": True,
        "source": False,
        "condition": False,
        "then": True,
    }


# https://docs.oracle.com/javadb/10.8.3.0/ref/rrefsqljnextvaluefor.html
# https://learn.microsoft.com/en-us/sql/t-sql/functions/next-value-for-transact-sql?view=sql-server-ver16
class NextValueFor(Func):
    """Function node with the args ``this`` and ``order``."""

    arg_types = {
        "this": True,
        "order": False,
    }


# Refers to a trailing semi-colon. This is only used to preserve trailing comments
# select 1; -- my comment
class Semicolon(Expression):
    """Expression node that declares no args."""

    arg_types = {}
2025-02-13 06:15:54 +01:00
def _norm_arg ( arg ) :
2025-02-13 15:48:10 +01:00
return arg . lower ( ) if type ( arg ) is str else arg
2025-02-13 06:15:54 +01:00
2025-02-13 14:40:43 +01:00
# Function classes collected from this module by the `subclasses` helper.
# NOTE(review): the trailing tuple (AggFunc, Anonymous, Func) is presumably the set of
# classes to leave out of the result -- confirm against `sqlglot.helper.subclasses`.
ALL_FUNCTIONS = subclasses(__name__, Func, (AggFunc, Anonymous, Func))

# Lookup from each name yielded by `func.sql_names()` to its function class. If two classes
# yield the same name, the one appearing later in ALL_FUNCTIONS overwrites the earlier one.
FUNCTION_BY_NAME = {name: func for func in ALL_FUNCTIONS for name in func.sql_names()}

# JSON path segment classes collected the same way, rooted at JSONPathPart.
JSON_PATH_PARTS = subclasses(__name__, JSONPathPart, (JSONPathPart,))

# Convenience tuple of the two percentile aggregate classes (usable with isinstance).
PERCENTILES = (PercentileCont, PercentileDisc)
2025-02-13 06:15:54 +01:00
2025-02-13 15:07:05 +01:00
# Helpers
2025-02-13 15:52:09 +01:00
# Typing-only overload: `into` is supplied as an expression class, so the result is
# typed as an instance of that class.
@t.overload
def maybe_parse(
    sql_or_expression: ExpOrStr,
    *,
    into: t.Type[E],
    dialect: DialectType = None,
    prefix: t.Optional[str] = None,
    copy: bool = False,
    **opts,
) -> E: ...


# Typing-only overload: the input is a string or an expression of type E and `into`
# may be omitted; the result is typed as E.
@t.overload
def maybe_parse(
    sql_or_expression: str | E,
    *,
    into: t.Optional[IntoType] = None,
    dialect: DialectType = None,
    prefix: t.Optional[str] = None,
    copy: bool = False,
    **opts,
) -> E: ...
2025-02-13 15:52:09 +01:00
2025-02-13 06:15:54 +01:00
def maybe_parse (
2025-02-13 15:46:19 +01:00
sql_or_expression : ExpOrStr ,
2025-02-13 06:15:54 +01:00
* ,
2025-02-13 15:07:05 +01:00
into : t . Optional [ IntoType ] = None ,
2025-02-13 15:09:58 +01:00
dialect : DialectType = None ,
2025-02-13 15:07:05 +01:00
prefix : t . Optional [ str ] = None ,
2025-02-13 15:26:26 +01:00
copy : bool = False ,
2025-02-13 06:15:54 +01:00
* * opts ,
2025-02-13 15:03:38 +01:00
) - > Expression :
2025-02-13 06:15:54 +01:00
""" Gracefully handle a possible string or expression.
Example :
>> > maybe_parse ( " 1 " )
2025-02-13 21:19:14 +01:00
Literal ( this = 1 , is_string = False )
2025-02-13 06:15:54 +01:00
>> > maybe_parse ( to_identifier ( " x " ) )
2025-02-13 21:19:14 +01:00
Identifier ( this = x , quoted = False )
2025-02-13 06:15:54 +01:00
Args :
2025-02-13 15:07:05 +01:00
sql_or_expression : the SQL code string or an expression
into : the SQLGlot Expression to parse into
dialect : the dialect used to parse the input expressions ( in the case that an
2025-02-13 06:15:54 +01:00
input expression is a SQL string ) .
2025-02-13 15:07:05 +01:00
prefix : a string to prefix the sql with before it gets parsed
2025-02-13 06:15:54 +01:00
( automatically includes a space )
2025-02-13 21:28:36 +01:00
copy : whether to copy the expression .
2025-02-13 06:15:54 +01:00
* * opts : other options to use to parse the input expressions ( again , in the case
that an input expression is a SQL string ) .
Returns :
Expression : the parsed or given expression .
"""
if isinstance ( sql_or_expression , Expression ) :
2025-02-13 15:26:26 +01:00
if copy :
return sql_or_expression . copy ( )
2025-02-13 06:15:54 +01:00
return sql_or_expression
2025-02-13 15:57:23 +01:00
if sql_or_expression is None :
2025-02-13 21:20:36 +01:00
raise ParseError ( " SQL cannot be None " )
2025-02-13 15:57:23 +01:00
2025-02-13 06:15:54 +01:00
import sqlglot
sql = str ( sql_or_expression )
if prefix :
sql = f " { prefix } { sql } "
2025-02-13 15:57:23 +01:00
2025-02-13 06:15:54 +01:00
return sqlglot . parse_one ( sql , read = dialect , into = into , * * opts )
2025-02-13 20:51:40 +01:00
@t.overload
def maybe_copy(instance: None, copy: bool = True) -> None: ...


@t.overload
def maybe_copy(instance: E, copy: bool = True) -> E: ...


def maybe_copy(instance, copy=True):
    """Return `instance.copy()` when `copy` is set and `instance` is truthy.

    In every other case (copy disabled, or a falsy instance such as None) the
    given object itself is returned.
    """
    if copy and instance:
        return instance.copy()
    return instance
2025-02-13 06:15:54 +01:00
2025-02-13 21:19:14 +01:00
def _to_s(node: t.Any, verbose: bool = False, level: int = 0) -> str:
    """Generate a textual representation of an Expression tree.

    Args:
        node: an Expression, a list, or any other value (rendered through `str`).
        verbose: when set, also shows unset args plus `_type`, `_comments` and `_id`.
        level: current nesting depth, used to compute the indentation.
    """
    indent = "\n" + ("  " * (level + 1))
    delim = f",{indent}"

    if isinstance(node, Expression):
        shown = {}
        for key, value in node.args.items():
            if (value is not None and value != []) or verbose:
                shown[key] = value

        if (node.type or verbose) and not isinstance(node, DataType):
            shown["_type"] = node.type
        if node.comments or verbose:
            shown["_comments"] = node.comments
        if verbose:
            shown["_id"] = id(node)

        # Leaves are rendered on one line for a more compact representation
        if node.is_leaf():
            indent, delim = "", ", "

        rendered = [f"{key}={_to_s(value, verbose, level + 1)}" for key, value in shown.items()]
        body = delim.join(rendered)
        return f"{node.__class__.__name__}({indent}{body})"

    if isinstance(node, list):
        body = delim.join(_to_s(item, verbose, level + 1) for item in node)
        return f"[{indent}{body}]" if body else "[]"

    # Multiline strings are re-indented to match the current level
    text_lines = textwrap.dedent(str(node).strip("\n")).splitlines()
    return indent.join(text_lines)
2025-02-13 06:15:54 +01:00
def _is_wrong_expression(expression, into):
    """Tell whether `expression` is an Expression that is not an instance of `into`."""
    if not isinstance(expression, Expression):
        return False
    return not isinstance(expression, into)
def _apply_builder(
    expression,
    instance,
    arg,
    copy=True,
    prefix=None,
    into=None,
    dialect=None,
    into_arg="this",
    **opts,
):
    """Parse `expression` and store it on `instance` under the arg key `arg`.

    An Expression that is not an instance of `into` is first wrapped as
    `into(**{into_arg: expression})`. The instance is copied beforehand when
    `copy` is set, and the (possibly copied) instance is returned.
    """
    if _is_wrong_expression(expression, into):
        wrapper_kwargs = {into_arg: expression}
        expression = into(**wrapper_kwargs)

    instance = maybe_copy(instance, copy)
    parsed = maybe_parse(
        sql_or_expression=expression,
        prefix=prefix,
        into=into,
        dialect=dialect,
        **opts,
    )
    instance.set(arg, parsed)
    return instance
def _apply_child_list_builder(
    *expressions,
    instance,
    arg,
    append=True,
    copy=True,
    prefix=None,
    into=None,
    dialect=None,
    properties=None,
    **opts,
):
    """Build an `into(expressions=[...])` child node and store it on `instance` under `arg`.

    Args:
        *expressions: SQL strings or Expressions; None entries are skipped. An Expression
            that is not an instance of `into` is wrapped as `into(expressions=[expression])`.
        instance: the node that receives the child.
        arg: the arg key under which the child is stored.
        append: when set and a child already exists under `arg`, its `expressions` are
            kept in front of the newly parsed ones.
        copy: whether to work on a copy of `instance`.
        prefix: forwarded to `maybe_parse`.
        into: the node type of the child; also used as the parse target.
        dialect: forwarded to `maybe_parse`.
        properties: extra args to set on the child. Args other than "expressions" found
            on the parsed inputs override entries with the same key.
        **opts: forwarded to `maybe_parse`.

    Returns:
        The (possibly copied) instance.
    """
    instance = maybe_copy(instance, copy)
    parsed = []
    # Work on a private copy so the dict handed in by the caller is never mutated.
    properties = dict(properties) if properties else {}

    for expression in expressions:
        if expression is None:
            continue

        if _is_wrong_expression(expression, into):
            expression = into(expressions=[expression])

        expression = maybe_parse(
            expression,
            into=into,
            dialect=dialect,
            prefix=prefix,
            **opts,
        )
        for k, v in expression.args.items():
            if k == "expressions":
                parsed.extend(v)
            else:
                properties[k] = v

    existing = instance.args.get(arg)
    if append and existing:
        parsed = existing.expressions + parsed

    child = into(expressions=parsed)
    for k, v in properties.items():
        child.set(k, v)
    instance.set(arg, child)

    return instance
def _apply_list_builder(
    *expressions,
    instance,
    arg,
    append=True,
    copy=True,
    prefix=None,
    into=None,
    dialect=None,
    **opts,
):
    """Parse `expressions` and store the resulting list on `instance` under `arg`.

    None entries are skipped. When `append` is set and a non-empty list already
    exists under `arg`, the new items are added after the existing ones. The
    instance is copied first when `copy` is set; the resulting node is returned.
    """
    inst = maybe_copy(instance, copy)

    parsed = []
    for expression in expressions:
        if expression is None:
            continue
        parsed.append(
            maybe_parse(
                sql_or_expression=expression,
                into=into,
                prefix=prefix,
                dialect=dialect,
                **opts,
            )
        )

    current = inst.args.get(arg)
    if append and current:
        parsed = current + parsed

    inst.set(arg, parsed)
    return inst
def _apply_conjunction_builder(
    *expressions,
    instance,
    arg,
    into=None,
    append=True,
    copy=True,
    dialect=None,
    **opts,
):
    """AND together `expressions` and store the result on `instance` under `arg`.

    None and empty-string inputs are dropped; if nothing remains, `instance` is
    returned as given (no copy is made). When `append` is set and a value already
    exists under `arg`, it becomes the first operand (its `this` is used when
    `into` is given). The combined node is wrapped as `into(this=node)` when `into`
    is given.
    """
    operands = []
    for exp in expressions:
        if exp is not None and exp != "":
            operands.append(exp)

    if not operands:
        return instance

    inst = maybe_copy(instance, copy)

    existing = inst.args.get(arg)
    if append and existing is not None:
        head = existing.this if into else existing
        operands = [head] + list(operands)

    node = and_(*operands, dialect=dialect, copy=copy, **opts)
    if into:
        node = into(this=node)

    inst.set(arg, node)
    return inst
2025-02-13 15:57:23 +01:00
def _apply_cte_builder(
    instance: E,
    alias: ExpOrStr,
    as_: ExpOrStr,
    recursive: t.Optional[bool] = None,
    materialized: t.Optional[bool] = None,
    append: bool = True,
    dialect: DialectType = None,
    copy: bool = True,
    **opts,
) -> E:
    """Attach a CTE to the "with" arg of `instance`.

    Args:
        instance: the node that receives the CTE.
        alias: the CTE name, parsed into a `TableAlias`.
        as_: the CTE body.
        recursive: stored as the `recursive` property of the `With` node (None becomes False).
        materialized: passed through to the `CTE` node.
        append: whether to keep CTEs already present on `instance`.
        dialect: the dialect used to parse `alias` and `as_`.
        copy: whether to work on a copy of `instance`.
        **opts: other options used when parsing `alias` and `as_`.

    Returns:
        The (possibly copied) instance.
    """
    cte_alias = maybe_parse(alias, dialect=dialect, into=TableAlias, **opts)
    cte_body = maybe_parse(as_, dialect=dialect, **opts)
    cte = CTE(this=cte_body, alias=cte_alias, materialized=materialized)

    with_properties = {"recursive": recursive or False}
    return _apply_child_list_builder(
        cte,
        instance=instance,
        arg="with",
        append=append,
        copy=copy,
        into=With,
        properties=with_properties,
    )
def _combine (
expressions : t . Sequence [ t . Optional [ ExpOrStr ] ] ,
operator : t . Type [ Connector ] ,
dialect : DialectType = None ,
copy : bool = True ,
2025-02-13 21:56:38 +01:00
wrap : bool = True ,
2025-02-13 15:57:23 +01:00
* * opts ,
) - > Expression :
conditions = [
condition ( expression , dialect = dialect , copy = copy , * * opts )
for expression in expressions
if expression is not None
2025-02-13 15:53:39 +01:00
]
2025-02-13 15:57:23 +01:00
this , * rest = conditions
2025-02-13 21:56:38 +01:00
if rest and wrap :
2025-02-13 15:52:09 +01:00
this = _wrap ( this , Connector )
2025-02-13 15:57:23 +01:00
for expression in rest :
2025-02-13 21:56:38 +01:00
this = operator ( this = this , expression = _wrap ( expression , Connector ) if wrap else expression )
2025-02-13 15:57:23 +01:00
2025-02-13 06:15:54 +01:00
return this
2025-02-13 21:57:20 +01:00
@t.overload
def _wrap ( expression : None , kind : t . Type [ Expression ] ) - > None : . . .
@t.overload
def _wrap ( expression : E , kind : t . Type [ Expression ] ) - > E | Paren : . . .
def _wrap ( expression : t . Optional [ E ] , kind : t . Type [ Expression ] ) - > t . Optional [ E ] | Paren :
2025-02-13 15:57:23 +01:00
return Paren ( this = expression ) if isinstance ( expression , kind ) else expression
2025-02-13 06:15:54 +01:00
2025-02-13 21:56:02 +01:00
def _apply_set_operation(
    *expressions: ExpOrStr,
    set_operation: t.Type[S],
    distinct: bool = True,
    dialect: DialectType = None,
    copy: bool = True,
    **opts,
) -> S:
    """Left-fold `expressions` into nested `set_operation` nodes.

    Each operand goes through `maybe_parse`; the running result becomes `this`
    and the next operand becomes `expression` of a new `set_operation` node.
    """

    def _join(left, right):
        return set_operation(this=left, expression=right, distinct=distinct)

    # Kept lazy so that operands are parsed one at a time while folding.
    operands = (maybe_parse(e, dialect=dialect, copy=copy, **opts) for e in expressions)
    return reduce(_join, operands)
2025-02-13 15:57:23 +01:00
def union(
    *expressions: ExpOrStr,
    distinct: bool = True,
    dialect: DialectType = None,
    copy: bool = True,
    **opts,
) -> Union:
    """
    Initializes a syntax tree for the `UNION` operation.

    Example:
        >>> union("SELECT * FROM foo", "SELECT * FROM bla").sql()
        'SELECT * FROM foo UNION SELECT * FROM bla'

    Args:
        expressions: the SQL code strings, corresponding to the `UNION`'s operands.
            If `Expression` instances are passed, they will be used as-is.
        distinct: set the DISTINCT flag if and only if this is true.
        dialect: the dialect used to parse the input expression.
        copy: whether to copy the expression.
        opts: other options to use to parse the input expressions.

    Returns:
        The new Union instance.
    """
    operand_count = len(expressions)
    assert operand_count >= 2, "At least two expressions are required by `union`."

    return _apply_set_operation(
        *expressions,
        set_operation=Union,
        distinct=distinct,
        dialect=dialect,
        copy=copy,
        **opts,
    )
2025-02-13 14:45:11 +01:00
2025-02-13 15:57:23 +01:00
def intersect(
    *expressions: ExpOrStr,
    distinct: bool = True,
    dialect: DialectType = None,
    copy: bool = True,
    **opts,
) -> Intersect:
    """
    Initializes a syntax tree for the `INTERSECT` operation.

    Example:
        >>> intersect("SELECT * FROM foo", "SELECT * FROM bla").sql()
        'SELECT * FROM foo INTERSECT SELECT * FROM bla'

    Args:
        expressions: the SQL code strings, corresponding to the `INTERSECT`'s operands.
            If `Expression` instances are passed, they will be used as-is.
        distinct: set the DISTINCT flag if and only if this is true.
        dialect: the dialect used to parse the input expression.
        copy: whether to copy the expression.
        opts: other options to use to parse the input expressions.

    Returns:
        The new Intersect instance.
    """
    operand_count = len(expressions)
    assert operand_count >= 2, "At least two expressions are required by `intersect`."

    return _apply_set_operation(
        *expressions,
        set_operation=Intersect,
        distinct=distinct,
        dialect=dialect,
        copy=copy,
        **opts,
    )
2025-02-13 14:45:11 +01:00
2025-02-13 15:57:23 +01:00
def except_(
    *expressions: ExpOrStr,
    distinct: bool = True,
    dialect: DialectType = None,
    copy: bool = True,
    **opts,
) -> Except:
    """
    Initializes a syntax tree for the `EXCEPT` operation.

    Example:
        >>> except_("SELECT * FROM foo", "SELECT * FROM bla").sql()
        'SELECT * FROM foo EXCEPT SELECT * FROM bla'

    Args:
        expressions: the SQL code strings, corresponding to the `EXCEPT`'s operands.
            If `Expression` instances are passed, they will be used as-is.
        distinct: set the DISTINCT flag if and only if this is true.
        dialect: the dialect used to parse the input expression.
        copy: whether to copy the expression.
        opts: other options to use to parse the input expressions.

    Returns:
        The new Except instance.
    """
    operand_count = len(expressions)
    assert operand_count >= 2, "At least two expressions are required by `except_`."

    return _apply_set_operation(
        *expressions,
        set_operation=Except,
        distinct=distinct,
        dialect=dialect,
        copy=copy,
        **opts,
    )
2025-02-13 14:45:11 +01:00
2025-02-13 15:46:19 +01:00
def select(*expressions: ExpOrStr, dialect: DialectType = None, **opts) -> Select:
    """
    Initializes a syntax tree from one or multiple SELECT expressions.

    Example:
        >>> select("col1", "col2").from_("tbl").sql()
        'SELECT col1, col2 FROM tbl'

    Args:
        *expressions: the SQL code string to parse as the expressions of a
            SELECT statement. If an Expression instance is passed, this is used as-is.
        dialect: the dialect used to parse the input expressions (in the case that an
            input expression is a SQL string).
        **opts: other options to use to parse the input expressions (again, in the case
            that an input expression is a SQL string).

    Returns:
        Select: the syntax tree for the SELECT statement.
    """
    empty_select = Select()
    return empty_select.select(*expressions, dialect=dialect, **opts)
2025-02-13 15:57:23 +01:00
def from_(expression: ExpOrStr, dialect: DialectType = None, **opts) -> Select:
    """
    Initializes a syntax tree from a FROM expression.

    Example:
        >>> from_("tbl").select("col1", "col2").sql()
        'SELECT col1, col2 FROM tbl'

    Args:
        expression: the SQL code string to parse as the FROM expression of a
            SELECT statement. If an Expression instance is passed, this is used as-is.
        dialect: the dialect used to parse the input expression (in the case that the
            input expression is a SQL string).
        **opts: other options to use to parse the input expression (again, in the case
            that the input expression is a SQL string).

    Returns:
        Select: the syntax tree for the SELECT statement.
    """
    empty_select = Select()
    return empty_select.from_(expression, dialect=dialect, **opts)
2025-02-13 06:15:54 +01:00
2025-02-13 15:46:19 +01:00
def update(
    table: str | Table,
    properties: t.Optional[dict] = None,
    where: t.Optional[ExpOrStr] = None,
    from_: t.Optional[ExpOrStr] = None,
    with_: t.Optional[t.Dict[str, ExpOrStr]] = None,
    dialect: DialectType = None,
    **opts,
) -> Update:
    """
    Creates an update statement.

    Example:
        >>> update("my_table", {"x": 1, "y": "2", "z": None}, from_="baz_cte", where="baz_cte.id > 1 and my_table.id = baz_cte.id", with_={"baz_cte": "SELECT id FROM foo"}).sql()
        "WITH baz_cte AS (SELECT id FROM foo) UPDATE my_table SET x = 1, y = '2', z = NULL FROM baz_cte WHERE baz_cte.id > 1 AND my_table.id = baz_cte.id"

    Args:
        table: the table to update, given as a SQL string or a `Table`.
        properties: dictionary of properties to SET which are
            auto converted to sql objects eg None -> NULL
        where: sql conditional parsed into a WHERE statement
        from_: sql statement parsed into a FROM statement
        with_: dictionary of CTE aliases / select statements to include in a WITH clause.
        dialect: the dialect used to parse the input expressions.
        **opts: other options to use to parse the input expressions.

    Returns:
        Update: the syntax tree for the UPDATE statement.
    """
    target = maybe_parse(table, into=Table, dialect=dialect)
    update_expr = Update(this=target)

    if properties:
        assignments = []
        for column, value in properties.items():
            lhs = maybe_parse(column, dialect=dialect, **opts)
            assignments.append(EQ(this=lhs, expression=convert(value)))
        update_expr.set("expressions", assignments)

    if from_:
        from_clause = maybe_parse(from_, into=From, dialect=dialect, prefix="FROM", **opts)
        update_expr.set("from", from_clause)

    # A bare condition is promoted to a WHERE node before it is parsed/validated.
    if isinstance(where, Condition):
        where = Where(this=where)
    if where:
        where_clause = maybe_parse(where, into=Where, dialect=dialect, prefix="WHERE", **opts)
        update_expr.set("where", where_clause)

    if with_:
        ctes = []
        for alias, qry in with_.items():
            cte = CTE(this=maybe_parse(qry, dialect=dialect, **opts))
            ctes.append(alias_(cte, alias, table=True))
        update_expr.set("with", With(expressions=ctes))

    return update_expr
2025-02-13 14:40:43 +01:00
2025-02-13 15:46:19 +01:00
def delete(
    table: ExpOrStr,
    where: t.Optional[ExpOrStr] = None,
    returning: t.Optional[ExpOrStr] = None,
    dialect: DialectType = None,
    **opts,
) -> Delete:
    """
    Builds a delete statement.

    Example:
        >>> delete("my_table", where="id > 1").sql()
        'DELETE FROM my_table WHERE id > 1'

    Args:
        table: the table to delete from, as a SQL string or an expression.
        where: sql conditional parsed into a WHERE statement
        returning: sql conditional parsed into a RETURNING statement
        dialect: the dialect used to parse the input expressions.
        **opts: other options to use to parse the input expressions.

    Returns:
        Delete: the syntax tree for the DELETE statement.
    """
    # The node is created here, so every builder step runs with copy=False.
    statement = Delete().delete(table, dialect=dialect, copy=False, **opts)

    if where:
        statement = statement.where(where, dialect=dialect, copy=False, **opts)

    if returning:
        statement = statement.returning(returning, dialect=dialect, copy=False, **opts)

    return statement
2025-02-13 14:46:58 +01:00
2025-02-13 15:57:23 +01:00
def insert(
    expression: ExpOrStr,
    into: ExpOrStr,
    columns: t.Optional[t.Sequence[str | Identifier]] = None,
    overwrite: t.Optional[bool] = None,
    returning: t.Optional[ExpOrStr] = None,
    dialect: DialectType = None,
    copy: bool = True,
    **opts,
) -> Insert:
    """
    Builds an INSERT statement.

    Example:
        >>> insert("VALUES (1, 2, 3)", "tbl").sql()
        'INSERT INTO tbl VALUES (1, 2, 3)'

    Args:
        expression: the sql string or expression of the INSERT statement
        into: the tbl to insert data to.
        columns: optionally the table's column names.
        overwrite: whether to INSERT OVERWRITE or not.
        returning: sql conditional parsed into a RETURNING statement
        dialect: the dialect used to parse the input expressions.
        copy: whether to copy the expression.
        **opts: other options to use to parse the input expressions.

    Returns:
        Insert: the syntax tree for the INSERT statement.
    """
    source = maybe_parse(expression, dialect=dialect, copy=copy, **opts)
    target: Table | Schema = maybe_parse(into, into=Table, dialect=dialect, copy=copy, **opts)

    if columns:
        # With explicit columns, the target table is wrapped in a Schema listing them.
        column_ids = [to_identifier(column, copy=copy) for column in columns]
        target = Schema(this=target, expressions=column_ids)

    insert_expr = Insert(this=target, expression=source, overwrite=overwrite)

    if returning:
        insert_expr = insert_expr.returning(returning, dialect=dialect, copy=False, **opts)

    return insert_expr
2025-02-13 15:57:23 +01:00
2025-02-13 21:54:47 +01:00
def merge(
    *when_exprs: ExpOrStr,
    into: ExpOrStr,
    using: ExpOrStr,
    on: ExpOrStr,
    returning: t.Optional[ExpOrStr] = None,
    dialect: DialectType = None,
    copy: bool = True,
    **opts,
) -> Merge:
    """
    Builds a MERGE statement.

    Example:
        >>> merge("WHEN MATCHED THEN UPDATE SET col1 = source_table.col1",
        ...       "WHEN NOT MATCHED THEN INSERT (col1) VALUES (source_table.col1)",
        ...       into="my_table",
        ...       using="source_table",
        ...       on="my_table.id = source_table.id").sql()
        'MERGE INTO my_table USING source_table ON my_table.id = source_table.id WHEN MATCHED THEN UPDATE SET col1 = source_table.col1 WHEN NOT MATCHED THEN INSERT (col1) VALUES (source_table.col1)'

    Args:
        *when_exprs: The WHEN clauses specifying actions for matched and unmatched rows.
        into: The target table to merge data into.
        using: The source table to merge data from.
        on: The join condition for the merge.
        returning: The columns to return from the merge.
        dialect: The dialect used to parse the input expressions.
        copy: Whether to copy the expression.
        **opts: Other options to use to parse the input expressions.

    Returns:
        Merge: The syntax tree for the MERGE statement.
    """
    target = maybe_parse(into, dialect=dialect, copy=copy, **opts)
    source = maybe_parse(using, dialect=dialect, copy=copy, **opts)
    join_condition = maybe_parse(on, dialect=dialect, copy=copy, **opts)

    # Every WHEN clause is parsed into a `When` node.
    when_clauses = []
    for when_expr in when_exprs:
        when_clauses.append(maybe_parse(when_expr, dialect=dialect, copy=copy, into=When, **opts))

    merge_expr = Merge(
        this=target,
        using=source,
        on=join_condition,
        expressions=when_clauses,
    )

    if returning:
        merge_expr = merge_expr.returning(returning, dialect=dialect, copy=False, **opts)

    return merge_expr
2025-02-13 21:54:47 +01:00
2025-02-13 15:57:23 +01:00
def condition(
    expression: ExpOrStr, dialect: DialectType = None, copy: bool = True, **opts
) -> Condition:
    """
    Initialize a logical condition expression.

    Example:
        >>> condition("x=1").sql()
        'x = 1'

        This is helpful for composing larger logical syntax trees:
        >>> where = condition("x=1")
        >>> where = where.and_("y=1")
        >>> Select().from_("tbl").select("*").where(where).sql()
        'SELECT * FROM tbl WHERE x = 1 AND y = 1'

    Args:
        expression: the SQL code string to parse.
            If an Expression instance is passed, this is used as-is.
        dialect: the dialect used to parse the input expression (in the case that the
            input expression is a SQL string).
        copy: Whether to copy `expression` (only applies to expressions).
        **opts: other options to use to parse the input expression (again, in the case
            that the input expression is a SQL string).

    Returns:
        The new Condition instance
    """
    parsed = maybe_parse(expression, into=Condition, dialect=dialect, copy=copy, **opts)
    return parsed
2025-02-13 15:57:23 +01:00
def and_(
    *expressions: t.Optional[ExpOrStr],
    dialect: DialectType = None,
    copy: bool = True,
    wrap: bool = True,
    **opts,
) -> Condition:
    """
    Combine multiple conditions with an AND logical operator.

    Example:
        >>> and_("x=1", and_("y=1", "z=1")).sql()
        'x = 1 AND (y = 1 AND z = 1)'

    Args:
        *expressions: the SQL code strings to parse.
            If an Expression instance is passed, this is used as-is.
        dialect: the dialect used to parse the input expression.
        copy: whether to copy `expressions` (only applies to Expressions).
        wrap: whether to wrap the operands in `Paren`s. This is true by default to avoid
            precedence issues, but can be turned off when the produced AST is too deep and
            causes recursion-related issues.
        **opts: other options to use to parse the input expressions.

    Returns:
        The new condition
    """
    combined = _combine(expressions, And, dialect, copy=copy, wrap=wrap, **opts)
    return t.cast(Condition, combined)
2025-02-13 06:15:54 +01:00
2025-02-13 15:57:23 +01:00
def or_(
    *expressions: t.Optional[ExpOrStr],
    dialect: DialectType = None,
    copy: bool = True,
    wrap: bool = True,
    **opts,
) -> Condition:
    """
    Combine multiple conditions with an OR logical operator.

    Example:
        >>> or_("x=1", or_("y=1", "z=1")).sql()
        'x = 1 OR (y = 1 OR z = 1)'

    Args:
        *expressions: the SQL code strings to parse.
            If an Expression instance is passed, this is used as-is.
        dialect: the dialect used to parse the input expression.
        copy: whether to copy `expressions` (only applies to Expressions).
        wrap: whether to wrap the operands in `Paren`s. This is true by default to avoid
            precedence issues, but can be turned off when the produced AST is too deep and
            causes recursion-related issues.
        **opts: other options to use to parse the input expressions.

    Returns:
        The new condition
    """
    combined = _combine(expressions, Or, dialect, copy=copy, wrap=wrap, **opts)
    return t.cast(Condition, combined)
2025-02-13 06:15:54 +01:00
2025-02-13 21:33:25 +01:00
def xor(
    *expressions: t.Optional[ExpOrStr],
    dialect: DialectType = None,
    copy: bool = True,
    wrap: bool = True,
    **opts,
) -> Condition:
    """
    Combine multiple conditions with an XOR logical operator.

    Example:
        >>> xor("x=1", xor("y=1", "z=1")).sql()
        'x = 1 XOR (y = 1 XOR z = 1)'

    Args:
        *expressions: the SQL code strings to parse.
            If an Expression instance is passed, this is used as-is.
        dialect: the dialect used to parse the input expression.
        copy: whether to copy `expressions` (only applies to Expressions).
        wrap: whether to wrap the operands in `Paren`s. This is true by default to avoid
            precedence issues, but can be turned off when the produced AST is too deep and
            causes recursion-related issues.
        **opts: other options to use to parse the input expressions.

    Returns:
        The new condition
    """
    combined = _combine(expressions, Xor, dialect, copy=copy, wrap=wrap, **opts)
    return t.cast(Condition, combined)
2025-02-13 21:33:25 +01:00
2025-02-13 15:57:23 +01:00
def not_(expression: ExpOrStr, dialect: DialectType = None, copy: bool = True, **opts) -> Not:
    """
    Wrap a condition with a NOT operator.

    Example:
        >>> not_("this_suit='black'").sql()
        "NOT this_suit = 'black'"

    Args:
        expression: the SQL code string to parse.
            If an Expression instance is passed, this is used as-is.
        dialect: the dialect used to parse the input expression.
        copy: whether to copy the expression or not.
        **opts: other options to use to parse the input expressions.

    Returns:
        The new condition.
    """
    operand = condition(expression, dialect=dialect, copy=copy, **opts)
    # A connector operand is parenthesized before being negated.
    negated_operand = _wrap(operand, Connector)
    return Not(this=negated_operand)
2025-02-13 06:15:54 +01:00
2025-02-13 15:57:23 +01:00
def paren(expression: ExpOrStr, copy: bool = True) -> Paren:
    """
    Wrap an expression in parentheses.

    Example:
        >>> paren("5 + 3").sql()
        '(5 + 3)'

    Args:
        expression: the SQL code string to parse.
            If an Expression instance is passed, this is used as-is.
        copy: whether to copy the expression or not.

    Returns:
        The wrapped expression.
    """
    inner = maybe_parse(expression, copy=copy)
    return Paren(this=inner)
2025-02-13 06:15:54 +01:00
2025-02-13 21:20:36 +01:00
# Names matching this pattern are left unquoted by `to_identifier` unless quoting is forced.
SAFE_IDENTIFIER_RE: t.Pattern[str] = re.compile(r"^[_a-zA-Z][\w]*$")


@t.overload
def to_identifier(name: None, quoted: t.Optional[bool] = None, copy: bool = True) -> None: ...


@t.overload
def to_identifier(
    name: str | Identifier, quoted: t.Optional[bool] = None, copy: bool = True
) -> Identifier: ...


def to_identifier(name, quoted=None, copy=True):
    """Builds an identifier.

    Args:
        name: The name to turn into an identifier.
        quoted: Whether to force quote the identifier. When left as None, a string name
            is quoted exactly when it does not match `SAFE_IDENTIFIER_RE`.
        copy: Whether to copy name if it's an Identifier.

    Returns:
        The identifier ast node, or None if `name` is None.

    Raises:
        ValueError: if `name` is neither None, a string nor an Identifier.
    """
    if name is None:
        return None

    if isinstance(name, Identifier):
        return maybe_copy(name, copy)

    if isinstance(name, str):
        if quoted is None:
            quoted = not SAFE_IDENTIFIER_RE.match(name)
        return Identifier(this=name, quoted=quoted)

    raise ValueError(f"Name needs to be a string or an Identifier, got: {name.__class__}")
2025-02-13 21:17:09 +01:00
def parse_identifier(name: str | Identifier, dialect: DialectType = None) -> Identifier:
    """
    Parses a given string into an identifier.

    If parsing fails with a `ParseError` or `TokenError`, the name is handed to
    `to_identifier` instead.

    Args:
        name: The name to parse into an identifier.
        dialect: The dialect to parse against.

    Returns:
        The identifier ast node.
    """
    try:
        return maybe_parse(name, dialect=dialect, into=Identifier)
    except (ParseError, TokenError):
        return to_identifier(name)
2025-02-13 15:09:58 +01:00
# Matches "<integer> <alphabetic unit>" with optional surrounding whitespace.
# `to_interval` below does not reference it; it stays defined as a module-level name.
INTERVAL_STRING_RE = re.compile(r"\s*([0-9]+)\s*([a-zA-Z]+)\s*")


def to_interval(interval: str | Literal) -> Interval:
    """Builds an interval expression from a string like '1 day' or '5 months'.

    Args:
        interval: the interval text, either as a plain string or as a string Literal.

    Returns:
        The parsed Interval expression.

    Raises:
        ValueError: if a non-string Literal is passed, or if `INTERVAL <text>` does
            not parse into an Interval node.
    """
    if isinstance(interval, Literal):
        if not interval.is_string:
            raise ValueError("Invalid interval string.")

        interval = interval.this

    parsed = maybe_parse(f"INTERVAL {interval}")

    # Explicit check instead of `assert`, which is removed when Python runs with -O
    # and would let a non-Interval node escape to the caller.
    if not isinstance(parsed, Interval):
        raise ValueError("Invalid interval string.")

    return parsed
2025-02-13 15:09:58 +01:00
2025-02-13 15:57:23 +01:00
def to_table(
    sql_path: str | Table, dialect: DialectType = None, copy: bool = True, **kwargs
) -> Table:
    """
    Create a table expression from a `[catalog].[schema].[table]` sql path. Catalog and schema are optional.
    If a table is passed in then that table is returned (optionally copied); `kwargs` are
    not applied in that case.

    Args:
        sql_path: a `[catalog].[schema].[table]` string.
        dialect: the source dialect according to which the table name will be parsed.
        copy: Whether to copy a table if it is passed in.
        kwargs: the kwargs to instantiate the resulting `Table` expression with.

    Returns:
        A table expression.
    """
    if not isinstance(sql_path, Table):
        parsed = maybe_parse(sql_path, into=Table, dialect=dialect)

        # Extra args are set one by one on the freshly parsed node.
        for arg_name, arg_value in kwargs.items():
            parsed.set(arg_name, arg_value)

        return parsed

    return maybe_copy(sql_path, copy=copy)
2025-02-13 14:46:58 +01:00
2025-02-13 21:31:23 +01:00
def to_column(
    sql_path: str | Column,
    quoted: t.Optional[bool] = None,
    dialect: DialectType = None,
    copy: bool = True,
    **kwargs,
) -> Column:
    """
    Create a column from a `[table].[column]` sql path. Table is optional.
    If a column is passed in then that column is returned (optionally copied).

    Args:
        sql_path: a `[table].[column]` string.
        quoted: Whether or not to force quote identifiers.
        dialect: the source dialect according to which the column name will be parsed.
        copy: Whether to copy a column if it is passed in.
        kwargs: the kwargs to instantiate the resulting `Column` expression with.

    Returns:
        A column expression.
    """
    if isinstance(sql_path, Column):
        return maybe_copy(sql_path, copy=copy)

    try:
        parsed = maybe_parse(sql_path, into=Column, dialect=dialect)
    except ParseError:
        # Fall back to splitting on dots; `column` takes the parts innermost-first.
        parts = sql_path.split(".")[::-1]
        return column(*parts, quoted=quoted, **kwargs)

    for arg_name, arg_value in kwargs.items():
        parsed.set(arg_name, arg_value)

    if quoted:
        for identifier in parsed.find_all(Identifier):
            identifier.set("quoted", True)

    return parsed
2025-02-13 14:48:46 +01:00
2025-02-13 15:23:26 +01:00
def alias_(
    expression: ExpOrStr,
    alias: t.Optional[str | Identifier],
    table: bool | t.Sequence[str | Identifier] = False,
    quoted: t.Optional[bool] = None,
    dialect: DialectType = None,
    copy: bool = True,
    **opts,
):
    """Create an Alias expression.

    Example:
        >>> alias_('foo', 'bar').sql()
        'foo AS bar'

        >>> alias_('(select 1, 2)', 'bar', table=['a', 'b']).sql()
        '(SELECT 1, 2) AS bar(a, b)'

    Args:
        expression: the SQL code strings to parse.
            If an Expression instance is passed, this is used as-is.
        alias: the alias name to use. If the name has
            special characters it is quoted.
        table: Whether to create a table alias, can also be a list of columns.
        quoted: whether to quote the alias
        dialect: the dialect used to parse the input expression.
        copy: Whether to copy the expression.
        **opts: other options to use to parse the input expressions.

    Returns:
        Alias: the aliased expression
    """
    target = maybe_parse(expression, dialect=dialect, copy=copy, **opts)
    alias_id = to_identifier(alias, quoted=quoted)

    if table:
        table_alias = TableAlias(this=alias_id)
        target.set("alias", table_alias)

        # A non-bool `table` is the sequence of column names for the alias.
        if not isinstance(table, bool):
            for column_name in table:
                table_alias.append("columns", to_identifier(column_name, quoted=quoted))

        return target

    # We don't set the "alias" arg for Window expressions, because that would add an IDENTIFIER node in
    # the AST, representing a "named_window" [1] construct (eg. bigquery). What we want is an ALIAS node
    # for the complete Window expression.
    #
    # [1]: https://cloud.google.com/bigquery/docs/reference/standard-sql/window-function-calls
    if "alias" not in target.arg_types or isinstance(target, Window):
        return Alias(this=target, alias=alias_id)

    target.set("alias", alias_id)
    return target
2025-02-13 15:57:23 +01:00
def subquery(
    expression: ExpOrStr,
    alias: t.Optional[Identifier | str] = None,
    dialect: DialectType = None,
    **opts,
) -> Select:
    """
    Build a subquery expression that's selected from.

    Example:
        >>> subquery('select x from tbl', 'bar').select('x').sql()
        'SELECT x FROM (SELECT x FROM tbl) AS bar'

    Args:
        expression: the SQL code strings to parse.
            If an Expression instance is passed, this is used as-is.
        alias: the alias name to use.
        dialect: the dialect used to parse the input expression.
        **opts: other options to use to parse the input expressions.
            The same options are also forwarded to `.subquery(...)` and `.from_(...)`.

    Returns:
        A new Select instance with the subquery expression included.
    """
    parsed = maybe_parse(expression, dialect=dialect, **opts)
    wrapped = parsed.subquery(alias, **opts)
    outer = Select()
    return outer.from_(wrapped, dialect=dialect, **opts)
2025-02-13 21:19:14 +01:00
@t.overload
def column(
    col: str | Identifier,
    table: t.Optional[str | Identifier] = None,
    db: t.Optional[str | Identifier] = None,
    catalog: t.Optional[str | Identifier] = None,
    *,
    fields: t.Collection[t.Union[str, Identifier]],
    quoted: t.Optional[bool] = None,
    copy: bool = True,
) -> Dot: ...


@t.overload
def column(
    col: str | Identifier,
    table: t.Optional[str | Identifier] = None,
    db: t.Optional[str | Identifier] = None,
    catalog: t.Optional[str | Identifier] = None,
    *,
    fields: Lit[None] = None,
    quoted: t.Optional[bool] = None,
    copy: bool = True,
) -> Column: ...


def column(
    col,
    table=None,
    db=None,
    catalog=None,
    *,
    fields=None,
    quoted=None,
    copy=True,
):
    """
    Build a Column.

    Args:
        col: Column name.
        table: Table name.
        db: Database name.
        catalog: Catalog name.
        fields: Additional fields using dots.
        quoted: Whether to force quotes on the column's identifiers.
        copy: Whether to copy identifiers if passed in.

    Returns:
        The new Column instance, or a Dot built on top of it when `fields` is non-empty.
    """

    def _ident(part):
        # Every name part shares the same quoting and copy settings.
        return to_identifier(part, quoted=quoted, copy=copy)

    node = Column(
        this=_ident(col),
        table=_ident(table),
        db=_ident(db),
        catalog=_ident(catalog),
    )

    if not fields:
        return node

    return Dot.build((node, *map(_ident, fields)))
2025-02-13 06:15:54 +01:00
2025-02-13 21:52:32 +01:00
def cast(
    expression: ExpOrStr, to: DATA_TYPE, copy: bool = True, dialect: DialectType = None, **opts
) -> Cast:
    """Cast an expression to a data type.

    Example:
        >>> cast('x + 1', 'int').sql()
        'CAST(x + 1 AS INT)'

    Args:
        expression: The expression to cast.
        to: The datatype to cast to.
        copy: Whether to copy the supplied expressions.
        dialect: The target dialect. This is used to prevent a re-cast in the following scenario:
            - The expression to be cast is already a exp.Cast expression
            - The existing cast is to a type that is logically equivalent to new type

            For example, if :expression='CAST(x as DATETIME)' and :to=Type.TIMESTAMP,
            but in the target dialect DATETIME is mapped to TIMESTAMP, then we will NOT return `CAST(x (as DATETIME) as TIMESTAMP)`
            and instead just return the original expression `CAST(x as DATETIME)`.

            This is to prevent it being output as a double cast `CAST(x (as TIMESTAMP) as TIMESTAMP)` once the DATETIME -> TIMESTAMP
            mapping is applied in the target dialect generator.

    Returns:
        The new Cast instance.
    """
    operand = maybe_parse(expression, copy=copy, dialect=dialect, **opts)
    target_type = DataType.build(to, copy=copy, dialect=dialect, **opts)

    # Don't re-cast if the expression is already a cast to the correct type.
    if isinstance(operand, Cast):
        from sqlglot.dialects.dialect import Dialect

        type_mapping = Dialect.get_or_raise(dialect).generator_class.TYPE_MAPPING

        current_kind: DataType.Type = operand.to.this
        requested_kind: DataType.Type = target_type.this

        def _rendered(kind: DataType.Type):
            # What the target generator maps this type to; defaults to the enum's value.
            return type_mapping.get(kind, kind.value)

        # NOTE(review): this compares only `.this` of both types; confirm that ignoring
        # any other DataType args here is intended.
        same_rendering = _rendered(current_kind) == _rendered(requested_kind)

        if operand.is_type(target_type) or same_rendering:
            return operand

    result = Cast(this=operand, to=target_type)
    result.type = target_type
    return result
2025-02-13 15:09:58 +01:00
2025-02-13 15:57:23 +01:00
def table_(
    table: Identifier | str,
    db: t.Optional[Identifier | str] = None,
    catalog: t.Optional[Identifier | str] = None,
    quoted: t.Optional[bool] = None,
    alias: t.Optional[Identifier | str] = None,
) -> Table:
    """Build a Table.

    Args:
        table: Table name.
        db: Database name.
        catalog: Catalog name.
        quoted: Whether to force quotes on the table's identifiers.
            Not applied to `alias`.
        alias: Table's alias.

    Returns:
        The new Table instance.
    """

    def _ident(part):
        # Falsy parts (None, empty string) become None.
        return to_identifier(part, quoted=quoted) if part else None

    this_part = _ident(table)
    db_part = _ident(db)
    catalog_part = _ident(catalog)
    alias_node = TableAlias(this=to_identifier(alias)) if alias else None

    return Table(this=this_part, db=db_part, catalog=catalog_part, alias=alias_node)
2025-02-13 15:00:13 +01:00
def values(
    values: t.Iterable[t.Tuple[t.Any, ...]],
    alias: t.Optional[str] = None,
    columns: t.Optional[t.Iterable[str] | t.Dict[str, DataType]] = None,
) -> Values:
    """Build VALUES statement.

    Example:
        >>> values([(1, '2')]).sql()
        "VALUES (1, '2')"

    Args:
        values: values statements that will be converted to SQL
        alias: optional alias
        columns: Optional list of ordered column names or ordered dictionary of column names to types.
            If either are provided then an alias is also required.
            Only the names obtained by iterating `columns` are used here.

    Returns:
        Values: the Values expression object

    Raises:
        ValueError: if `columns` is given without an `alias`.
    """
    if columns and not alias:
        raise ValueError("Alias is required when providing columns")

    rows = [convert(row) for row in values]

    if columns:
        table_alias = TableAlias(
            this=to_identifier(alias), columns=[to_identifier(name) for name in columns]
        )
    elif alias:
        table_alias = TableAlias(this=to_identifier(alias))
    else:
        table_alias = None

    return Values(expressions=rows, alias=table_alias)
2025-02-13 15:46:19 +01:00
def var(name: t.Optional[ExpOrStr]) -> Var:
    """Build a SQL variable.

    Example:
        >>> repr(var('x'))
        'Var(this=x)'

        >>> repr(var(column('x', table='y')))
        'Var(this=x)'

    Args:
        name: The name of the var or an expression who's name will become the var.

    Returns:
        The new variable node.

    Raises:
        ValueError: if `name` is falsy.
    """
    if not name:
        raise ValueError("Cannot convert empty name into var.")

    # For an expression, use its `.name`; a string is used as given.
    var_name = name.name if isinstance(name, Expression) else name
    return Var(this=var_name)
2025-02-13 21:31:23 +01:00
def rename_table(
    old_name: str | Table,
    new_name: str | Table,
    dialect: DialectType = None,
) -> Alter:
    """Build ALTER TABLE... RENAME... expression

    Args:
        old_name: The old name of the table
        new_name: The new name of the table
        dialect: The dialect to parse the table.

    Returns:
        Alter table expression
    """
    source = to_table(old_name, dialect=dialect)
    target = to_table(new_name, dialect=dialect)
    rename_action = AlterRename(this=target)

    return Alter(this=source, kind="TABLE", actions=[rename_action])
2025-02-13 21:19:58 +01:00
def rename_column(
    table_name: str | Table,
    old_column_name: str | Column,
    new_column_name: str | Column,
    exists: t.Optional[bool] = None,
    dialect: DialectType = None,
) -> Alter:
    """Build ALTER TABLE... RENAME COLUMN... expression

    Args:
        table_name: Name of the table
        old_column_name: The old name of the column
        new_column_name: The new name of the column
        exists: Whether to add the `IF EXISTS` clause
        dialect: The dialect to parse the table/column.

    Returns:
        Alter table expression
    """
    target_table = to_table(table_name, dialect=dialect)
    source_column = to_column(old_column_name, dialect=dialect)
    renamed_column = to_column(new_column_name, dialect=dialect)
    rename_action = RenameColumn(this=source_column, to=renamed_column, exists=exists)

    return Alter(this=target_table, kind="TABLE", actions=[rename_action])
2025-02-13 15:53:39 +01:00
def convert(value: t.Any, copy: bool = False) -> Expression:
    """Convert a python value into an expression object.

    Raises an error if a conversion is not possible.

    Args:
        value: A python object.
        copy: Whether to copy `value` (only applies to Expressions and collections).

    Returns:
        The equivalent expression object.

    Raises:
        ValueError: if no branch below handles `value`.
    """

    def _convert_all(items):
        return [convert(item, copy=copy) for item in items]

    def _struct(pairs):
        # One `name = value` property per (name, value) pair.
        return Struct(
            expressions=[
                PropertyEQ(this=to_identifier(key), expression=convert(item, copy=copy))
                for key, item in pairs
            ]
        )

    if isinstance(value, Expression):
        return maybe_copy(value, copy)
    if isinstance(value, str):
        return Literal.string(value)
    # bool is checked before the generic number branch so True/False become Boolean nodes.
    if isinstance(value, bool):
        return Boolean(this=value)
    if value is None or (isinstance(value, float) and math.isnan(value)):
        return null()
    if isinstance(value, numbers.Number):
        return Literal.number(value)
    if isinstance(value, bytes):
        return HexString(this=value.hex())
    # datetime is checked before date because datetime.datetime subclasses datetime.date.
    if isinstance(value, datetime.datetime):
        timestamp_text = Literal.string(value.isoformat(sep=" "))
        # this works for zoneinfo.ZoneInfo, pytz.timezone and datetime.datetime.utc to return IANA timezone names like "America/Los_Angeles"
        # instead of abbreviations like "PDT". This is for consistency with other timezone handling functions in SQLGlot
        zone = Literal.string(str(value.tzinfo)) if value.tzinfo else None
        return TimeStrToTime(this=timestamp_text, zone=zone)
    if isinstance(value, datetime.date):
        return DateStrToDate(this=Literal.string(value.strftime("%Y-%m-%d")))
    if isinstance(value, tuple):
        # Tuples exposing `_fields` (e.g. namedtuples) become structs keyed by field name.
        if hasattr(value, "_fields"):
            return _struct((field, getattr(value, field)) for field in value._fields)
        return Tuple(expressions=_convert_all(value))
    if isinstance(value, list):
        return Array(expressions=_convert_all(value))
    if isinstance(value, dict):
        return Map(
            keys=Array(expressions=_convert_all(value)),
            values=Array(expressions=_convert_all(value.values())),
        )
    # Last resort: any object with a `__dict__` becomes a struct of its attributes.
    if hasattr(value, "__dict__"):
        return _struct(value.__dict__.items())

    raise ValueError(f"Cannot convert {value}")
2025-02-13 15:57:23 +01:00
def replace_children(expression: Expression, fun: t.Callable, *args, **kwargs) -> None:
    """
    Replace children of an expression with the result of a lambda fun(child) -> exp.

    Each Expression child is passed to `fun(child, *args, **kwargs)`; the result may be a
    single node or a collection of nodes. Non-Expression children are kept as they are.
    """
    # Iterate over a snapshot, since `expression.set` is called during the loop.
    for arg_name, arg_value in tuple(expression.args.items()):
        was_list = type(arg_value) is list
        children = arg_value if was_list else [arg_value]

        replaced = []
        for child in children:
            if isinstance(child, Expression):
                replaced.extend(ensure_collection(fun(child, *args, **kwargs)))
            else:
                replaced.append(child)

        # A scalar arg stays scalar: take the first replacement.
        expression.set(arg_name, replaced if was_list else seq_get(replaced, 0))
def replace_tree(
    expression: Expression,
    fun: t.Callable,
    prune: t.Optional[t.Callable[[Expression], bool]] = None,
) -> Expression:
    """
    Replace an entire tree with the result of function calls on each node.

    This will be traversed in reverse dfs, so leaves first.
    If new nodes are created as a result of function calls, they will also be traversed.

    Args:
        expression: the root of the tree to rewrite.
        fun: called with each node; a different return value replaces that node.
        prune: forwarded to `expression.dfs`.

    Returns:
        The result of the last `fun` call made during the traversal.
    """
    pending = list(expression.dfs(prune=prune))

    while pending:
        # Popping from the end of the dfs listing visits nodes in reverse dfs order.
        current = pending.pop()
        replacement = fun(current)

        if replacement is not current:
            current.replace(replacement)

            # Newly produced nodes get queued so they are visited too.
            if isinstance(replacement, Expression):
                pending.append(replacement)

    return replacement
2025-02-13 06:15:54 +01:00
2025-02-13 16:00:51 +01:00
def column_table_names(expression: Expression, exclude: str = "") -> t.Set[str]:
    """
    Return all table names referenced through columns in an expression.

    Example:
        >>> import sqlglot
        >>> sorted(column_table_names(sqlglot.parse_one("a.b AND c.d AND c.e")))
        ['a', 'c']

    Args:
        expression: expression to find table names.
        exclude: a table name to exclude

    Returns:
        A set of unique names.
    """
    names: t.Set[str] = set()

    for col in expression.find_all(Column):
        qualifier = col.table
        # Skip unqualified columns and the excluded name.
        if qualifier and qualifier != exclude:
            names.add(qualifier)

    return names
2025-02-13 06:15:54 +01:00
2025-02-13 21:17:09 +01:00
def table_name(table: Table | str, dialect: DialectType = None, identify: bool = False) -> str:
    """Get the full name of a table as a string.

    Args:
        table: Table expression node or string.
        dialect: The dialect to generate the table name for.
        identify: Determines when an identifier should be quoted. Possible values are:
            False (default): Only quote parts whose name does not match `SAFE_IDENTIFIER_RE`.
            True: Always quote.

    Examples:
        >>> from sqlglot import exp, parse_one
        >>> table_name(parse_one("select * from a.b.c").find(exp.Table))
        'a.b.c'

    Returns:
        The table name.

    Raises:
        ValueError: if parsing `table` yields a falsy result.
    """
    # Keep the caller's input under its own name so the error below can report it;
    # previously `table` was rebound and the message showed the falsy parse result.
    parsed = maybe_parse(table, into=Table, dialect=dialect)

    if not parsed:
        raise ValueError(f"Cannot parse {table}")

    return ".".join(
        (
            part.sql(dialect=dialect, identify=True, copy=False, comments=False)
            if identify or not SAFE_IDENTIFIER_RE.match(part.name)
            else part.name
        )
        for part in parsed.parts
    )
2025-02-13 14:45:11 +01:00
2025-02-13 21:17:09 +01:00
def normalize_table_name(table: str | Table, dialect: DialectType = None, copy: bool = True) -> str:
    """Returns a case normalized table name without quotes.

    Args:
        table: the table to normalize
        dialect: the dialect to use for normalization rules
        copy: whether to copy the expression.

    Examples:
        >>> normalize_table_name("`A-B`.c", dialect="bigquery")
        'A-B.c'
    """
    from sqlglot.optimizer.normalize_identifiers import normalize_identifiers

    table_node = to_table(table, dialect=dialect, copy=copy)
    normalized = normalize_identifiers(table_node, dialect=dialect)

    # Join the bare part names, so no quoting appears in the result.
    return ".".join(part.name for part in normalized.parts)
def replace_tables(
    expression: E, mapping: t.Dict[str, str], dialect: DialectType = None, copy: bool = True
) -> E:
    """Replace all tables in expression according to the mapping.

    Args:
        expression: expression node to be transformed and replaced.
        mapping: mapping of table names.
        dialect: the dialect of the mapping table
        copy: whether to copy the expression.

    Examples:
        >>> from sqlglot import exp, parse_one
        >>> replace_tables(parse_one("select * from a.b"), {"a.b": "c"}).sql()
        'SELECT * FROM c /* a.b */'

    Returns:
        The mapped expression.
    """
    # Normalize the mapping keys the same way table nodes are normalized for lookup.
    normalized_mapping = {
        normalize_table_name(old, dialect=dialect): new for old, new in mapping.items()
    }

    def _replace_tables(node: Expression) -> Expression:
        # Only tables are candidates, and a table whose meta has `replace` set to False is skipped.
        if not isinstance(node, Table) or node.meta.get("replace") is False:
            return node

        original = normalize_table_name(node, dialect=dialect)
        new_name = normalized_mapping.get(original)
        if not new_name:
            return node

        # Carry over every arg of the old node except the name parts themselves.
        carried_args = {k: v for k, v in node.args.items() if k not in TABLE_PARTS}
        replacement = to_table(new_name, **carried_args, dialect=dialect)

        # The original name is attached as a comment on the replacement.
        replacement.add_comments([original])
        return replacement

    return expression.transform(_replace_tables, copy=copy)  # type: ignore
2025-02-13 14:45:11 +01:00
2025-02-13 15:57:23 +01:00
def replace_placeholders(expression: Expression, *args, **kwargs) -> Expression:
    """Replace placeholders in an expression.

    Args:
        expression: expression node to be transformed and replaced.
        args: positional names that will substitute unnamed placeholders in the given order.
        kwargs: keyword arguments that will substitute named placeholders.

    Examples:
        >>> from sqlglot import exp, parse_one
        >>> replace_placeholders(
        ...     parse_one("select * from :tbl where ? = ?"),
        ...     exp.to_identifier("str_col"), "b", tbl=exp.to_identifier("foo")
        ... ).sql()
        "SELECT * FROM foo WHERE str_col = 'b'"

    Returns:
        The mapped expression.
    """

    def _replace_placeholders(node: Expression, args, **kwargs) -> Expression:
        if not isinstance(node, Placeholder):
            return node

        if node.this:
            # Named placeholder: substitute only when a non-None value was supplied.
            named_value = kwargs.get(node.this)
            if named_value is not None:
                return convert(named_value)
            return node

        # Unnamed placeholder: take the next positional value; once they run out,
        # the placeholder is left in place.
        try:
            return convert(next(args))
        except StopIteration:
            return node

    # A single shared iterator, so each unnamed placeholder consumes one positional value.
    return expression.transform(_replace_placeholders, iter(args), **kwargs)
2025-02-13 15:52:09 +01:00
def expand(
    expression: Expression,
    sources: t.Dict[str, Query],
    dialect: DialectType = None,
    copy: bool = True,
) -> Expression:
    """Transforms an expression by expanding all referenced sources into subqueries.

    Examples:
        >>> from sqlglot import parse_one
        >>> expand(parse_one("select * from x AS z"), {"x": parse_one("select * from y")}).sql()
        'SELECT * FROM (SELECT * FROM y) AS z /* source: x */'

        >>> expand(parse_one("select * from x AS z"), {"x": parse_one("select * from y"), "y": parse_one("select * from z")}).sql()
        'SELECT * FROM (SELECT * FROM (SELECT * FROM z) AS y /* source: y */) AS z /* source: x */'

    Args:
        expression: The expression to expand.
        sources: A dictionary of name to Queries.
        dialect: The dialect of the sources dict.
        copy: Whether to copy the expression during transformation. Defaults to True.

    Returns:
        The transformed expression.
    """
    # Normalize the source names the same way table nodes are normalized for lookup.
    normalized_sources = {
        normalize_table_name(name, dialect=dialect): query for name, query in sources.items()
    }

    def _expand(node: Expression):
        if not isinstance(node, Table):
            return node

        name = normalize_table_name(node, dialect=dialect)
        source = normalized_sources.get(name)
        if not source:
            return node

        # Keep the table's own alias when it has one, otherwise alias by the source name.
        expanded = source.subquery(node.alias or name)
        expanded.comments = [f"source: {name}"]

        # Expand sources referenced inside the substituted query as well.
        return expanded.transform(_expand, copy=False)

    return expression.transform(_expand, copy=copy)
2025-02-13 21:17:09 +01:00
def func(name: str, *args, copy: bool = True, dialect: DialectType = None, **kwargs) -> Func:
    """
    Returns a Func expression.

    Examples:
        >>> func("abs", 5).sql()
        'ABS(5)'

        >>> func("cast", this=5, to=DataType.build("DOUBLE")).sql()
        'CAST(5 AS DOUBLE)'

    Args:
        name: the name of the function to build.
        args: the args used to instantiate the function of interest.
        copy: whether to copy the argument expressions.
        dialect: the source dialect.
        kwargs: the kwargs used to instantiate the function of interest.

    Note:
        The arguments `args` and `kwargs` are mutually exclusive.

    Returns:
        An instance of the function of interest, or an anonymous function, if `name` doesn't
        correspond to an existing `sqlglot.expressions.Func` class.

    Raises:
        ValueError: if both `args` and `kwargs` are given, if no constructor accepting
            keyword arguments can be found for `name`, or if the built function reports
            an error message.
    """
    if args and kwargs:
        raise ValueError("Can't use both args and kwargs to instantiate a function.")

    from sqlglot.dialects.dialect import Dialect

    dialect = Dialect.get_or_raise(dialect)

    converted: t.List[Expression] = [maybe_parse(arg, dialect=dialect, copy=copy) for arg in args]
    kwargs = {key: maybe_parse(value, dialect=dialect, copy=copy) for key, value in kwargs.items()}

    lookup_name = name.upper()
    builder = dialect.parser_class.FUNCTIONS.get(lookup_name)

    if not builder:
        # Unknown function name: build an anonymous call.
        anonymous_args = kwargs or {"expressions": converted}
        function = Anonymous(this=name, **anonymous_args)
    elif converted:
        # Positional args go to the parser's builder, which may also accept the dialect.
        if "dialect" in builder.__code__.co_varnames:
            function = builder(converted, dialect=dialect)
        else:
            function = builder(converted)
    elif builder.__name__ == "from_arg_list":
        # `__self__` of the bound `from_arg_list` is called directly with the kwargs.
        function = builder.__self__(**kwargs)  # type: ignore
    else:
        by_name = FUNCTION_BY_NAME.get(lookup_name)
        if not by_name:
            raise ValueError(
                f"Unable to convert '{name}' into a Func. Either manually construct "
                "the Func expression of interest or parse the function call."
            )
        function = by_name(**kwargs)

    # Raise on the first validation message, if there is one.
    for message in function.error_messages(converted):
        raise ValueError(message)

    return function
2025-02-13 21:17:09 +01:00
def case(
    expression: t.Optional[ExpOrStr] = None,
    **opts,
) -> Case:
    """
    Create an empty CASE expression that branches can be added to.

    Example:
        case().when("a = 1", "foo").else_("bar")

    Args:
        expression: Optionally, the input expression (not all dialects support this).
            It is passed through `maybe_parse` when given.
        **opts: Extra keyword arguments for parsing `expression`.

    Returns:
        A `Case` whose `this` is the parsed input expression (or None) and whose
        `ifs` list is empty.
    """
    subject = maybe_parse(expression, **opts) if expression is not None else None
    return Case(this=subject, ifs=[])
2025-02-13 21:27:51 +01:00
def array(
    *expressions: ExpOrStr, copy: bool = True, dialect: DialectType = None, **kwargs
) -> Array:
    """
    Build an array expression from the given elements.

    Examples:
        >>> array(1, 'x').sql()
        'ARRAY(1, x)'

    Args:
        expressions: the expressions to add to the array.
        copy: whether to copy the argument expressions.
        dialect: the source dialect.
        kwargs: extra keyword arguments forwarded to `maybe_parse` for every element.

    Returns:
        An array expression.
    """
    elements = [
        maybe_parse(element, copy=copy, dialect=dialect, **kwargs) for element in expressions
    ]
    return Array(expressions=elements)
def tuple_(
    *expressions: ExpOrStr, copy: bool = True, dialect: DialectType = None, **kwargs
) -> Tuple:
    """
    Build a tuple expression from the given elements.

    Examples:
        >>> tuple_(1, 'x').sql()
        '(1, x)'

    Args:
        expressions: the expressions to add to the tuple.
        copy: whether to copy the argument expressions.
        dialect: the source dialect.
        kwargs: extra keyword arguments forwarded to `maybe_parse` for every element.

    Returns:
        A tuple expression.
    """
    members = [
        maybe_parse(member, copy=copy, dialect=dialect, **kwargs) for member in expressions
    ]
    return Tuple(expressions=members)
2025-02-13 15:57:23 +01:00
def true() -> Boolean:
    """
    Build a new `Boolean` expression holding `True`.
    """
    return Boolean(this=True)
2025-02-13 15:57:23 +01:00
def false() -> Boolean:
    """
    Build a new `Boolean` expression holding `False`.
    """
    return Boolean(this=False)
2025-02-13 15:57:23 +01:00
def null() -> Null:
    """
    Build a new `Null` expression.
    """
    return Null()
2025-02-13 21:30:28 +01:00
# Constant expression classes, leaving out `Null`.
NONNULL_CONSTANTS = (Literal, Boolean)

# The classes above, in the same order, followed by `Null`.
CONSTANTS = NONNULL_CONSTANTS + (Null,)