Edit on GitHub

sqlglot.dialects.prql

  1from __future__ import annotations
  2
  3import typing as t
  4
  5from sqlglot import exp, parser, tokens
  6from sqlglot.dialects.dialect import Dialect
  7from sqlglot.tokens import TokenType
  8
  9
 10def _select_all(table: exp.Expression) -> t.Optional[exp.Select]:
 11    return exp.select("*").from_(table, copy=False) if table else None
 12
 13
 14class PRQL(Dialect):
 15    DPIPE_IS_STRING_CONCAT = False
 16
 17    class Tokenizer(tokens.Tokenizer):
 18        IDENTIFIERS = ["`"]
 19        QUOTES = ["'", '"']
 20
 21        SINGLE_TOKENS = {
 22            **tokens.Tokenizer.SINGLE_TOKENS,
 23            "=": TokenType.ALIAS,
 24            "'": TokenType.QUOTE,
 25            '"': TokenType.QUOTE,
 26            "`": TokenType.IDENTIFIER,
 27            "#": TokenType.COMMENT,
 28        }
 29
 30        KEYWORDS = {
 31            **tokens.Tokenizer.KEYWORDS,
 32        }
 33
 34    class Parser(parser.Parser):
 35        CONJUNCTION = {
 36            **parser.Parser.CONJUNCTION,
 37            TokenType.DAMP: exp.And,
 38            TokenType.DPIPE: exp.Or,
 39        }
 40
 41        TRANSFORM_PARSERS = {
 42            "DERIVE": lambda self, query: self._parse_selection(query),
 43            "SELECT": lambda self, query: self._parse_selection(query, append=False),
 44            "TAKE": lambda self, query: self._parse_take(query),
 45            "FILTER": lambda self, query: query.where(self._parse_conjunction()),
 46            "APPEND": lambda self, query: query.union(
 47                _select_all(self._parse_table()), distinct=False, copy=False
 48            ),
 49            "REMOVE": lambda self, query: query.except_(
 50                _select_all(self._parse_table()), distinct=False, copy=False
 51            ),
 52            "INTERSECT": lambda self, query: query.intersect(
 53                _select_all(self._parse_table()), distinct=False, copy=False
 54            ),
 55            "SORT": lambda self, query: self._parse_order_by(query),
 56        }
 57
 58        def _parse_equality(self) -> t.Optional[exp.Expression]:
 59            eq = self._parse_tokens(self._parse_comparison, self.EQUALITY)
 60            if not isinstance(eq, (exp.EQ, exp.NEQ)):
 61                return eq
 62
 63            # https://prql-lang.org/book/reference/spec/null.html
 64            if isinstance(eq.expression, exp.Null):
 65                is_exp = exp.Is(this=eq.this, expression=eq.expression)
 66                return is_exp if isinstance(eq, exp.EQ) else exp.Not(this=is_exp)
 67            if isinstance(eq.this, exp.Null):
 68                is_exp = exp.Is(this=eq.expression, expression=eq.this)
 69                return is_exp if isinstance(eq, exp.EQ) else exp.Not(this=is_exp)
 70            return eq
 71
 72        def _parse_statement(self) -> t.Optional[exp.Expression]:
 73            expression = self._parse_expression()
 74            expression = expression if expression else self._parse_query()
 75            return expression
 76
 77        def _parse_query(self) -> t.Optional[exp.Query]:
 78            from_ = self._parse_from()
 79
 80            if not from_:
 81                return None
 82
 83            query = exp.select("*").from_(from_, copy=False)
 84
 85            while self._match_texts(self.TRANSFORM_PARSERS):
 86                query = self.TRANSFORM_PARSERS[self._prev.text.upper()](self, query)
 87
 88            return query
 89
 90        def _parse_selection(self, query: exp.Query, append: bool = True) -> exp.Query:
 91            if self._match(TokenType.L_BRACE):
 92                selects = self._parse_csv(self._parse_expression)
 93
 94                if not self._match(TokenType.R_BRACE, expression=query):
 95                    self.raise_error("Expecting }")
 96            else:
 97                expression = self._parse_expression()
 98                selects = [expression] if expression else []
 99
100            projections = {
101                select.alias_or_name: select.this if isinstance(select, exp.Alias) else select
102                for select in query.selects
103            }
104
105            selects = [
106                select.transform(
107                    lambda s: (projections[s.name].copy() if s.name in projections else s)
108                    if isinstance(s, exp.Column)
109                    else s,
110                    copy=False,
111                )
112                for select in selects
113            ]
114
115            return query.select(*selects, append=append, copy=False)
116
117        def _parse_take(self, query: exp.Query) -> t.Optional[exp.Query]:
118            num = self._parse_number()  # TODO: TAKE for ranges a..b
119            return query.limit(num) if num else None
120
121        def _parse_ordered(
122            self, parse_method: t.Optional[t.Callable] = None
123        ) -> t.Optional[exp.Ordered]:
124            asc = self._match(TokenType.PLUS)
125            desc = self._match(TokenType.DASH) or (asc and False)
 126            term = super()._parse_ordered(parse_method=parse_method)
127            if term and desc:
128                term.set("desc", True)
129                term.set("nulls_first", False)
130            return term
131
132        def _parse_order_by(self, query: exp.Select) -> t.Optional[exp.Query]:
133            l_brace = self._match(TokenType.L_BRACE)
134            expressions = self._parse_csv(self._parse_ordered)
135            if l_brace and not self._match(TokenType.R_BRACE):
136                self.raise_error("Expecting }")
137            return query.order_by(self.expression(exp.Order, expressions=expressions), copy=False)
138
139        def _parse_expression(self) -> t.Optional[exp.Expression]:
140            if self._next and self._next.token_type == TokenType.ALIAS:
141                alias = self._parse_id_var(True)
142                self._match(TokenType.ALIAS)
143                return self.expression(exp.Alias, this=self._parse_conjunction(), alias=alias)
144            return self._parse_conjunction()
145
146        def _parse_table(
147            self,
148            schema: bool = False,
149            joins: bool = False,
150            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
151            parse_bracket: bool = False,
152            is_db_reference: bool = False,
153            parse_partition: bool = False,
154        ) -> t.Optional[exp.Expression]:
155            return self._parse_table_parts()
156
157        def _parse_from(
158            self, joins: bool = False, skip_from_token: bool = False
159        ) -> t.Optional[exp.From]:
160            if not skip_from_token and not self._match(TokenType.FROM):
161                return None
162
163            return self.expression(
164                exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
165            )
class PRQL(sqlglot.dialects.dialect.Dialect):
 15class PRQL(Dialect):
 16    DPIPE_IS_STRING_CONCAT = False
 17
 18    class Tokenizer(tokens.Tokenizer):
 19        IDENTIFIERS = ["`"]
 20        QUOTES = ["'", '"']
 21
 22        SINGLE_TOKENS = {
 23            **tokens.Tokenizer.SINGLE_TOKENS,
 24            "=": TokenType.ALIAS,
 25            "'": TokenType.QUOTE,
 26            '"': TokenType.QUOTE,
 27            "`": TokenType.IDENTIFIER,
 28            "#": TokenType.COMMENT,
 29        }
 30
 31        KEYWORDS = {
 32            **tokens.Tokenizer.KEYWORDS,
 33        }
 34
 35    class Parser(parser.Parser):
 36        CONJUNCTION = {
 37            **parser.Parser.CONJUNCTION,
 38            TokenType.DAMP: exp.And,
 39            TokenType.DPIPE: exp.Or,
 40        }
 41
 42        TRANSFORM_PARSERS = {
 43            "DERIVE": lambda self, query: self._parse_selection(query),
 44            "SELECT": lambda self, query: self._parse_selection(query, append=False),
 45            "TAKE": lambda self, query: self._parse_take(query),
 46            "FILTER": lambda self, query: query.where(self._parse_conjunction()),
 47            "APPEND": lambda self, query: query.union(
 48                _select_all(self._parse_table()), distinct=False, copy=False
 49            ),
 50            "REMOVE": lambda self, query: query.except_(
 51                _select_all(self._parse_table()), distinct=False, copy=False
 52            ),
 53            "INTERSECT": lambda self, query: query.intersect(
 54                _select_all(self._parse_table()), distinct=False, copy=False
 55            ),
 56            "SORT": lambda self, query: self._parse_order_by(query),
 57        }
 58
 59        def _parse_equality(self) -> t.Optional[exp.Expression]:
 60            eq = self._parse_tokens(self._parse_comparison, self.EQUALITY)
 61            if not isinstance(eq, (exp.EQ, exp.NEQ)):
 62                return eq
 63
 64            # https://prql-lang.org/book/reference/spec/null.html
 65            if isinstance(eq.expression, exp.Null):
 66                is_exp = exp.Is(this=eq.this, expression=eq.expression)
 67                return is_exp if isinstance(eq, exp.EQ) else exp.Not(this=is_exp)
 68            if isinstance(eq.this, exp.Null):
 69                is_exp = exp.Is(this=eq.expression, expression=eq.this)
 70                return is_exp if isinstance(eq, exp.EQ) else exp.Not(this=is_exp)
 71            return eq
 72
 73        def _parse_statement(self) -> t.Optional[exp.Expression]:
 74            expression = self._parse_expression()
 75            expression = expression if expression else self._parse_query()
 76            return expression
 77
 78        def _parse_query(self) -> t.Optional[exp.Query]:
 79            from_ = self._parse_from()
 80
 81            if not from_:
 82                return None
 83
 84            query = exp.select("*").from_(from_, copy=False)
 85
 86            while self._match_texts(self.TRANSFORM_PARSERS):
 87                query = self.TRANSFORM_PARSERS[self._prev.text.upper()](self, query)
 88
 89            return query
 90
 91        def _parse_selection(self, query: exp.Query, append: bool = True) -> exp.Query:
 92            if self._match(TokenType.L_BRACE):
 93                selects = self._parse_csv(self._parse_expression)
 94
 95                if not self._match(TokenType.R_BRACE, expression=query):
 96                    self.raise_error("Expecting }")
 97            else:
 98                expression = self._parse_expression()
 99                selects = [expression] if expression else []
100
101            projections = {
102                select.alias_or_name: select.this if isinstance(select, exp.Alias) else select
103                for select in query.selects
104            }
105
106            selects = [
107                select.transform(
108                    lambda s: (projections[s.name].copy() if s.name in projections else s)
109                    if isinstance(s, exp.Column)
110                    else s,
111                    copy=False,
112                )
113                for select in selects
114            ]
115
116            return query.select(*selects, append=append, copy=False)
117
118        def _parse_take(self, query: exp.Query) -> t.Optional[exp.Query]:
119            num = self._parse_number()  # TODO: TAKE for ranges a..b
120            return query.limit(num) if num else None
121
122        def _parse_ordered(
123            self, parse_method: t.Optional[t.Callable] = None
124        ) -> t.Optional[exp.Ordered]:
125            asc = self._match(TokenType.PLUS)
126            desc = self._match(TokenType.DASH) or (asc and False)
 127            term = super()._parse_ordered(parse_method=parse_method)
128            if term and desc:
129                term.set("desc", True)
130                term.set("nulls_first", False)
131            return term
132
133        def _parse_order_by(self, query: exp.Select) -> t.Optional[exp.Query]:
134            l_brace = self._match(TokenType.L_BRACE)
135            expressions = self._parse_csv(self._parse_ordered)
136            if l_brace and not self._match(TokenType.R_BRACE):
137                self.raise_error("Expecting }")
138            return query.order_by(self.expression(exp.Order, expressions=expressions), copy=False)
139
140        def _parse_expression(self) -> t.Optional[exp.Expression]:
141            if self._next and self._next.token_type == TokenType.ALIAS:
142                alias = self._parse_id_var(True)
143                self._match(TokenType.ALIAS)
144                return self.expression(exp.Alias, this=self._parse_conjunction(), alias=alias)
145            return self._parse_conjunction()
146
147        def _parse_table(
148            self,
149            schema: bool = False,
150            joins: bool = False,
151            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
152            parse_bracket: bool = False,
153            is_db_reference: bool = False,
154            parse_partition: bool = False,
155        ) -> t.Optional[exp.Expression]:
156            return self._parse_table_parts()
157
158        def _parse_from(
159            self, joins: bool = False, skip_from_token: bool = False
160        ) -> t.Optional[exp.From]:
161            if not skip_from_token and not self._match(TokenType.FROM):
162                return None
163
164            return self.expression(
165                exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
166            )
DPIPE_IS_STRING_CONCAT = False

Whether the DPIPE token (||) is a string concatenation operator.

tokenizer_class = <class 'PRQL.Tokenizer'>
parser_class = <class 'PRQL.Parser'>
generator_class = <class 'sqlglot.generator.Generator'>
TIME_TRIE: Dict = {}
FORMAT_TRIE: Dict = {}
INVERSE_TIME_MAPPING: Dict[str, str] = {}
INVERSE_TIME_TRIE: Dict = {}
ESCAPED_SEQUENCES: Dict[str, str] = {}
QUOTE_START = "'"
QUOTE_END = "'"
IDENTIFIER_START = '`'
IDENTIFIER_END = '`'
BIT_START: Optional[str] = None
BIT_END: Optional[str] = None
HEX_START: Optional[str] = None
HEX_END: Optional[str] = None
BYTE_START: Optional[str] = None
BYTE_END: Optional[str] = None
UNICODE_START: Optional[str] = None
UNICODE_END: Optional[str] = None
class PRQL.Tokenizer(sqlglot.tokens.Tokenizer):
18    class Tokenizer(tokens.Tokenizer):
19        IDENTIFIERS = ["`"]
20        QUOTES = ["'", '"']
21
22        SINGLE_TOKENS = {
23            **tokens.Tokenizer.SINGLE_TOKENS,
24            "=": TokenType.ALIAS,
25            "'": TokenType.QUOTE,
26            '"': TokenType.QUOTE,
27            "`": TokenType.IDENTIFIER,
28            "#": TokenType.COMMENT,
29        }
30
31        KEYWORDS = {
32            **tokens.Tokenizer.KEYWORDS,
33        }
IDENTIFIERS = ['`']
QUOTES = ["'", '"']
SINGLE_TOKENS = {'(': <TokenType.L_PAREN: 'L_PAREN'>, ')': <TokenType.R_PAREN: 'R_PAREN'>, '[': <TokenType.L_BRACKET: 'L_BRACKET'>, ']': <TokenType.R_BRACKET: 'R_BRACKET'>, '{': <TokenType.L_BRACE: 'L_BRACE'>, '}': <TokenType.R_BRACE: 'R_BRACE'>, '&': <TokenType.AMP: 'AMP'>, '^': <TokenType.CARET: 'CARET'>, ':': <TokenType.COLON: 'COLON'>, ',': <TokenType.COMMA: 'COMMA'>, '.': <TokenType.DOT: 'DOT'>, '-': <TokenType.DASH: 'DASH'>, '=': <TokenType.ALIAS: 'ALIAS'>, '>': <TokenType.GT: 'GT'>, '<': <TokenType.LT: 'LT'>, '%': <TokenType.MOD: 'MOD'>, '!': <TokenType.NOT: 'NOT'>, '|': <TokenType.PIPE: 'PIPE'>, '+': <TokenType.PLUS: 'PLUS'>, ';': <TokenType.SEMICOLON: 'SEMICOLON'>, '/': <TokenType.SLASH: 'SLASH'>, '\\': <TokenType.BACKSLASH: 'BACKSLASH'>, '*': <TokenType.STAR: 'STAR'>, '~': <TokenType.TILDA: 'TILDA'>, '?': <TokenType.PLACEHOLDER: 'PLACEHOLDER'>, '@': <TokenType.PARAMETER: 'PARAMETER'>, "'": <TokenType.QUOTE: 'QUOTE'>, '`': <TokenType.IDENTIFIER: 'IDENTIFIER'>, '"': <TokenType.QUOTE: 'QUOTE'>, '#': <TokenType.COMMENT: 'COMMENT'>}
KEYWORDS = {'{%': <TokenType.BLOCK_START: 'BLOCK_START'>, '{%+': <TokenType.BLOCK_START: 'BLOCK_START'>, '{%-': <TokenType.BLOCK_START: 'BLOCK_START'>, '%}': <TokenType.BLOCK_END: 'BLOCK_END'>, '+%}': <TokenType.BLOCK_END: 'BLOCK_END'>, '-%}': <TokenType.BLOCK_END: 'BLOCK_END'>, '{{+': <TokenType.BLOCK_START: 'BLOCK_START'>, '{{-': <TokenType.BLOCK_START: 'BLOCK_START'>, '+}}': <TokenType.BLOCK_END: 'BLOCK_END'>, '-}}': <TokenType.BLOCK_END: 'BLOCK_END'>, '/*+': <TokenType.HINT: 'HINT'>, '==': <TokenType.EQ: 'EQ'>, '::': <TokenType.DCOLON: 'DCOLON'>, '||': <TokenType.DPIPE: 'DPIPE'>, '>=': <TokenType.GTE: 'GTE'>, '<=': <TokenType.LTE: 'LTE'>, '<>': <TokenType.NEQ: 'NEQ'>, '!=': <TokenType.NEQ: 'NEQ'>, ':=': <TokenType.COLON_EQ: 'COLON_EQ'>, '<=>': <TokenType.NULLSAFE_EQ: 'NULLSAFE_EQ'>, '->': <TokenType.ARROW: 'ARROW'>, '->>': <TokenType.DARROW: 'DARROW'>, '=>': <TokenType.FARROW: 'FARROW'>, '#>': <TokenType.HASH_ARROW: 'HASH_ARROW'>, '#>>': <TokenType.DHASH_ARROW: 'DHASH_ARROW'>, '<->': <TokenType.LR_ARROW: 'LR_ARROW'>, '&&': <TokenType.DAMP: 'DAMP'>, '??': <TokenType.DQMARK: 'DQMARK'>, 'ALL': <TokenType.ALL: 'ALL'>, 'ALWAYS': <TokenType.ALWAYS: 'ALWAYS'>, 'AND': <TokenType.AND: 'AND'>, 'ANTI': <TokenType.ANTI: 'ANTI'>, 'ANY': <TokenType.ANY: 'ANY'>, 'ASC': <TokenType.ASC: 'ASC'>, 'AS': <TokenType.ALIAS: 'ALIAS'>, 'ASOF': <TokenType.ASOF: 'ASOF'>, 'AUTOINCREMENT': <TokenType.AUTO_INCREMENT: 'AUTO_INCREMENT'>, 'AUTO_INCREMENT': <TokenType.AUTO_INCREMENT: 'AUTO_INCREMENT'>, 'BEGIN': <TokenType.BEGIN: 'BEGIN'>, 'BETWEEN': <TokenType.BETWEEN: 'BETWEEN'>, 'CACHE': <TokenType.CACHE: 'CACHE'>, 'UNCACHE': <TokenType.UNCACHE: 'UNCACHE'>, 'CASE': <TokenType.CASE: 'CASE'>, 'CHARACTER SET': <TokenType.CHARACTER_SET: 'CHARACTER_SET'>, 'CLUSTER BY': <TokenType.CLUSTER_BY: 'CLUSTER_BY'>, 'COLLATE': <TokenType.COLLATE: 'COLLATE'>, 'COLUMN': <TokenType.COLUMN: 'COLUMN'>, 'COMMIT': <TokenType.COMMIT: 'COMMIT'>, 'CONNECT BY': <TokenType.CONNECT_BY: 'CONNECT_BY'>, 'CONSTRAINT': 
<TokenType.CONSTRAINT: 'CONSTRAINT'>, 'CREATE': <TokenType.CREATE: 'CREATE'>, 'CROSS': <TokenType.CROSS: 'CROSS'>, 'CUBE': <TokenType.CUBE: 'CUBE'>, 'CURRENT_DATE': <TokenType.CURRENT_DATE: 'CURRENT_DATE'>, 'CURRENT_TIME': <TokenType.CURRENT_TIME: 'CURRENT_TIME'>, 'CURRENT_TIMESTAMP': <TokenType.CURRENT_TIMESTAMP: 'CURRENT_TIMESTAMP'>, 'CURRENT_USER': <TokenType.CURRENT_USER: 'CURRENT_USER'>, 'DATABASE': <TokenType.DATABASE: 'DATABASE'>, 'DEFAULT': <TokenType.DEFAULT: 'DEFAULT'>, 'DELETE': <TokenType.DELETE: 'DELETE'>, 'DESC': <TokenType.DESC: 'DESC'>, 'DESCRIBE': <TokenType.DESCRIBE: 'DESCRIBE'>, 'DISTINCT': <TokenType.DISTINCT: 'DISTINCT'>, 'DISTRIBUTE BY': <TokenType.DISTRIBUTE_BY: 'DISTRIBUTE_BY'>, 'DIV': <TokenType.DIV: 'DIV'>, 'DROP': <TokenType.DROP: 'DROP'>, 'ELSE': <TokenType.ELSE: 'ELSE'>, 'END': <TokenType.END: 'END'>, 'ENUM': <TokenType.ENUM: 'ENUM'>, 'ESCAPE': <TokenType.ESCAPE: 'ESCAPE'>, 'EXCEPT': <TokenType.EXCEPT: 'EXCEPT'>, 'EXECUTE': <TokenType.EXECUTE: 'EXECUTE'>, 'EXISTS': <TokenType.EXISTS: 'EXISTS'>, 'FALSE': <TokenType.FALSE: 'FALSE'>, 'FETCH': <TokenType.FETCH: 'FETCH'>, 'FILTER': <TokenType.FILTER: 'FILTER'>, 'FIRST': <TokenType.FIRST: 'FIRST'>, 'FULL': <TokenType.FULL: 'FULL'>, 'FUNCTION': <TokenType.FUNCTION: 'FUNCTION'>, 'FOR': <TokenType.FOR: 'FOR'>, 'FOREIGN KEY': <TokenType.FOREIGN_KEY: 'FOREIGN_KEY'>, 'FORMAT': <TokenType.FORMAT: 'FORMAT'>, 'FROM': <TokenType.FROM: 'FROM'>, 'GEOGRAPHY': <TokenType.GEOGRAPHY: 'GEOGRAPHY'>, 'GEOMETRY': <TokenType.GEOMETRY: 'GEOMETRY'>, 'GLOB': <TokenType.GLOB: 'GLOB'>, 'GROUP BY': <TokenType.GROUP_BY: 'GROUP_BY'>, 'GROUPING SETS': <TokenType.GROUPING_SETS: 'GROUPING_SETS'>, 'HAVING': <TokenType.HAVING: 'HAVING'>, 'ILIKE': <TokenType.ILIKE: 'ILIKE'>, 'IN': <TokenType.IN: 'IN'>, 'INDEX': <TokenType.INDEX: 'INDEX'>, 'INET': <TokenType.INET: 'INET'>, 'INNER': <TokenType.INNER: 'INNER'>, 'INSERT': <TokenType.INSERT: 'INSERT'>, 'INTERVAL': <TokenType.INTERVAL: 'INTERVAL'>, 'INTERSECT': <TokenType.INTERSECT: 
'INTERSECT'>, 'INTO': <TokenType.INTO: 'INTO'>, 'IS': <TokenType.IS: 'IS'>, 'ISNULL': <TokenType.ISNULL: 'ISNULL'>, 'JOIN': <TokenType.JOIN: 'JOIN'>, 'KEEP': <TokenType.KEEP: 'KEEP'>, 'KILL': <TokenType.KILL: 'KILL'>, 'LATERAL': <TokenType.LATERAL: 'LATERAL'>, 'LEFT': <TokenType.LEFT: 'LEFT'>, 'LIKE': <TokenType.LIKE: 'LIKE'>, 'LIMIT': <TokenType.LIMIT: 'LIMIT'>, 'LOAD': <TokenType.LOAD: 'LOAD'>, 'LOCK': <TokenType.LOCK: 'LOCK'>, 'MERGE': <TokenType.MERGE: 'MERGE'>, 'NATURAL': <TokenType.NATURAL: 'NATURAL'>, 'NEXT': <TokenType.NEXT: 'NEXT'>, 'NOT': <TokenType.NOT: 'NOT'>, 'NOTNULL': <TokenType.NOTNULL: 'NOTNULL'>, 'NULL': <TokenType.NULL: 'NULL'>, 'OBJECT': <TokenType.OBJECT: 'OBJECT'>, 'OFFSET': <TokenType.OFFSET: 'OFFSET'>, 'ON': <TokenType.ON: 'ON'>, 'OR': <TokenType.OR: 'OR'>, 'XOR': <TokenType.XOR: 'XOR'>, 'ORDER BY': <TokenType.ORDER_BY: 'ORDER_BY'>, 'ORDINALITY': <TokenType.ORDINALITY: 'ORDINALITY'>, 'OUTER': <TokenType.OUTER: 'OUTER'>, 'OVER': <TokenType.OVER: 'OVER'>, 'OVERLAPS': <TokenType.OVERLAPS: 'OVERLAPS'>, 'OVERWRITE': <TokenType.OVERWRITE: 'OVERWRITE'>, 'PARTITION': <TokenType.PARTITION: 'PARTITION'>, 'PARTITION BY': <TokenType.PARTITION_BY: 'PARTITION_BY'>, 'PARTITIONED BY': <TokenType.PARTITION_BY: 'PARTITION_BY'>, 'PARTITIONED_BY': <TokenType.PARTITION_BY: 'PARTITION_BY'>, 'PERCENT': <TokenType.PERCENT: 'PERCENT'>, 'PIVOT': <TokenType.PIVOT: 'PIVOT'>, 'PRAGMA': <TokenType.PRAGMA: 'PRAGMA'>, 'PRIMARY KEY': <TokenType.PRIMARY_KEY: 'PRIMARY_KEY'>, 'PROCEDURE': <TokenType.PROCEDURE: 'PROCEDURE'>, 'QUALIFY': <TokenType.QUALIFY: 'QUALIFY'>, 'RANGE': <TokenType.RANGE: 'RANGE'>, 'RECURSIVE': <TokenType.RECURSIVE: 'RECURSIVE'>, 'REGEXP': <TokenType.RLIKE: 'RLIKE'>, 'REPLACE': <TokenType.REPLACE: 'REPLACE'>, 'RETURNING': <TokenType.RETURNING: 'RETURNING'>, 'REFERENCES': <TokenType.REFERENCES: 'REFERENCES'>, 'RIGHT': <TokenType.RIGHT: 'RIGHT'>, 'RLIKE': <TokenType.RLIKE: 'RLIKE'>, 'ROLLBACK': <TokenType.ROLLBACK: 'ROLLBACK'>, 'ROLLUP': <TokenType.ROLLUP: 
'ROLLUP'>, 'ROW': <TokenType.ROW: 'ROW'>, 'ROWS': <TokenType.ROWS: 'ROWS'>, 'SCHEMA': <TokenType.SCHEMA: 'SCHEMA'>, 'SELECT': <TokenType.SELECT: 'SELECT'>, 'SEMI': <TokenType.SEMI: 'SEMI'>, 'SET': <TokenType.SET: 'SET'>, 'SETTINGS': <TokenType.SETTINGS: 'SETTINGS'>, 'SHOW': <TokenType.SHOW: 'SHOW'>, 'SIMILAR TO': <TokenType.SIMILAR_TO: 'SIMILAR_TO'>, 'SOME': <TokenType.SOME: 'SOME'>, 'SORT BY': <TokenType.SORT_BY: 'SORT_BY'>, 'START WITH': <TokenType.START_WITH: 'START_WITH'>, 'TABLE': <TokenType.TABLE: 'TABLE'>, 'TABLESAMPLE': <TokenType.TABLE_SAMPLE: 'TABLE_SAMPLE'>, 'TEMP': <TokenType.TEMPORARY: 'TEMPORARY'>, 'TEMPORARY': <TokenType.TEMPORARY: 'TEMPORARY'>, 'THEN': <TokenType.THEN: 'THEN'>, 'TRUE': <TokenType.TRUE: 'TRUE'>, 'TRUNCATE': <TokenType.TRUNCATE: 'TRUNCATE'>, 'UNION': <TokenType.UNION: 'UNION'>, 'UNKNOWN': <TokenType.UNKNOWN: 'UNKNOWN'>, 'UNNEST': <TokenType.UNNEST: 'UNNEST'>, 'UNPIVOT': <TokenType.UNPIVOT: 'UNPIVOT'>, 'UPDATE': <TokenType.UPDATE: 'UPDATE'>, 'USE': <TokenType.USE: 'USE'>, 'USING': <TokenType.USING: 'USING'>, 'UUID': <TokenType.UUID: 'UUID'>, 'VALUES': <TokenType.VALUES: 'VALUES'>, 'VIEW': <TokenType.VIEW: 'VIEW'>, 'VOLATILE': <TokenType.VOLATILE: 'VOLATILE'>, 'WHEN': <TokenType.WHEN: 'WHEN'>, 'WHERE': <TokenType.WHERE: 'WHERE'>, 'WINDOW': <TokenType.WINDOW: 'WINDOW'>, 'WITH': <TokenType.WITH: 'WITH'>, 'APPLY': <TokenType.APPLY: 'APPLY'>, 'ARRAY': <TokenType.ARRAY: 'ARRAY'>, 'BIT': <TokenType.BIT: 'BIT'>, 'BOOL': <TokenType.BOOLEAN: 'BOOLEAN'>, 'BOOLEAN': <TokenType.BOOLEAN: 'BOOLEAN'>, 'BYTE': <TokenType.TINYINT: 'TINYINT'>, 'MEDIUMINT': <TokenType.MEDIUMINT: 'MEDIUMINT'>, 'INT1': <TokenType.TINYINT: 'TINYINT'>, 'TINYINT': <TokenType.TINYINT: 'TINYINT'>, 'INT16': <TokenType.SMALLINT: 'SMALLINT'>, 'SHORT': <TokenType.SMALLINT: 'SMALLINT'>, 'SMALLINT': <TokenType.SMALLINT: 'SMALLINT'>, 'INT128': <TokenType.INT128: 'INT128'>, 'HUGEINT': <TokenType.INT128: 'INT128'>, 'INT2': <TokenType.SMALLINT: 'SMALLINT'>, 'INTEGER': <TokenType.INT: 
'INT'>, 'INT': <TokenType.INT: 'INT'>, 'INT4': <TokenType.INT: 'INT'>, 'INT32': <TokenType.INT: 'INT'>, 'INT64': <TokenType.BIGINT: 'BIGINT'>, 'LONG': <TokenType.BIGINT: 'BIGINT'>, 'BIGINT': <TokenType.BIGINT: 'BIGINT'>, 'INT8': <TokenType.TINYINT: 'TINYINT'>, 'UINT': <TokenType.UINT: 'UINT'>, 'DEC': <TokenType.DECIMAL: 'DECIMAL'>, 'DECIMAL': <TokenType.DECIMAL: 'DECIMAL'>, 'BIGDECIMAL': <TokenType.BIGDECIMAL: 'BIGDECIMAL'>, 'BIGNUMERIC': <TokenType.BIGDECIMAL: 'BIGDECIMAL'>, 'MAP': <TokenType.MAP: 'MAP'>, 'NULLABLE': <TokenType.NULLABLE: 'NULLABLE'>, 'NUMBER': <TokenType.DECIMAL: 'DECIMAL'>, 'NUMERIC': <TokenType.DECIMAL: 'DECIMAL'>, 'FIXED': <TokenType.DECIMAL: 'DECIMAL'>, 'REAL': <TokenType.FLOAT: 'FLOAT'>, 'FLOAT': <TokenType.FLOAT: 'FLOAT'>, 'FLOAT4': <TokenType.FLOAT: 'FLOAT'>, 'FLOAT8': <TokenType.DOUBLE: 'DOUBLE'>, 'DOUBLE': <TokenType.DOUBLE: 'DOUBLE'>, 'DOUBLE PRECISION': <TokenType.DOUBLE: 'DOUBLE'>, 'JSON': <TokenType.JSON: 'JSON'>, 'CHAR': <TokenType.CHAR: 'CHAR'>, 'CHARACTER': <TokenType.CHAR: 'CHAR'>, 'NCHAR': <TokenType.NCHAR: 'NCHAR'>, 'VARCHAR': <TokenType.VARCHAR: 'VARCHAR'>, 'VARCHAR2': <TokenType.VARCHAR: 'VARCHAR'>, 'NVARCHAR': <TokenType.NVARCHAR: 'NVARCHAR'>, 'NVARCHAR2': <TokenType.NVARCHAR: 'NVARCHAR'>, 'BPCHAR': <TokenType.BPCHAR: 'BPCHAR'>, 'STR': <TokenType.TEXT: 'TEXT'>, 'STRING': <TokenType.TEXT: 'TEXT'>, 'TEXT': <TokenType.TEXT: 'TEXT'>, 'LONGTEXT': <TokenType.LONGTEXT: 'LONGTEXT'>, 'MEDIUMTEXT': <TokenType.MEDIUMTEXT: 'MEDIUMTEXT'>, 'TINYTEXT': <TokenType.TINYTEXT: 'TINYTEXT'>, 'CLOB': <TokenType.TEXT: 'TEXT'>, 'LONGVARCHAR': <TokenType.TEXT: 'TEXT'>, 'BINARY': <TokenType.BINARY: 'BINARY'>, 'BLOB': <TokenType.VARBINARY: 'VARBINARY'>, 'LONGBLOB': <TokenType.LONGBLOB: 'LONGBLOB'>, 'MEDIUMBLOB': <TokenType.MEDIUMBLOB: 'MEDIUMBLOB'>, 'TINYBLOB': <TokenType.TINYBLOB: 'TINYBLOB'>, 'BYTEA': <TokenType.VARBINARY: 'VARBINARY'>, 'VARBINARY': <TokenType.VARBINARY: 'VARBINARY'>, 'TIME': <TokenType.TIME: 'TIME'>, 'TIMETZ': <TokenType.TIMETZ: 
'TIMETZ'>, 'TIMESTAMP': <TokenType.TIMESTAMP: 'TIMESTAMP'>, 'TIMESTAMPTZ': <TokenType.TIMESTAMPTZ: 'TIMESTAMPTZ'>, 'TIMESTAMPLTZ': <TokenType.TIMESTAMPLTZ: 'TIMESTAMPLTZ'>, 'DATE': <TokenType.DATE: 'DATE'>, 'DATETIME': <TokenType.DATETIME: 'DATETIME'>, 'INT4RANGE': <TokenType.INT4RANGE: 'INT4RANGE'>, 'INT4MULTIRANGE': <TokenType.INT4MULTIRANGE: 'INT4MULTIRANGE'>, 'INT8RANGE': <TokenType.INT8RANGE: 'INT8RANGE'>, 'INT8MULTIRANGE': <TokenType.INT8MULTIRANGE: 'INT8MULTIRANGE'>, 'NUMRANGE': <TokenType.NUMRANGE: 'NUMRANGE'>, 'NUMMULTIRANGE': <TokenType.NUMMULTIRANGE: 'NUMMULTIRANGE'>, 'TSRANGE': <TokenType.TSRANGE: 'TSRANGE'>, 'TSMULTIRANGE': <TokenType.TSMULTIRANGE: 'TSMULTIRANGE'>, 'TSTZRANGE': <TokenType.TSTZRANGE: 'TSTZRANGE'>, 'TSTZMULTIRANGE': <TokenType.TSTZMULTIRANGE: 'TSTZMULTIRANGE'>, 'DATERANGE': <TokenType.DATERANGE: 'DATERANGE'>, 'DATEMULTIRANGE': <TokenType.DATEMULTIRANGE: 'DATEMULTIRANGE'>, 'UNIQUE': <TokenType.UNIQUE: 'UNIQUE'>, 'STRUCT': <TokenType.STRUCT: 'STRUCT'>, 'SEQUENCE': <TokenType.SEQUENCE: 'SEQUENCE'>, 'VARIANT': <TokenType.VARIANT: 'VARIANT'>, 'ALTER': <TokenType.ALTER: 'ALTER'>, 'ANALYZE': <TokenType.COMMAND: 'COMMAND'>, 'CALL': <TokenType.COMMAND: 'COMMAND'>, 'COMMENT': <TokenType.COMMENT: 'COMMENT'>, 'COPY': <TokenType.COMMAND: 'COMMAND'>, 'EXPLAIN': <TokenType.COMMAND: 'COMMAND'>, 'GRANT': <TokenType.COMMAND: 'COMMAND'>, 'OPTIMIZE': <TokenType.COMMAND: 'COMMAND'>, 'PREPARE': <TokenType.COMMAND: 'COMMAND'>, 'VACUUM': <TokenType.COMMAND: 'COMMAND'>, 'USER-DEFINED': <TokenType.USERDEFINED: 'USERDEFINED'>, 'FOR VERSION': <TokenType.VERSION_SNAPSHOT: 'VERSION_SNAPSHOT'>, 'FOR TIMESTAMP': <TokenType.TIMESTAMP_SNAPSHOT: 'TIMESTAMP_SNAPSHOT'>}
class PRQL.Parser(sqlglot.parser.Parser):
 35    class Parser(parser.Parser):
 36        CONJUNCTION = {
 37            **parser.Parser.CONJUNCTION,
 38            TokenType.DAMP: exp.And,
 39            TokenType.DPIPE: exp.Or,
 40        }
 41
 42        TRANSFORM_PARSERS = {
 43            "DERIVE": lambda self, query: self._parse_selection(query),
 44            "SELECT": lambda self, query: self._parse_selection(query, append=False),
 45            "TAKE": lambda self, query: self._parse_take(query),
 46            "FILTER": lambda self, query: query.where(self._parse_conjunction()),
 47            "APPEND": lambda self, query: query.union(
 48                _select_all(self._parse_table()), distinct=False, copy=False
 49            ),
 50            "REMOVE": lambda self, query: query.except_(
 51                _select_all(self._parse_table()), distinct=False, copy=False
 52            ),
 53            "INTERSECT": lambda self, query: query.intersect(
 54                _select_all(self._parse_table()), distinct=False, copy=False
 55            ),
 56            "SORT": lambda self, query: self._parse_order_by(query),
 57        }
 58
 59        def _parse_equality(self) -> t.Optional[exp.Expression]:
 60            eq = self._parse_tokens(self._parse_comparison, self.EQUALITY)
 61            if not isinstance(eq, (exp.EQ, exp.NEQ)):
 62                return eq
 63
 64            # https://prql-lang.org/book/reference/spec/null.html
 65            if isinstance(eq.expression, exp.Null):
 66                is_exp = exp.Is(this=eq.this, expression=eq.expression)
 67                return is_exp if isinstance(eq, exp.EQ) else exp.Not(this=is_exp)
 68            if isinstance(eq.this, exp.Null):
 69                is_exp = exp.Is(this=eq.expression, expression=eq.this)
 70                return is_exp if isinstance(eq, exp.EQ) else exp.Not(this=is_exp)
 71            return eq
 72
 73        def _parse_statement(self) -> t.Optional[exp.Expression]:
 74            expression = self._parse_expression()
 75            expression = expression if expression else self._parse_query()
 76            return expression
 77
 78        def _parse_query(self) -> t.Optional[exp.Query]:
 79            from_ = self._parse_from()
 80
 81            if not from_:
 82                return None
 83
 84            query = exp.select("*").from_(from_, copy=False)
 85
 86            while self._match_texts(self.TRANSFORM_PARSERS):
 87                query = self.TRANSFORM_PARSERS[self._prev.text.upper()](self, query)
 88
 89            return query
 90
 91        def _parse_selection(self, query: exp.Query, append: bool = True) -> exp.Query:
 92            if self._match(TokenType.L_BRACE):
 93                selects = self._parse_csv(self._parse_expression)
 94
 95                if not self._match(TokenType.R_BRACE, expression=query):
 96                    self.raise_error("Expecting }")
 97            else:
 98                expression = self._parse_expression()
 99                selects = [expression] if expression else []
100
101            projections = {
102                select.alias_or_name: select.this if isinstance(select, exp.Alias) else select
103                for select in query.selects
104            }
105
106            selects = [
107                select.transform(
108                    lambda s: (projections[s.name].copy() if s.name in projections else s)
109                    if isinstance(s, exp.Column)
110                    else s,
111                    copy=False,
112                )
113                for select in selects
114            ]
115
116            return query.select(*selects, append=append, copy=False)
117
118        def _parse_take(self, query: exp.Query) -> t.Optional[exp.Query]:
119            num = self._parse_number()  # TODO: TAKE for ranges a..b
120            return query.limit(num) if num else None
121
122        def _parse_ordered(
123            self, parse_method: t.Optional[t.Callable] = None
124        ) -> t.Optional[exp.Ordered]:
125            asc = self._match(TokenType.PLUS)
126            desc = self._match(TokenType.DASH) or (asc and False)
127            term = term = super()._parse_ordered(parse_method=parse_method)
128            if term and desc:
129                term.set("desc", True)
130                term.set("nulls_first", False)
131            return term
132
133        def _parse_order_by(self, query: exp.Select) -> t.Optional[exp.Query]:
134            l_brace = self._match(TokenType.L_BRACE)
135            expressions = self._parse_csv(self._parse_ordered)
136            if l_brace and not self._match(TokenType.R_BRACE):
137                self.raise_error("Expecting }")
138            return query.order_by(self.expression(exp.Order, expressions=expressions), copy=False)
139
140        def _parse_expression(self) -> t.Optional[exp.Expression]:
141            if self._next and self._next.token_type == TokenType.ALIAS:
142                alias = self._parse_id_var(True)
143                self._match(TokenType.ALIAS)
144                return self.expression(exp.Alias, this=self._parse_conjunction(), alias=alias)
145            return self._parse_conjunction()
146
147        def _parse_table(
148            self,
149            schema: bool = False,
150            joins: bool = False,
151            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
152            parse_bracket: bool = False,
153            is_db_reference: bool = False,
154            parse_partition: bool = False,
155        ) -> t.Optional[exp.Expression]:
156            return self._parse_table_parts()
157
158        def _parse_from(
159            self, joins: bool = False, skip_from_token: bool = False
160        ) -> t.Optional[exp.From]:
161            if not skip_from_token and not self._match(TokenType.FROM):
162                return None
163
164            return self.expression(
165                exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
166            )

Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

Arguments:
  • error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
  • error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
  • max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
CONJUNCTION = {<TokenType.AND: 'AND'>: <class 'sqlglot.expressions.And'>, <TokenType.OR: 'OR'>: <class 'sqlglot.expressions.Or'>, <TokenType.DAMP: 'DAMP'>: <class 'sqlglot.expressions.And'>, <TokenType.DPIPE: 'DPIPE'>: <class 'sqlglot.expressions.Or'>}
TRANSFORM_PARSERS = {'DERIVE': <function PRQL.Parser.<lambda>>, 'SELECT': <function PRQL.Parser.<lambda>>, 'TAKE': <function PRQL.Parser.<lambda>>, 'FILTER': <function PRQL.Parser.<lambda>>, 'APPEND': <function PRQL.Parser.<lambda>>, 'REMOVE': <function PRQL.Parser.<lambda>>, 'INTERSECT': <function PRQL.Parser.<lambda>>, 'SORT': <function PRQL.Parser.<lambda>>}
SHOW_TRIE: Dict = {}
SET_TRIE: Dict = {'GLOBAL': {0: True}, 'LOCAL': {0: True}, 'SESSION': {0: True}, 'TRANSACTION': {0: True}}
Inherited Members
sqlglot.parser.Parser
Parser
FUNCTIONS
NO_PAREN_FUNCTIONS
STRUCT_TYPE_TOKENS
NESTED_TYPE_TOKENS
ENUM_TYPE_TOKENS
AGGREGATE_TYPE_TOKENS
TYPE_TOKENS
SIGNED_TO_UNSIGNED_TYPE_TOKEN
SUBQUERY_PREDICATES
RESERVED_TOKENS
DB_CREATABLES
CREATABLES
ID_VAR_TOKENS
INTERVAL_VARS
TABLE_ALIAS_TOKENS
ALIAS_TOKENS
COMMENT_TABLE_ALIAS_TOKENS
UPDATE_ALIAS_TOKENS
TRIM_TYPES
FUNC_TOKENS
EQUALITY
COMPARISON
BITWISE
TERM
FACTOR
EXPONENT
TIMES
TIMESTAMPS
SET_OPERATIONS
JOIN_METHODS
JOIN_SIDES
JOIN_KINDS
JOIN_HINTS
LAMBDAS
COLUMN_OPERATORS
EXPRESSION_PARSERS
STATEMENT_PARSERS
UNARY_PARSERS
STRING_PARSERS
NUMERIC_PARSERS
PRIMARY_PARSERS
PLACEHOLDER_PARSERS
RANGE_PARSERS
PROPERTY_PARSERS
CONSTRAINT_PARSERS
ALTER_PARSERS
SCHEMA_UNNAMED_CONSTRAINTS
NO_PAREN_FUNCTION_PARSERS
INVALID_FUNC_NAME_TOKENS
FUNCTIONS_WITH_ALIASED_ARGS
KEY_VALUE_DEFINITIONS
FUNCTION_PARSERS
QUERY_MODIFIER_PARSERS
SET_PARSERS
SHOW_PARSERS
TYPE_LITERAL_PARSERS
DDL_SELECT_TOKENS
PRE_VOLATILE_TOKENS
TRANSACTION_KIND
TRANSACTION_CHARACTERISTICS
CONFLICT_ACTIONS
CREATE_SEQUENCE
ISOLATED_LOADING_OPTIONS
USABLES
CAST_ACTIONS
INSERT_ALTERNATIVES
CLONE_KEYWORDS
HISTORICAL_DATA_KIND
OPCLASS_FOLLOW_KEYWORDS
OPTYPE_FOLLOW_TOKENS
TABLE_INDEX_HINT_TOKENS
VIEW_ATTRIBUTES
WINDOW_ALIAS_TOKENS
WINDOW_BEFORE_PAREN_TOKENS
WINDOW_SIDES
JSON_KEY_VALUE_SEPARATOR_TOKENS
FETCH_TOKENS
ADD_CONSTRAINT_TOKENS
DISTINCT_TOKENS
NULL_TOKENS
UNNEST_OFFSET_ALIAS_TOKENS
SELECT_START_TOKENS
STRICT_CAST
PREFIXED_PIVOT_COLUMNS
IDENTIFY_PIVOT_STRINGS
LOG_DEFAULTS_TO_LN
ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
TABLESAMPLE_CSV
SET_REQUIRES_ASSIGNMENT_DELIMITER
TRIM_PATTERN_FIRST
STRING_ALIASES
MODIFIERS_ATTACHED_TO_UNION
UNION_MODIFIERS
NO_PAREN_IF_COMMANDS
JSON_ARROWS_REQUIRE_JSON_TYPE
VALUES_FOLLOWED_BY_PAREN
SUPPORTS_IMPLICIT_UNNEST
INTERVAL_SPANS
SUPPORTS_PARTITION_SELECTION
error_level
error_message_context
max_errors
dialect
reset
parse
parse_into
check_errors
raise_error
expression
validate_expression
errors
sql