sqlglot.parser
1from __future__ import annotations 2 3import logging 4import typing as t 5from collections import defaultdict 6 7from sqlglot import exp 8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 9from sqlglot.helper import apply_index_offset, ensure_list, seq_get 10from sqlglot.time import format_time 11from sqlglot.tokens import Token, Tokenizer, TokenType 12from sqlglot.trie import TrieResult, in_trie, new_trie 13 14if t.TYPE_CHECKING: 15 from sqlglot._typing import E, Lit 16 from sqlglot.dialects.dialect import Dialect, DialectType 17 18logger = logging.getLogger("sqlglot") 19 20 21def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 22 if len(args) == 1 and args[0].is_star: 23 return exp.StarMap(this=args[0]) 24 25 keys = [] 26 values = [] 27 for i in range(0, len(args), 2): 28 keys.append(args[i]) 29 values.append(args[i + 1]) 30 31 return exp.VarMap( 32 keys=exp.Array(expressions=keys), 33 values=exp.Array(expressions=values), 34 ) 35 36 37def parse_like(args: t.List) -> exp.Escape | exp.Like: 38 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 39 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 40 41 42def binary_range_parser( 43 expr_type: t.Type[exp.Expression], 44) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 45 return lambda self, this: self._parse_escape( 46 self.expression(expr_type, this=this, expression=self._parse_bitwise()) 47 ) 48 49 50def parse_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 51 # Default argument order is base, expression 52 this = seq_get(args, 0) 53 expression = seq_get(args, 1) 54 55 if expression: 56 if not dialect.LOG_BASE_FIRST: 57 this, expression = expression, this 58 return exp.Log(this=this, expression=expression) 59 60 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this) 61 62 63def parse_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 64 def _parser(args: t.List, dialect: Dialect) -> E: 65 expression = expr_type( 66 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 67 ) 68 if len(args) > 2 and expr_type is exp.JSONExtract: 69 expression.set("expressions", args[2:]) 70 71 return expression 72 73 return _parser 74 75 76class _Parser(type): 77 def __new__(cls, clsname, bases, attrs): 78 klass = super().__new__(cls, clsname, bases, attrs) 79 80 klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 81 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 82 83 return klass 84 85 86class Parser(metaclass=_Parser): 87 """ 88 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 89 90 Args: 91 error_level: The desired error level. 92 Default: ErrorLevel.IMMEDIATE 93 error_message_context: Determines the amount of context to capture from a 94 query string when displaying the error message (in number of characters). 95 Default: 100 96 max_errors: Maximum number of error messages to include in a raised ParseError. 97 This is only relevant if error_level is ErrorLevel.RAISE. 98 Default: 3 99 """ 100 101 FUNCTIONS: t.Dict[str, t.Callable] = { 102 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 103 "CONCAT": lambda args, dialect: exp.Concat( 104 expressions=args, 105 safe=not dialect.STRICT_STRING_CONCAT, 106 coalesce=dialect.CONCAT_COALESCE, 107 ), 108 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 109 expressions=args, 110 safe=not dialect.STRICT_STRING_CONCAT, 111 coalesce=dialect.CONCAT_COALESCE, 112 ), 113 "DATE_TO_DATE_STR": lambda args: exp.Cast( 114 this=seq_get(args, 0), 115 to=exp.DataType(this=exp.DataType.Type.TEXT), 116 ), 117 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 118 "JSON_EXTRACT": parse_extract_json_with_path(exp.JSONExtract), 119 "JSON_EXTRACT_SCALAR": parse_extract_json_with_path(exp.JSONExtractScalar), 120 "JSON_EXTRACT_PATH_TEXT": parse_extract_json_with_path(exp.JSONExtractScalar), 121 "LIKE": parse_like, 122 "LOG": parse_logarithm, 123 "TIME_TO_TIME_STR": lambda args: exp.Cast( 124 this=seq_get(args, 0), 125 to=exp.DataType(this=exp.DataType.Type.TEXT), 126 ), 127 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 128 this=exp.Cast( 129 this=seq_get(args, 0), 130 to=exp.DataType(this=exp.DataType.Type.TEXT), 131 ), 132 start=exp.Literal.number(1), 133 length=exp.Literal.number(10), 134 ), 135 "VAR_MAP": parse_var_map, 136 } 137 138 NO_PAREN_FUNCTIONS = { 139 TokenType.CURRENT_DATE: exp.CurrentDate, 140 TokenType.CURRENT_DATETIME: exp.CurrentDate, 141 TokenType.CURRENT_TIME: exp.CurrentTime, 142 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 143 TokenType.CURRENT_USER: exp.CurrentUser, 144 } 145 146 STRUCT_TYPE_TOKENS = { 147 TokenType.NESTED, 148 TokenType.STRUCT, 149 } 150 151 NESTED_TYPE_TOKENS = { 152 TokenType.ARRAY, 153 TokenType.LOWCARDINALITY, 154 TokenType.MAP, 155 TokenType.NULLABLE, 156 *STRUCT_TYPE_TOKENS, 157 } 158 159 ENUM_TYPE_TOKENS = { 160 TokenType.ENUM, 161 TokenType.ENUM8, 162 TokenType.ENUM16, 163 } 164 165 AGGREGATE_TYPE_TOKENS = { 166 TokenType.AGGREGATEFUNCTION, 167 TokenType.SIMPLEAGGREGATEFUNCTION, 168 } 169 170 TYPE_TOKENS = { 171 TokenType.BIT, 172 TokenType.BOOLEAN, 173 TokenType.TINYINT, 174 TokenType.UTINYINT, 175 TokenType.SMALLINT, 176 TokenType.USMALLINT, 177 TokenType.INT, 178 TokenType.UINT, 179 TokenType.BIGINT, 180 TokenType.UBIGINT, 181 TokenType.INT128, 182 TokenType.UINT128, 183 TokenType.INT256, 184 TokenType.UINT256, 185 TokenType.MEDIUMINT, 186 TokenType.UMEDIUMINT, 187 TokenType.FIXEDSTRING, 188 TokenType.FLOAT, 189 TokenType.DOUBLE, 190 TokenType.CHAR, 191 TokenType.NCHAR, 192 TokenType.VARCHAR, 193 TokenType.NVARCHAR, 194 TokenType.BPCHAR, 195 TokenType.TEXT, 196 TokenType.MEDIUMTEXT, 197 TokenType.LONGTEXT, 198 TokenType.MEDIUMBLOB, 199 TokenType.LONGBLOB, 200 TokenType.BINARY, 201 TokenType.VARBINARY, 202 TokenType.JSON, 203 TokenType.JSONB, 204 TokenType.INTERVAL, 205 TokenType.TINYBLOB, 206 TokenType.TINYTEXT, 207 TokenType.TIME, 208 TokenType.TIMETZ, 209 TokenType.TIMESTAMP, 210 TokenType.TIMESTAMP_S, 211 TokenType.TIMESTAMP_MS, 212 TokenType.TIMESTAMP_NS, 213 TokenType.TIMESTAMPTZ, 214 TokenType.TIMESTAMPLTZ, 215 TokenType.DATETIME, 216 TokenType.DATETIME64, 217 TokenType.DATE, 218 TokenType.DATE32, 219 TokenType.INT4RANGE, 220 TokenType.INT4MULTIRANGE, 221 TokenType.INT8RANGE, 222 TokenType.INT8MULTIRANGE, 223 TokenType.NUMRANGE, 224 TokenType.NUMMULTIRANGE, 225 TokenType.TSRANGE, 226 TokenType.TSMULTIRANGE, 227 TokenType.TSTZRANGE, 228 TokenType.TSTZMULTIRANGE, 229 TokenType.DATERANGE, 230 TokenType.DATEMULTIRANGE, 231 TokenType.DECIMAL, 232 TokenType.UDECIMAL, 233 TokenType.BIGDECIMAL, 234 TokenType.UUID, 235 TokenType.GEOGRAPHY, 236 TokenType.GEOMETRY, 237 TokenType.HLLSKETCH, 238 TokenType.HSTORE, 239 TokenType.PSEUDO_TYPE, 240 TokenType.SUPER, 241 TokenType.SERIAL, 242 TokenType.SMALLSERIAL, 243 TokenType.BIGSERIAL, 244 TokenType.XML, 245 TokenType.YEAR, 246 TokenType.UNIQUEIDENTIFIER, 247 TokenType.USERDEFINED, 248 TokenType.MONEY, 249 TokenType.SMALLMONEY, 250 TokenType.ROWVERSION, 251 TokenType.IMAGE, 252 TokenType.VARIANT, 253 TokenType.OBJECT, 254 TokenType.OBJECT_IDENTIFIER, 255 TokenType.INET, 256 TokenType.IPADDRESS, 257 TokenType.IPPREFIX, 258 TokenType.IPV4, 259 TokenType.IPV6, 260 TokenType.UNKNOWN, 261 TokenType.NULL, 262 *ENUM_TYPE_TOKENS, 263 *NESTED_TYPE_TOKENS, 264 *AGGREGATE_TYPE_TOKENS, 265 } 266 267 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 268 TokenType.BIGINT: TokenType.UBIGINT, 269 TokenType.INT: TokenType.UINT, 270 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 271 TokenType.SMALLINT: TokenType.USMALLINT, 272 TokenType.TINYINT: TokenType.UTINYINT, 273 TokenType.DECIMAL: TokenType.UDECIMAL, 274 } 275 276 SUBQUERY_PREDICATES = { 277 TokenType.ANY: exp.Any, 278 TokenType.ALL: exp.All, 279 TokenType.EXISTS: exp.Exists, 280 TokenType.SOME: exp.Any, 281 } 282 283 RESERVED_TOKENS = { 284 *Tokenizer.SINGLE_TOKENS.values(), 285 TokenType.SELECT, 286 } 287 288 DB_CREATABLES = { 289 TokenType.DATABASE, 290 TokenType.SCHEMA, 291 TokenType.TABLE, 292 TokenType.VIEW, 293 TokenType.MODEL, 294 TokenType.DICTIONARY, 295 } 296 297 CREATABLES = { 298 TokenType.COLUMN, 299 TokenType.CONSTRAINT, 300 TokenType.FUNCTION, 301 TokenType.INDEX, 302 TokenType.PROCEDURE, 303 TokenType.FOREIGN_KEY, 304 *DB_CREATABLES, 305 } 306 307 # Tokens that can represent identifiers 308 ID_VAR_TOKENS = { 309 TokenType.VAR, 310 TokenType.ANTI, 311 TokenType.APPLY, 312 TokenType.ASC, 313 TokenType.AUTO_INCREMENT, 314 TokenType.BEGIN, 315 TokenType.BPCHAR, 316 TokenType.CACHE, 317 TokenType.CASE, 318 TokenType.COLLATE, 319 TokenType.COMMAND, 320 TokenType.COMMENT, 321 TokenType.COMMIT, 322 TokenType.CONSTRAINT, 323 TokenType.DEFAULT, 324 TokenType.DELETE, 325 TokenType.DESC, 326 TokenType.DESCRIBE, 327 TokenType.DICTIONARY, 328 TokenType.DIV, 329 TokenType.END, 330 TokenType.EXECUTE, 331 TokenType.ESCAPE, 332 TokenType.FALSE, 333 TokenType.FIRST, 334 TokenType.FILTER, 335 TokenType.FINAL, 336 TokenType.FORMAT, 337 TokenType.FULL, 338 TokenType.IS, 339 TokenType.ISNULL, 340 TokenType.INTERVAL, 341 TokenType.KEEP, 342 TokenType.KILL, 343 TokenType.LEFT, 344 TokenType.LOAD, 345 TokenType.MERGE, 346 TokenType.NATURAL, 347 TokenType.NEXT, 348 TokenType.OFFSET, 349 TokenType.OPERATOR, 350 TokenType.ORDINALITY, 351 TokenType.OVERLAPS, 352 TokenType.OVERWRITE, 353 TokenType.PARTITION, 354 TokenType.PERCENT, 355 TokenType.PIVOT, 356 TokenType.PRAGMA, 357 TokenType.RANGE, 358 TokenType.RECURSIVE, 359 TokenType.REFERENCES, 360 TokenType.REFRESH, 361 TokenType.REPLACE, 362 TokenType.RIGHT, 363 TokenType.ROW, 364 TokenType.ROWS, 365 TokenType.SEMI, 366 TokenType.SET, 367 TokenType.SETTINGS, 368 TokenType.SHOW, 369 TokenType.TEMPORARY, 370 TokenType.TOP, 371 TokenType.TRUE, 372 TokenType.UNIQUE, 373 TokenType.UNPIVOT, 374 TokenType.UPDATE, 375 TokenType.USE, 376 TokenType.VOLATILE, 377 TokenType.WINDOW, 378 *CREATABLES, 379 *SUBQUERY_PREDICATES, 380 *TYPE_TOKENS, 381 *NO_PAREN_FUNCTIONS, 382 } 383 384 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 385 386 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 387 TokenType.ANTI, 388 TokenType.APPLY, 389 TokenType.ASOF, 390 TokenType.FULL, 391 TokenType.LEFT, 392 TokenType.LOCK, 393 TokenType.NATURAL, 394 TokenType.OFFSET, 395 TokenType.RIGHT, 396 TokenType.SEMI, 397 TokenType.WINDOW, 398 } 399 400 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 401 402 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 403 404 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 405 406 FUNC_TOKENS = { 407 TokenType.COLLATE, 408 TokenType.COMMAND, 409 TokenType.CURRENT_DATE, 410 TokenType.CURRENT_DATETIME, 411 TokenType.CURRENT_TIMESTAMP, 412 TokenType.CURRENT_TIME, 413 TokenType.CURRENT_USER, 414 TokenType.FILTER, 415 TokenType.FIRST, 416 TokenType.FORMAT, 417 TokenType.GLOB, 418 TokenType.IDENTIFIER, 419 TokenType.INDEX, 420 TokenType.ISNULL, 421 TokenType.ILIKE, 422 TokenType.INSERT, 423 TokenType.LIKE, 424 TokenType.MERGE, 425 TokenType.OFFSET, 426 TokenType.PRIMARY_KEY, 427 TokenType.RANGE, 428 TokenType.REPLACE, 429 TokenType.RLIKE, 430 TokenType.ROW, 431 TokenType.UNNEST, 432 TokenType.VAR, 433 TokenType.LEFT, 434 TokenType.RIGHT, 435 TokenType.DATE, 436 TokenType.DATETIME, 437 TokenType.TABLE, 438 TokenType.TIMESTAMP, 439 TokenType.TIMESTAMPTZ, 440 TokenType.WINDOW, 441 TokenType.XOR, 442 *TYPE_TOKENS, 443 *SUBQUERY_PREDICATES, 444 } 445 446 CONJUNCTION = { 447 TokenType.AND: exp.And, 448 TokenType.OR: exp.Or, 449 } 450 451 EQUALITY = { 452 TokenType.COLON_EQ: exp.PropertyEQ, 453 TokenType.EQ: exp.EQ, 454 TokenType.NEQ: exp.NEQ, 455 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 456 } 457 458 COMPARISON = { 459 TokenType.GT: exp.GT, 460 TokenType.GTE: exp.GTE, 461 TokenType.LT: exp.LT, 462 TokenType.LTE: exp.LTE, 463 } 464 465 BITWISE = { 466 TokenType.AMP: exp.BitwiseAnd, 467 TokenType.CARET: exp.BitwiseXor, 468 TokenType.PIPE: exp.BitwiseOr, 469 } 470 471 TERM = { 472 TokenType.DASH: exp.Sub, 473 TokenType.PLUS: exp.Add, 474 TokenType.MOD: exp.Mod, 475 TokenType.COLLATE: exp.Collate, 476 } 477 478 FACTOR = { 479 TokenType.DIV: exp.IntDiv, 480 TokenType.LR_ARROW: exp.Distance, 481 TokenType.SLASH: exp.Div, 482 TokenType.STAR: exp.Mul, 483 } 484 485 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 486 487 TIMES = { 488 TokenType.TIME, 489 TokenType.TIMETZ, 490 } 491 492 TIMESTAMPS = { 493 TokenType.TIMESTAMP, 494 TokenType.TIMESTAMPTZ, 495 TokenType.TIMESTAMPLTZ, 496 *TIMES, 497 } 498 499 SET_OPERATIONS = { 500 TokenType.UNION, 501 TokenType.INTERSECT, 502 TokenType.EXCEPT, 503 } 504 505 JOIN_METHODS = { 506 TokenType.NATURAL, 507 TokenType.ASOF, 508 } 509 510 JOIN_SIDES = { 511 TokenType.LEFT, 512 TokenType.RIGHT, 513 TokenType.FULL, 514 } 515 516 JOIN_KINDS = { 517 TokenType.INNER, 518 TokenType.OUTER, 519 TokenType.CROSS, 520 TokenType.SEMI, 521 TokenType.ANTI, 522 } 523 524 JOIN_HINTS: t.Set[str] = set() 525 526 LAMBDAS = { 527 TokenType.ARROW: lambda self, expressions: self.expression( 528 exp.Lambda, 529 this=self._replace_lambda( 530 self._parse_conjunction(), 531 {node.name for node in expressions}, 532 ), 533 expressions=expressions, 534 ), 535 TokenType.FARROW: lambda self, expressions: self.expression( 536 exp.Kwarg, 537 this=exp.var(expressions[0].name), 538 expression=self._parse_conjunction(), 539 ), 540 } 541 542 COLUMN_OPERATORS = { 543 TokenType.DOT: None, 544 TokenType.DCOLON: lambda self, this, to: self.expression( 545 exp.Cast if self.STRICT_CAST else exp.TryCast, 546 this=this, 547 to=to, 548 ), 549 TokenType.ARROW: lambda self, this, path: self.expression( 550 exp.JSONExtract, 551 this=this, 552 expression=self.dialect.to_json_path(path), 553 ), 554 TokenType.DARROW: lambda self, this, path: self.expression( 555 exp.JSONExtractScalar, 556 this=this, 557 expression=self.dialect.to_json_path(path), 558 ), 559 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 560 exp.JSONBExtract, 561 this=this, 562 expression=path, 563 ), 564 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 565 exp.JSONBExtractScalar, 566 this=this, 567 expression=path, 568 ), 569 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 570 exp.JSONBContains, 571 this=this, 572 expression=key, 573 ), 574 } 575 576 EXPRESSION_PARSERS = { 577 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 578 exp.Column: lambda self: self._parse_column(), 579 exp.Condition: lambda self: self._parse_conjunction(), 580 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 581 exp.Expression: lambda self: self._parse_statement(), 582 exp.From: lambda self: self._parse_from(), 583 exp.Group: lambda self: self._parse_group(), 584 exp.Having: lambda self: self._parse_having(), 585 exp.Identifier: lambda self: self._parse_id_var(), 586 exp.Join: lambda self: self._parse_join(), 587 exp.Lambda: lambda self: self._parse_lambda(), 588 exp.Lateral: lambda self: self._parse_lateral(), 589 exp.Limit: lambda self: self._parse_limit(), 590 exp.Offset: lambda self: self._parse_offset(), 591 exp.Order: lambda self: self._parse_order(), 592 exp.Ordered: lambda self: self._parse_ordered(), 593 exp.Properties: lambda self: self._parse_properties(), 594 exp.Qualify: lambda self: self._parse_qualify(), 595 exp.Returning: lambda self: self._parse_returning(), 596 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 597 exp.Table: lambda self: self._parse_table_parts(), 598 exp.TableAlias: lambda self: self._parse_table_alias(), 599 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 600 exp.Where: lambda self: self._parse_where(), 601 exp.Window: lambda self: self._parse_named_window(), 602 exp.With: lambda self: self._parse_with(), 603 "JOIN_TYPE": lambda self: self._parse_join_parts(), 604 } 605 606 STATEMENT_PARSERS = { 607 TokenType.ALTER: lambda self: self._parse_alter(), 608 TokenType.BEGIN: lambda self: self._parse_transaction(), 609 TokenType.CACHE: lambda self: self._parse_cache(), 610 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 611 TokenType.COMMENT: lambda self: self._parse_comment(), 612 TokenType.CREATE: lambda self: self._parse_create(), 613 TokenType.DELETE: lambda self: self._parse_delete(), 614 TokenType.DESC: lambda self: self._parse_describe(), 615 TokenType.DESCRIBE: lambda self: self._parse_describe(), 616 TokenType.DROP: lambda self: self._parse_drop(), 617 TokenType.INSERT: lambda self: self._parse_insert(), 618 TokenType.KILL: lambda self: self._parse_kill(), 619 TokenType.LOAD: lambda self: self._parse_load(), 620 TokenType.MERGE: lambda self: self._parse_merge(), 621 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 622 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 623 TokenType.REFRESH: lambda self: self._parse_refresh(), 624 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 625 TokenType.SET: lambda self: self._parse_set(), 626 TokenType.UNCACHE: lambda self: self._parse_uncache(), 627 TokenType.UPDATE: lambda self: self._parse_update(), 628 TokenType.USE: lambda self: self.expression( 629 exp.Use, 630 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 631 and exp.var(self._prev.text), 632 this=self._parse_table(schema=False), 633 ), 634 } 635 636 UNARY_PARSERS = { 637 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 638 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 639 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 640 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 641 } 642 643 PRIMARY_PARSERS = { 644 TokenType.STRING: lambda self, token: self.expression( 645 exp.Literal, this=token.text, is_string=True 646 ), 647 TokenType.NUMBER: lambda self, token: self.expression( 648 exp.Literal, this=token.text, is_string=False 649 ), 650 TokenType.STAR: lambda self, _: self.expression( 651 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 652 ), 653 TokenType.NULL: lambda self, _: self.expression(exp.Null), 654 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 655 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 656 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 657 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 658 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 659 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 660 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 661 exp.National, this=token.text 662 ), 663 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 664 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 665 exp.RawString, this=token.text 666 ), 667 TokenType.UNICODE_STRING: lambda self, token: self.expression( 668 exp.UnicodeString, 669 this=token.text, 670 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 671 ), 672 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 673 } 674 675 PLACEHOLDER_PARSERS = { 676 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 677 TokenType.PARAMETER: lambda self: self._parse_parameter(), 678 TokenType.COLON: lambda self: ( 679 self.expression(exp.Placeholder, this=self._prev.text) 680 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 681 else None 682 ), 683 } 684 685 RANGE_PARSERS = { 686 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 687 TokenType.GLOB: binary_range_parser(exp.Glob), 688 TokenType.ILIKE: binary_range_parser(exp.ILike), 689 TokenType.IN: lambda self, this: self._parse_in(this), 690 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 691 TokenType.IS: lambda self, this: self._parse_is(this), 692 TokenType.LIKE: binary_range_parser(exp.Like), 693 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 694 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 695 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 696 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 697 } 698 699 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 700 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 701 "AUTO": lambda self: self._parse_auto_property(), 702 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 703 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 704 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 705 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 706 "CHECKSUM": lambda self: self._parse_checksum(), 707 "CLUSTER BY": lambda self: self._parse_cluster(), 708 "CLUSTERED": lambda self: self._parse_clustered_by(), 709 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 710 exp.CollateProperty, **kwargs 711 ), 712 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 713 "CONTAINS": lambda self: self._parse_contains_property(), 714 "COPY": lambda self: self._parse_copy_property(), 715 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 716 "DEFINER": lambda self: self._parse_definer(), 717 "DETERMINISTIC": lambda self: self.expression( 718 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 719 ), 720 "DISTKEY": lambda self: self._parse_distkey(), 721 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 722 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 723 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 724 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 725 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 726 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 727 "FREESPACE": lambda self: self._parse_freespace(), 728 "HEAP": lambda self: self.expression(exp.HeapProperty), 729 "IMMUTABLE": lambda self: self.expression( 730 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 731 ), 732 "INHERITS": lambda self: self.expression( 733 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 734 ), 735 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 736 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 737 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 738 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 739 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 740 "LIKE": lambda self: self._parse_create_like(), 741 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 742 "LOCK": lambda self: self._parse_locking(), 743 "LOCKING": lambda self: self._parse_locking(), 744 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 745 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 746 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 747 "MODIFIES": lambda self: self._parse_modifies_property(), 748 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 749 "NO": lambda self: self._parse_no_property(), 750 "ON": lambda self: self._parse_on_property(), 751 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 752 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 753 "PARTITION": lambda self: self._parse_partitioned_of(), 754 "PARTITION BY": lambda self: self._parse_partitioned_by(), 755 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 756 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 757 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 758 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 759 "READS": lambda self: self._parse_reads_property(), 760 "REMOTE": lambda self: self._parse_remote_with_connection(), 761 "RETURNS": lambda self: self._parse_returns(), 762 "ROW": lambda self: self._parse_row(), 763 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 764 "SAMPLE": lambda self: self.expression( 765 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 766 ), 767 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 768 "SETTINGS": lambda self: self.expression( 769 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 770 ), 771 "SORTKEY": lambda self: self._parse_sortkey(), 772 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 773 "STABLE": lambda self: self.expression( 774 exp.StabilityProperty, this=exp.Literal.string("STABLE") 775 ), 776 "STORED": lambda self: self._parse_stored(), 777 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 778 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 779 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 780 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 781 "TO": lambda self: self._parse_to_table(), 782 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 783 "TRANSFORM": lambda self: self.expression( 784 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 785 ), 786 "TTL": lambda self: self._parse_ttl(), 787 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 788 "VOLATILE": lambda self: self._parse_volatile_property(), 789 "WITH": lambda self: self._parse_with_property(), 790 } 791 792 CONSTRAINT_PARSERS = { 793 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 794 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 795 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 796 "CHARACTER SET": lambda self: self.expression( 797 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 798 ), 799 "CHECK": lambda self: self.expression( 800 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 801 ), 802 "COLLATE": lambda self: self.expression( 803 exp.CollateColumnConstraint, this=self._parse_var() 804 ), 805 "COMMENT": lambda self: self.expression( 806 exp.CommentColumnConstraint, this=self._parse_string() 807 ), 808 "COMPRESS": lambda self: self._parse_compress(), 809 "CLUSTERED": lambda self: self.expression( 810 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 811 ), 812 "NONCLUSTERED": lambda self: self.expression( 813 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 814 ), 815 "DEFAULT": lambda self: self.expression( 816 exp.DefaultColumnConstraint, this=self._parse_bitwise() 817 ), 818 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 819 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 820 "FORMAT": lambda self: self.expression( 821 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 822 ), 823 "GENERATED": lambda self: self._parse_generated_as_identity(), 824 "IDENTITY": lambda self: self._parse_auto_increment(), 825 "INLINE": lambda self: self._parse_inline(), 826 "LIKE": lambda self: self._parse_create_like(), 827 "NOT": lambda self: self._parse_not_constraint(), 828 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 829 "ON": lambda self: ( 830 self._match(TokenType.UPDATE) 831 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 832 ) 833 or self.expression(exp.OnProperty, this=self._parse_id_var()), 834 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 835 "PERIOD": lambda self: self._parse_period_for_system_time(), 836 "PRIMARY KEY": lambda self: self._parse_primary_key(), 837 "REFERENCES": lambda self: self._parse_references(match=False), 838 "TITLE": lambda self: self.expression( 839 exp.TitleColumnConstraint, this=self._parse_var_or_string() 840 ), 841 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 842 "UNIQUE": lambda self: self._parse_unique(), 843 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 844 "WITH": lambda self: self.expression( 845 exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property) 846 ), 847 } 848 849 ALTER_PARSERS = { 850 "ADD": lambda self: self._parse_alter_table_add(), 851 "ALTER": lambda self: self._parse_alter_table_alter(), 852 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 853 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 854 "DROP": lambda self: self._parse_alter_table_drop(), 855 "RENAME": lambda self: self._parse_alter_table_rename(), 856 } 857 858 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE", "PERIOD"} 859 860 NO_PAREN_FUNCTION_PARSERS = { 861 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 862 "CASE": lambda self: self._parse_case(), 863 "IF": lambda self: self._parse_if(), 864 "NEXT": lambda self: self._parse_next_value_for(), 865 } 866 867 INVALID_FUNC_NAME_TOKENS = { 868 TokenType.IDENTIFIER, 869 TokenType.STRING, 870 } 871 872 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 873 874 FUNCTION_PARSERS = { 875 "ANY_VALUE": lambda self: self._parse_any_value(), 876 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 877 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 878 "DECODE": lambda self: self._parse_decode(), 879 "EXTRACT": lambda self: self._parse_extract(), 880 "JSON_OBJECT": lambda self: self._parse_json_object(), 881 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 882 "JSON_TABLE": lambda self: self._parse_json_table(), 883 "MATCH": lambda self: self._parse_match_against(), 884 "OPENJSON": lambda self: self._parse_open_json(), 885 "POSITION": lambda self: self._parse_position(), 886 "PREDICT": lambda self: self._parse_predict(), 887 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 888 "STRING_AGG": lambda self: self._parse_string_agg(), 889 "SUBSTRING": lambda self: self._parse_substring(), 890 "TRIM": lambda self: self._parse_trim(), 891 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 892 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 893 } 894 895 QUERY_MODIFIER_PARSERS = { 896 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 897 TokenType.WHERE: lambda self: ("where", self._parse_where()), 898 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 899 TokenType.HAVING: lambda self: ("having", self._parse_having()), 900 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 901 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 902 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 903 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 904 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 905 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 906 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 907 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 908 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 909 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 910 TokenType.CLUSTER_BY: lambda self: ( 911 "cluster", 912 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 913 ), 914 TokenType.DISTRIBUTE_BY: lambda self: ( 915 "distribute", 916 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 917 ), 918 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 919 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 920 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 921 } 922 923 SET_PARSERS = { 924 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 925 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 926 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 927 "TRANSACTION": lambda self: self._parse_set_transaction(), 928 } 929 930 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 931 932 TYPE_LITERAL_PARSERS = { 933 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 934 } 935 936 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 937 938 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 939 940 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 941 942 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 943 TRANSACTION_CHARACTERISTICS = { 944 "ISOLATION LEVEL REPEATABLE READ", 945 "ISOLATION LEVEL READ COMMITTED", 946 "ISOLATION LEVEL READ UNCOMMITTED", 947 "ISOLATION LEVEL SERIALIZABLE", 948 "READ WRITE", 949 "READ ONLY", 950 } 951 952 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 953 954 CLONE_KEYWORDS = {"CLONE", "COPY"} 955 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 956 957 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"} 958 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 959 960 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 961 962 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 963 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 964 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 965 966 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 967 968 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 969 970 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 971 972 DISTINCT_TOKENS = {TokenType.DISTINCT} 973 974 NULL_TOKENS = {TokenType.NULL} 975 976 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 977 978 STRICT_CAST = True 979 980 PREFIXED_PIVOT_COLUMNS = False 981 IDENTIFY_PIVOT_STRINGS = False 982 983 LOG_DEFAULTS_TO_LN = False 984 985 # Whether or not ADD is present for each column added by ALTER TABLE 986 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 987 988 # Whether or not the table sample clause expects CSV syntax 989 TABLESAMPLE_CSV = False 990 991 # Whether or not the SET command needs a delimiter (e.g. "=") for assignments 992 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 993 994 # Whether the TRIM function expects the characters to trim as its first argument 995 TRIM_PATTERN_FIRST = False 996 997 # Whether or not string aliases are supported `SELECT COUNT(*) 'count'` 998 STRING_ALIASES = False 999 1000 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1001 MODIFIERS_ATTACHED_TO_UNION = True 1002 UNION_MODIFIERS = {"order", "limit", "offset"} 1003 1004 # parses no parenthesis if statements as commands 1005 NO_PAREN_IF_COMMANDS = True 1006 1007 __slots__ = ( 1008 "error_level", 1009 "error_message_context", 1010 "max_errors", 1011 "dialect", 1012 "sql", 1013 "errors", 1014 "_tokens", 1015 "_index", 1016 "_curr", 1017 "_next", 1018 "_prev", 1019 "_prev_comments", 1020 ) 1021 1022 # Autofilled 1023 SHOW_TRIE: t.Dict = {} 1024 SET_TRIE: t.Dict = {} 1025 1026 def __init__( 1027 self, 1028 error_level: t.Optional[ErrorLevel] = None, 1029 error_message_context: int = 100, 1030 max_errors: int = 3, 1031 dialect: DialectType = None, 1032 ): 1033 from sqlglot.dialects import Dialect 1034 1035 self.error_level = error_level or ErrorLevel.IMMEDIATE 1036 self.error_message_context = error_message_context 1037 self.max_errors = max_errors 1038 self.dialect = Dialect.get_or_raise(dialect) 1039 self.reset() 1040 1041 def reset(self): 1042 self.sql = "" 1043 self.errors = [] 1044 self._tokens = [] 1045 self._index = 0 1046 self._curr = None 1047 self._next = None 1048 self._prev = None 1049 self._prev_comments = None 1050 1051 def parse( 1052 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1053 ) -> t.List[t.Optional[exp.Expression]]: 1054 """ 1055 Parses a list of tokens and returns a list of syntax trees, one tree 1056 per parsed SQL statement. 1057 1058 Args: 1059 raw_tokens: The list of tokens. 1060 sql: The original SQL string, used to produce helpful debug messages. 1061 1062 Returns: 1063 The list of the produced syntax trees. 1064 """ 1065 return self._parse( 1066 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1067 ) 1068 1069 def parse_into( 1070 self, 1071 expression_types: exp.IntoType, 1072 raw_tokens: t.List[Token], 1073 sql: t.Optional[str] = None, 1074 ) -> t.List[t.Optional[exp.Expression]]: 1075 """ 1076 Parses a list of tokens into a given Expression type. If a collection of Expression 1077 types is given instead, this method will try to parse the token list into each one 1078 of them, stopping at the first for which the parsing succeeds. 1079 1080 Args: 1081 expression_types: The expression type(s) to try and parse the token list into. 1082 raw_tokens: The list of tokens. 1083 sql: The original SQL string, used to produce helpful debug messages. 1084 1085 Returns: 1086 The target Expression. 1087 """ 1088 errors = [] 1089 for expression_type in ensure_list(expression_types): 1090 parser = self.EXPRESSION_PARSERS.get(expression_type) 1091 if not parser: 1092 raise TypeError(f"No parser registered for {expression_type}") 1093 1094 try: 1095 return self._parse(parser, raw_tokens, sql) 1096 except ParseError as e: 1097 e.errors[0]["into_expression"] = expression_type 1098 errors.append(e) 1099 1100 raise ParseError( 1101 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1102 errors=merge_errors(errors), 1103 ) from errors[-1] 1104 1105 def _parse( 1106 self, 1107 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1108 raw_tokens: t.List[Token], 1109 sql: t.Optional[str] = None, 1110 ) -> t.List[t.Optional[exp.Expression]]: 1111 self.reset() 1112 self.sql = sql or "" 1113 1114 total = len(raw_tokens) 1115 chunks: t.List[t.List[Token]] = [[]] 1116 1117 for i, token in enumerate(raw_tokens): 1118 if token.token_type == TokenType.SEMICOLON: 1119 if i < total - 1: 1120 chunks.append([]) 1121 else: 1122 chunks[-1].append(token) 1123 1124 expressions = [] 1125 1126 for tokens in chunks: 1127 self._index = -1 1128 self._tokens = tokens 1129 self._advance() 1130 1131 expressions.append(parse_method(self)) 1132 1133 if self._index < len(self._tokens): 1134 self.raise_error("Invalid expression / Unexpected token") 1135 1136 self.check_errors() 1137 1138 return expressions 1139 1140 def check_errors(self) -> None: 1141 """Logs or raises any found errors, depending on the chosen error level setting.""" 1142 if self.error_level == ErrorLevel.WARN: 1143 for error in self.errors: 1144 logger.error(str(error)) 1145 elif self.error_level == ErrorLevel.RAISE and self.errors: 1146 raise ParseError( 1147 concat_messages(self.errors, self.max_errors), 1148 errors=merge_errors(self.errors), 1149 ) 1150 1151 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1152 """ 1153 Appends an error in the list of recorded errors or raises it, depending on the chosen 1154 error level setting. 1155 """ 1156 token = token or self._curr or self._prev or Token.string("") 1157 start = token.start 1158 end = token.end + 1 1159 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1160 highlight = self.sql[start:end] 1161 end_context = self.sql[end : end + self.error_message_context] 1162 1163 error = ParseError.new( 1164 f"{message}. Line {token.line}, Col: {token.col}.\n" 1165 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1166 description=message, 1167 line=token.line, 1168 col=token.col, 1169 start_context=start_context, 1170 highlight=highlight, 1171 end_context=end_context, 1172 ) 1173 1174 if self.error_level == ErrorLevel.IMMEDIATE: 1175 raise error 1176 1177 self.errors.append(error) 1178 1179 def expression( 1180 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1181 ) -> E: 1182 """ 1183 Creates a new, validated Expression. 1184 1185 Args: 1186 exp_class: The expression class to instantiate. 1187 comments: An optional list of comments to attach to the expression. 1188 kwargs: The arguments to set for the expression along with their respective values. 1189 1190 Returns: 1191 The target expression. 1192 """ 1193 instance = exp_class(**kwargs) 1194 instance.add_comments(comments) if comments else self._add_comments(instance) 1195 return self.validate_expression(instance) 1196 1197 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1198 if expression and self._prev_comments: 1199 expression.add_comments(self._prev_comments) 1200 self._prev_comments = None 1201 1202 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1203 """ 1204 Validates an Expression, making sure that all its mandatory arguments are set. 1205 1206 Args: 1207 expression: The expression to validate. 1208 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1209 1210 Returns: 1211 The validated expression. 1212 """ 1213 if self.error_level != ErrorLevel.IGNORE: 1214 for error_message in expression.error_messages(args): 1215 self.raise_error(error_message) 1216 1217 return expression 1218 1219 def _find_sql(self, start: Token, end: Token) -> str: 1220 return self.sql[start.start : end.end + 1] 1221 1222 def _is_connected(self) -> bool: 1223 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1224 1225 def _advance(self, times: int = 1) -> None: 1226 self._index += times 1227 self._curr = seq_get(self._tokens, self._index) 1228 self._next = seq_get(self._tokens, self._index + 1) 1229 1230 if self._index > 0: 1231 self._prev = self._tokens[self._index - 1] 1232 self._prev_comments = self._prev.comments 1233 else: 1234 self._prev = None 1235 self._prev_comments = None 1236 1237 def _retreat(self, index: int) -> None: 1238 if index != self._index: 1239 self._advance(index - self._index) 1240 1241 def _warn_unsupported(self) -> None: 1242 if len(self._tokens) <= 1: 1243 return 1244 1245 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1246 # interested in emitting a warning for the one being currently processed. 1247 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1248 1249 logger.warning( 1250 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1251 ) 1252 1253 def _parse_command(self) -> exp.Command: 1254 self._warn_unsupported() 1255 return self.expression( 1256 exp.Command, this=self._prev.text.upper(), expression=self._parse_string() 1257 ) 1258 1259 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1260 start = self._prev 1261 exists = self._parse_exists() if allow_exists else None 1262 1263 self._match(TokenType.ON) 1264 1265 kind = self._match_set(self.CREATABLES) and self._prev 1266 if not kind: 1267 return self._parse_as_command(start) 1268 1269 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1270 this = self._parse_user_defined_function(kind=kind.token_type) 1271 elif kind.token_type == TokenType.TABLE: 1272 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1273 elif kind.token_type == TokenType.COLUMN: 1274 this = self._parse_column() 1275 else: 1276 this = self._parse_id_var() 1277 1278 self._match(TokenType.IS) 1279 1280 return self.expression( 1281 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1282 ) 1283 1284 def _parse_to_table( 1285 self, 1286 ) -> exp.ToTableProperty: 1287 table = self._parse_table_parts(schema=True) 1288 return self.expression(exp.ToTableProperty, this=table) 1289 1290 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1291 def _parse_ttl(self) -> exp.Expression: 1292 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1293 this = self._parse_bitwise() 1294 1295 if self._match_text_seq("DELETE"): 1296 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1297 if self._match_text_seq("RECOMPRESS"): 1298 return self.expression( 1299 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1300 ) 1301 if self._match_text_seq("TO", "DISK"): 1302 return self.expression( 1303 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1304 ) 1305 if self._match_text_seq("TO", "VOLUME"): 1306 return self.expression( 1307 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1308 ) 1309 1310 return this 1311 1312 expressions = self._parse_csv(_parse_ttl_action) 1313 where = self._parse_where() 1314 group = self._parse_group() 1315 1316 aggregates = None 1317 if group and self._match(TokenType.SET): 1318 aggregates = self._parse_csv(self._parse_set_item) 1319 1320 return self.expression( 1321 exp.MergeTreeTTL, 1322 expressions=expressions, 1323 where=where, 1324 group=group, 1325 aggregates=aggregates, 1326 ) 1327 1328 def _parse_statement(self) -> t.Optional[exp.Expression]: 1329 if self._curr is None: 1330 return None 1331 1332 if self._match_set(self.STATEMENT_PARSERS): 1333 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1334 1335 if self._match_set(Tokenizer.COMMANDS): 1336 return self._parse_command() 1337 1338 expression = self._parse_expression() 1339 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1340 return self._parse_query_modifiers(expression) 1341 1342 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1343 start = self._prev 1344 temporary = self._match(TokenType.TEMPORARY) 1345 materialized = self._match_text_seq("MATERIALIZED") 1346 1347 kind = self._match_set(self.CREATABLES) and self._prev.text 1348 if not kind: 1349 return self._parse_as_command(start) 1350 1351 return self.expression( 1352 exp.Drop, 1353 comments=start.comments, 1354 exists=exists or self._parse_exists(), 1355 this=self._parse_table( 1356 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1357 ), 1358 kind=kind, 1359 temporary=temporary, 1360 materialized=materialized, 1361 cascade=self._match_text_seq("CASCADE"), 1362 constraints=self._match_text_seq("CONSTRAINTS"), 1363 purge=self._match_text_seq("PURGE"), 1364 ) 1365 1366 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1367 return ( 1368 self._match_text_seq("IF") 1369 and (not not_ or self._match(TokenType.NOT)) 1370 and self._match(TokenType.EXISTS) 1371 ) 1372 1373 def _parse_create(self) -> exp.Create | exp.Command: 1374 # Note: this can't be None because we've matched a statement parser 1375 start = self._prev 1376 comments = self._prev_comments 1377 1378 replace = ( 1379 start.token_type == TokenType.REPLACE 1380 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1381 or self._match_pair(TokenType.OR, TokenType.ALTER) 1382 ) 1383 unique = self._match(TokenType.UNIQUE) 1384 1385 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1386 self._advance() 1387 1388 properties = None 1389 create_token = self._match_set(self.CREATABLES) and self._prev 1390 1391 if not create_token: 1392 # exp.Properties.Location.POST_CREATE 1393 properties = self._parse_properties() 1394 create_token = self._match_set(self.CREATABLES) and self._prev 1395 1396 if not properties or not create_token: 1397 return self._parse_as_command(start) 1398 1399 exists = self._parse_exists(not_=True) 1400 this = None 1401 expression: t.Optional[exp.Expression] = None 1402 indexes = None 1403 no_schema_binding = None 1404 begin = None 1405 end = None 1406 clone = None 1407 1408 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1409 nonlocal properties 1410 if properties and temp_props: 1411 properties.expressions.extend(temp_props.expressions) 1412 elif temp_props: 1413 properties = temp_props 1414 1415 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1416 this = self._parse_user_defined_function(kind=create_token.token_type) 1417 1418 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1419 extend_props(self._parse_properties()) 1420 1421 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1422 1423 if not expression: 1424 if self._match(TokenType.COMMAND): 1425 expression = self._parse_as_command(self._prev) 1426 else: 1427 begin = self._match(TokenType.BEGIN) 1428 return_ = self._match_text_seq("RETURN") 1429 1430 if self._match(TokenType.STRING, advance=False): 1431 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1432 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1433 expression = self._parse_string() 1434 extend_props(self._parse_properties()) 1435 else: 1436 expression = self._parse_statement() 1437 1438 end = self._match_text_seq("END") 1439 1440 if return_: 1441 expression = self.expression(exp.Return, this=expression) 1442 elif create_token.token_type == TokenType.INDEX: 1443 this = self._parse_index(index=self._parse_id_var()) 1444 elif create_token.token_type in self.DB_CREATABLES: 1445 table_parts = self._parse_table_parts( 1446 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1447 ) 1448 1449 # exp.Properties.Location.POST_NAME 1450 self._match(TokenType.COMMA) 1451 extend_props(self._parse_properties(before=True)) 1452 1453 this = self._parse_schema(this=table_parts) 1454 1455 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1456 extend_props(self._parse_properties()) 1457 1458 self._match(TokenType.ALIAS) 1459 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1460 # exp.Properties.Location.POST_ALIAS 1461 extend_props(self._parse_properties()) 1462 1463 expression = self._parse_ddl_select() 1464 1465 if create_token.token_type == TokenType.TABLE: 1466 # exp.Properties.Location.POST_EXPRESSION 1467 extend_props(self._parse_properties()) 1468 1469 indexes = [] 1470 while True: 1471 index = self._parse_index() 1472 1473 # exp.Properties.Location.POST_INDEX 1474 extend_props(self._parse_properties()) 1475 1476 if not index: 1477 break 1478 else: 1479 self._match(TokenType.COMMA) 1480 indexes.append(index) 1481 elif create_token.token_type == TokenType.VIEW: 1482 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1483 no_schema_binding = True 1484 1485 shallow = self._match_text_seq("SHALLOW") 1486 1487 if self._match_texts(self.CLONE_KEYWORDS): 1488 copy = self._prev.text.lower() == "copy" 1489 clone = self.expression( 1490 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1491 ) 1492 1493 if self._curr: 1494 return self._parse_as_command(start) 1495 1496 return self.expression( 1497 exp.Create, 1498 comments=comments, 1499 this=this, 1500 kind=create_token.text.upper(), 1501 replace=replace, 1502 unique=unique, 1503 expression=expression, 1504 exists=exists, 1505 properties=properties, 1506 indexes=indexes, 1507 no_schema_binding=no_schema_binding, 1508 begin=begin, 1509 end=end, 1510 clone=clone, 1511 ) 1512 1513 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1514 # only used for teradata currently 1515 self._match(TokenType.COMMA) 1516 1517 kwargs = { 1518 "no": self._match_text_seq("NO"), 1519 "dual": self._match_text_seq("DUAL"), 1520 "before": self._match_text_seq("BEFORE"), 1521 "default": self._match_text_seq("DEFAULT"), 1522 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1523 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1524 "after": self._match_text_seq("AFTER"), 1525 "minimum": self._match_texts(("MIN", "MINIMUM")), 1526 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1527 } 1528 1529 if self._match_texts(self.PROPERTY_PARSERS): 1530 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1531 try: 1532 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1533 except TypeError: 1534 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1535 1536 return None 1537 1538 def _parse_property(self) -> t.Optional[exp.Expression]: 1539 if self._match_texts(self.PROPERTY_PARSERS): 1540 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1541 1542 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1543 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1544 1545 if self._match_text_seq("COMPOUND", "SORTKEY"): 1546 return self._parse_sortkey(compound=True) 1547 1548 if self._match_text_seq("SQL", "SECURITY"): 1549 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1550 1551 index = self._index 1552 key = self._parse_column() 1553 1554 if not self._match(TokenType.EQ): 1555 self._retreat(index) 1556 return None 1557 1558 return self.expression( 1559 exp.Property, 1560 this=key.to_dot() if isinstance(key, exp.Column) else key, 1561 value=self._parse_column() or self._parse_var(any_token=True), 1562 ) 1563 1564 def _parse_stored(self) -> exp.FileFormatProperty: 1565 self._match(TokenType.ALIAS) 1566 1567 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1568 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1569 1570 return self.expression( 1571 exp.FileFormatProperty, 1572 this=( 1573 self.expression( 1574 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1575 ) 1576 if input_format or output_format 1577 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1578 ), 1579 ) 1580 1581 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1582 self._match(TokenType.EQ) 1583 self._match(TokenType.ALIAS) 1584 return self.expression(exp_class, this=self._parse_field(), **kwargs) 1585 1586 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1587 properties = [] 1588 while True: 1589 if before: 1590 prop = self._parse_property_before() 1591 else: 1592 prop = self._parse_property() 1593 1594 if not prop: 1595 break 1596 for p in ensure_list(prop): 1597 properties.append(p) 1598 1599 if properties: 1600 return self.expression(exp.Properties, expressions=properties) 1601 1602 return None 1603 1604 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1605 return self.expression( 1606 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1607 ) 1608 1609 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1610 if self._index >= 2: 1611 pre_volatile_token = self._tokens[self._index - 2] 1612 else: 1613 pre_volatile_token = None 1614 1615 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1616 return exp.VolatileProperty() 1617 1618 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1619 1620 def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty: 1621 self._match_pair(TokenType.EQ, TokenType.ON) 1622 1623 prop = self.expression(exp.WithSystemVersioningProperty) 1624 if self._match(TokenType.L_PAREN): 1625 self._match_text_seq("HISTORY_TABLE", "=") 1626 prop.set("this", self._parse_table_parts()) 1627 1628 if self._match(TokenType.COMMA): 1629 self._match_text_seq("DATA_CONSISTENCY_CHECK", "=") 1630 prop.set("expression", self._advance_any() and self._prev.text.upper()) 1631 1632 self._match_r_paren() 1633 1634 return prop 1635 1636 def _parse_with_property( 1637 self, 1638 ) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1639 if self._match(TokenType.L_PAREN, advance=False): 1640 return self._parse_wrapped_csv(self._parse_property) 1641 1642 if self._match_text_seq("JOURNAL"): 1643 return self._parse_withjournaltable() 1644 1645 if self._match_text_seq("DATA"): 1646 return self._parse_withdata(no=False) 1647 elif self._match_text_seq("NO", "DATA"): 1648 return self._parse_withdata(no=True) 1649 1650 if not self._next: 1651 return None 1652 1653 return self._parse_withisolatedloading() 1654 1655 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1656 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1657 self._match(TokenType.EQ) 1658 1659 user = self._parse_id_var() 1660 self._match(TokenType.PARAMETER) 1661 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1662 1663 if not user or not host: 1664 return None 1665 1666 return exp.DefinerProperty(this=f"{user}@{host}") 1667 1668 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1669 self._match(TokenType.TABLE) 1670 self._match(TokenType.EQ) 1671 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1672 1673 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1674 return self.expression(exp.LogProperty, no=no) 1675 1676 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1677 return self.expression(exp.JournalProperty, **kwargs) 1678 1679 def _parse_checksum(self) -> exp.ChecksumProperty: 1680 self._match(TokenType.EQ) 1681 1682 on = None 1683 if self._match(TokenType.ON): 1684 on = True 1685 elif self._match_text_seq("OFF"): 1686 on = False 1687 1688 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1689 1690 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 1691 return self.expression( 1692 exp.Cluster, 1693 expressions=( 1694 self._parse_wrapped_csv(self._parse_ordered) 1695 if wrapped 1696 else self._parse_csv(self._parse_ordered) 1697 ), 1698 ) 1699 1700 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 1701 self._match_text_seq("BY") 1702 1703 self._match_l_paren() 1704 expressions = self._parse_csv(self._parse_column) 1705 self._match_r_paren() 1706 1707 if self._match_text_seq("SORTED", "BY"): 1708 self._match_l_paren() 1709 sorted_by = self._parse_csv(self._parse_ordered) 1710 self._match_r_paren() 1711 else: 1712 sorted_by = None 1713 1714 self._match(TokenType.INTO) 1715 buckets = self._parse_number() 1716 self._match_text_seq("BUCKETS") 1717 1718 return self.expression( 1719 exp.ClusteredByProperty, 1720 expressions=expressions, 1721 sorted_by=sorted_by, 1722 buckets=buckets, 1723 ) 1724 1725 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1726 if not self._match_text_seq("GRANTS"): 1727 self._retreat(self._index - 1) 1728 return None 1729 1730 return self.expression(exp.CopyGrantsProperty) 1731 1732 def _parse_freespace(self) -> exp.FreespaceProperty: 1733 self._match(TokenType.EQ) 1734 return self.expression( 1735 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1736 ) 1737 1738 def _parse_mergeblockratio( 1739 self, no: bool = False, default: bool = False 1740 ) -> exp.MergeBlockRatioProperty: 1741 if self._match(TokenType.EQ): 1742 return self.expression( 1743 exp.MergeBlockRatioProperty, 1744 this=self._parse_number(), 1745 percent=self._match(TokenType.PERCENT), 1746 ) 1747 1748 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1749 1750 def _parse_datablocksize( 1751 self, 1752 default: t.Optional[bool] = None, 1753 minimum: t.Optional[bool] = None, 1754 maximum: t.Optional[bool] = None, 1755 ) -> exp.DataBlocksizeProperty: 1756 self._match(TokenType.EQ) 1757 size = self._parse_number() 1758 1759 units = None 1760 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1761 units = self._prev.text 1762 1763 return self.expression( 1764 exp.DataBlocksizeProperty, 1765 size=size, 1766 units=units, 1767 default=default, 1768 minimum=minimum, 1769 maximum=maximum, 1770 ) 1771 1772 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 1773 self._match(TokenType.EQ) 1774 always = self._match_text_seq("ALWAYS") 1775 manual = self._match_text_seq("MANUAL") 1776 never = self._match_text_seq("NEVER") 1777 default = self._match_text_seq("DEFAULT") 1778 1779 autotemp = None 1780 if self._match_text_seq("AUTOTEMP"): 1781 autotemp = self._parse_schema() 1782 1783 return self.expression( 1784 exp.BlockCompressionProperty, 1785 always=always, 1786 manual=manual, 1787 never=never, 1788 default=default, 1789 autotemp=autotemp, 1790 ) 1791 1792 def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty: 1793 no = self._match_text_seq("NO") 1794 concurrent = self._match_text_seq("CONCURRENT") 1795 self._match_text_seq("ISOLATED", "LOADING") 1796 for_all = self._match_text_seq("FOR", "ALL") 1797 for_insert = self._match_text_seq("FOR", "INSERT") 1798 for_none = self._match_text_seq("FOR", "NONE") 1799 return self.expression( 1800 exp.IsolatedLoadingProperty, 1801 no=no, 1802 concurrent=concurrent, 1803 for_all=for_all, 1804 for_insert=for_insert, 1805 for_none=for_none, 1806 ) 1807 1808 def _parse_locking(self) -> exp.LockingProperty: 1809 if self._match(TokenType.TABLE): 1810 kind = "TABLE" 1811 elif self._match(TokenType.VIEW): 1812 kind = "VIEW" 1813 elif self._match(TokenType.ROW): 1814 kind = "ROW" 1815 elif self._match_text_seq("DATABASE"): 1816 kind = "DATABASE" 1817 else: 1818 kind = None 1819 1820 if kind in ("DATABASE", "TABLE", "VIEW"): 1821 this = self._parse_table_parts() 1822 else: 1823 this = None 1824 1825 if self._match(TokenType.FOR): 1826 for_or_in = "FOR" 1827 elif self._match(TokenType.IN): 1828 for_or_in = "IN" 1829 else: 1830 for_or_in = None 1831 1832 if self._match_text_seq("ACCESS"): 1833 lock_type = "ACCESS" 1834 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1835 lock_type = "EXCLUSIVE" 1836 elif self._match_text_seq("SHARE"): 1837 lock_type = "SHARE" 1838 elif self._match_text_seq("READ"): 1839 lock_type = "READ" 1840 elif self._match_text_seq("WRITE"): 1841 lock_type = "WRITE" 1842 elif self._match_text_seq("CHECKSUM"): 1843 lock_type = "CHECKSUM" 1844 else: 1845 lock_type = None 1846 1847 override = self._match_text_seq("OVERRIDE") 1848 1849 return self.expression( 1850 exp.LockingProperty, 1851 this=this, 1852 kind=kind, 1853 for_or_in=for_or_in, 1854 lock_type=lock_type, 1855 override=override, 1856 ) 1857 1858 def _parse_partition_by(self) -> t.List[exp.Expression]: 1859 if self._match(TokenType.PARTITION_BY): 1860 return self._parse_csv(self._parse_conjunction) 1861 return [] 1862 1863 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 1864 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 1865 if self._match_text_seq("MINVALUE"): 1866 return exp.var("MINVALUE") 1867 if self._match_text_seq("MAXVALUE"): 1868 return exp.var("MAXVALUE") 1869 return self._parse_bitwise() 1870 1871 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 1872 expression = None 1873 from_expressions = None 1874 to_expressions = None 1875 1876 if self._match(TokenType.IN): 1877 this = self._parse_wrapped_csv(self._parse_bitwise) 1878 elif self._match(TokenType.FROM): 1879 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 1880 self._match_text_seq("TO") 1881 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 1882 elif self._match_text_seq("WITH", "(", "MODULUS"): 1883 this = self._parse_number() 1884 self._match_text_seq(",", "REMAINDER") 1885 expression = self._parse_number() 1886 self._match_r_paren() 1887 else: 1888 self.raise_error("Failed to parse partition bound spec.") 1889 1890 return self.expression( 1891 exp.PartitionBoundSpec, 1892 this=this, 1893 expression=expression, 1894 from_expressions=from_expressions, 1895 to_expressions=to_expressions, 1896 ) 1897 1898 # https://www.postgresql.org/docs/current/sql-createtable.html 1899 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 1900 if not self._match_text_seq("OF"): 1901 self._retreat(self._index - 1) 1902 return None 1903 1904 this = self._parse_table(schema=True) 1905 1906 if self._match(TokenType.DEFAULT): 1907 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 1908 elif self._match_text_seq("FOR", "VALUES"): 1909 expression = self._parse_partition_bound_spec() 1910 else: 1911 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 1912 1913 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 1914 1915 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 1916 self._match(TokenType.EQ) 1917 return self.expression( 1918 exp.PartitionedByProperty, 1919 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1920 ) 1921 1922 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 1923 if self._match_text_seq("AND", "STATISTICS"): 1924 statistics = True 1925 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1926 statistics = False 1927 else: 1928 statistics = None 1929 1930 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1931 1932 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 1933 if self._match_text_seq("SQL"): 1934 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 1935 return None 1936 1937 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 1938 if self._match_text_seq("SQL", "DATA"): 1939 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 1940 return None 1941 1942 def _parse_no_property(self) -> t.Optional[exp.Expression]: 1943 if self._match_text_seq("PRIMARY", "INDEX"): 1944 return exp.NoPrimaryIndexProperty() 1945 if self._match_text_seq("SQL"): 1946 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 1947 return None 1948 1949 def _parse_on_property(self) -> t.Optional[exp.Expression]: 1950 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1951 return exp.OnCommitProperty() 1952 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1953 return exp.OnCommitProperty(delete=True) 1954 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 1955 1956 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 1957 if self._match_text_seq("SQL", "DATA"): 1958 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 1959 return None 1960 1961 def _parse_distkey(self) -> exp.DistKeyProperty: 1962 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1963 1964 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 1965 table = self._parse_table(schema=True) 1966 1967 options = [] 1968 while self._match_texts(("INCLUDING", "EXCLUDING")): 1969 this = self._prev.text.upper() 1970 1971 id_var = self._parse_id_var() 1972 if not id_var: 1973 return None 1974 1975 options.append( 1976 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 1977 ) 1978 1979 return self.expression(exp.LikeProperty, this=table, expressions=options) 1980 1981 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 1982 return self.expression( 1983 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 1984 ) 1985 1986 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 1987 self._match(TokenType.EQ) 1988 return self.expression( 1989 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1990 ) 1991 1992 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 1993 self._match_text_seq("WITH", "CONNECTION") 1994 return self.expression( 1995 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 1996 ) 1997 1998 def _parse_returns(self) -> exp.ReturnsProperty: 1999 value: t.Optional[exp.Expression] 2000 is_table = self._match(TokenType.TABLE) 2001 2002 if is_table: 2003 if self._match(TokenType.LT): 2004 value = self.expression( 2005 exp.Schema, 2006 this="TABLE", 2007 expressions=self._parse_csv(self._parse_struct_types), 2008 ) 2009 if not self._match(TokenType.GT): 2010 self.raise_error("Expecting >") 2011 else: 2012 value = self._parse_schema(exp.var("TABLE")) 2013 else: 2014 value = self._parse_types() 2015 2016 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 2017 2018 def _parse_describe(self) -> exp.Describe: 2019 kind = self._match_set(self.CREATABLES) and self._prev.text 2020 extended = self._match_text_seq("EXTENDED") 2021 this = self._parse_table(schema=True) 2022 properties = self._parse_properties() 2023 expressions = properties.expressions if properties else None 2024 return self.expression( 2025 exp.Describe, this=this, extended=extended, kind=kind, expressions=expressions 2026 ) 2027 2028 def _parse_insert(self) -> exp.Insert: 2029 comments = ensure_list(self._prev_comments) 2030 overwrite = self._match(TokenType.OVERWRITE) 2031 ignore = self._match(TokenType.IGNORE) 2032 local = self._match_text_seq("LOCAL") 2033 alternative = None 2034 2035 if self._match_text_seq("DIRECTORY"): 2036 this: t.Optional[exp.Expression] = self.expression( 2037 exp.Directory, 2038 this=self._parse_var_or_string(), 2039 local=local, 2040 row_format=self._parse_row_format(match_row=True), 2041 ) 2042 else: 2043 if self._match(TokenType.OR): 2044 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2045 2046 self._match(TokenType.INTO) 2047 comments += ensure_list(self._prev_comments) 2048 self._match(TokenType.TABLE) 2049 this = self._parse_table(schema=True) 2050 2051 returning = self._parse_returning() 2052 2053 return self.expression( 2054 exp.Insert, 2055 comments=comments, 2056 this=this, 2057 by_name=self._match_text_seq("BY", "NAME"), 2058 exists=self._parse_exists(), 2059 partition=self._parse_partition(), 2060 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 2061 and self._parse_conjunction(), 2062 expression=self._parse_ddl_select(), 2063 conflict=self._parse_on_conflict(), 2064 returning=returning or self._parse_returning(), 2065 overwrite=overwrite, 2066 alternative=alternative, 2067 ignore=ignore, 2068 ) 2069 2070 def _parse_kill(self) -> exp.Kill: 2071 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2072 2073 return self.expression( 2074 exp.Kill, 2075 this=self._parse_primary(), 2076 kind=kind, 2077 ) 2078 2079 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2080 conflict = self._match_text_seq("ON", "CONFLICT") 2081 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2082 2083 if not conflict and not duplicate: 2084 return None 2085 2086 nothing = None 2087 expressions = None 2088 key = None 2089 constraint = None 2090 2091 if conflict: 2092 if self._match_text_seq("ON", "CONSTRAINT"): 2093 constraint = self._parse_id_var() 2094 else: 2095 key = self._parse_csv(self._parse_value) 2096 2097 self._match_text_seq("DO") 2098 if self._match_text_seq("NOTHING"): 2099 nothing = True 2100 else: 2101 self._match(TokenType.UPDATE) 2102 self._match(TokenType.SET) 2103 expressions = self._parse_csv(self._parse_equality) 2104 2105 return self.expression( 2106 exp.OnConflict, 2107 duplicate=duplicate, 2108 expressions=expressions, 2109 nothing=nothing, 2110 key=key, 2111 constraint=constraint, 2112 ) 2113 2114 def _parse_returning(self) -> t.Optional[exp.Returning]: 2115 if not self._match(TokenType.RETURNING): 2116 return None 2117 return self.expression( 2118 exp.Returning, 2119 expressions=self._parse_csv(self._parse_expression), 2120 into=self._match(TokenType.INTO) and self._parse_table_part(), 2121 ) 2122 2123 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2124 if not self._match(TokenType.FORMAT): 2125 return None 2126 return self._parse_row_format() 2127 2128 def _parse_row_format( 2129 self, match_row: bool = False 2130 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2131 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2132 return None 2133 2134 if self._match_text_seq("SERDE"): 2135 this = self._parse_string() 2136 2137 serde_properties = None 2138 if self._match(TokenType.SERDE_PROPERTIES): 2139 serde_properties = self.expression( 2140 exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property) 2141 ) 2142 2143 return self.expression( 2144 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2145 ) 2146 2147 self._match_text_seq("DELIMITED") 2148 2149 kwargs = {} 2150 2151 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2152 kwargs["fields"] = self._parse_string() 2153 if self._match_text_seq("ESCAPED", "BY"): 2154 kwargs["escaped"] = self._parse_string() 2155 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2156 kwargs["collection_items"] = self._parse_string() 2157 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2158 kwargs["map_keys"] = self._parse_string() 2159 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2160 kwargs["lines"] = self._parse_string() 2161 if self._match_text_seq("NULL", "DEFINED", "AS"): 2162 kwargs["null"] = self._parse_string() 2163 2164 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2165 2166 def _parse_load(self) -> exp.LoadData | exp.Command: 2167 if self._match_text_seq("DATA"): 2168 local = self._match_text_seq("LOCAL") 2169 self._match_text_seq("INPATH") 2170 inpath = self._parse_string() 2171 overwrite = self._match(TokenType.OVERWRITE) 2172 self._match_pair(TokenType.INTO, TokenType.TABLE) 2173 2174 return self.expression( 2175 exp.LoadData, 2176 this=self._parse_table(schema=True), 2177 local=local, 2178 overwrite=overwrite, 2179 inpath=inpath, 2180 partition=self._parse_partition(), 2181 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2182 serde=self._match_text_seq("SERDE") and self._parse_string(), 2183 ) 2184 return self._parse_as_command(self._prev) 2185 2186 def _parse_delete(self) -> exp.Delete: 2187 # This handles MySQL's "Multiple-Table Syntax" 2188 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2189 tables = None 2190 comments = self._prev_comments 2191 if not self._match(TokenType.FROM, advance=False): 2192 tables = self._parse_csv(self._parse_table) or None 2193 2194 returning = self._parse_returning() 2195 2196 return self.expression( 2197 exp.Delete, 2198 comments=comments, 2199 tables=tables, 2200 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2201 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2202 where=self._parse_where(), 2203 returning=returning or self._parse_returning(), 2204 limit=self._parse_limit(), 2205 ) 2206 2207 def _parse_update(self) -> exp.Update: 2208 comments = self._prev_comments 2209 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2210 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2211 returning = self._parse_returning() 2212 return self.expression( 2213 exp.Update, 2214 comments=comments, 2215 **{ # type: ignore 2216 "this": this, 2217 "expressions": expressions, 2218 "from": self._parse_from(joins=True), 2219 "where": self._parse_where(), 2220 "returning": returning or self._parse_returning(), 2221 "order": self._parse_order(), 2222 "limit": self._parse_limit(), 2223 }, 2224 ) 2225 2226 def _parse_uncache(self) -> exp.Uncache: 2227 if not self._match(TokenType.TABLE): 2228 self.raise_error("Expecting TABLE after UNCACHE") 2229 2230 return self.expression( 2231 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2232 ) 2233 2234 def _parse_cache(self) -> exp.Cache: 2235 lazy = self._match_text_seq("LAZY") 2236 self._match(TokenType.TABLE) 2237 table = self._parse_table(schema=True) 2238 2239 options = [] 2240 if self._match_text_seq("OPTIONS"): 2241 self._match_l_paren() 2242 k = self._parse_string() 2243 self._match(TokenType.EQ) 2244 v = self._parse_string() 2245 options = [k, v] 2246 self._match_r_paren() 2247 2248 self._match(TokenType.ALIAS) 2249 return self.expression( 2250 exp.Cache, 2251 this=table, 2252 lazy=lazy, 2253 options=options, 2254 expression=self._parse_select(nested=True), 2255 ) 2256 2257 def _parse_partition(self) -> t.Optional[exp.Partition]: 2258 if not self._match(TokenType.PARTITION): 2259 return None 2260 2261 return self.expression( 2262 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 2263 ) 2264 2265 def _parse_value(self) -> exp.Tuple: 2266 if self._match(TokenType.L_PAREN): 2267 expressions = self._parse_csv(self._parse_expression) 2268 self._match_r_paren() 2269 return self.expression(exp.Tuple, expressions=expressions) 2270 2271 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 2272 # https://prestodb.io/docs/current/sql/values.html 2273 return self.expression(exp.Tuple, expressions=[self._parse_expression()]) 2274 2275 def _parse_projections(self) -> t.List[exp.Expression]: 2276 return self._parse_expressions() 2277 2278 def _parse_select( 2279 self, 2280 nested: bool = False, 2281 table: bool = False, 2282 parse_subquery_alias: bool = True, 2283 parse_set_operation: bool = True, 2284 ) -> t.Optional[exp.Expression]: 2285 cte = self._parse_with() 2286 2287 if cte: 2288 this = self._parse_statement() 2289 2290 if not this: 2291 self.raise_error("Failed to parse any statement following CTE") 2292 return cte 2293 2294 if "with" in this.arg_types: 2295 this.set("with", cte) 2296 else: 2297 self.raise_error(f"{this.key} does not support CTE") 2298 this = cte 2299 2300 return this 2301 2302 # duckdb supports leading with FROM x 2303 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2304 2305 if self._match(TokenType.SELECT): 2306 comments = self._prev_comments 2307 2308 hint = self._parse_hint() 2309 all_ = self._match(TokenType.ALL) 2310 distinct = self._match_set(self.DISTINCT_TOKENS) 2311 2312 kind = ( 2313 self._match(TokenType.ALIAS) 2314 and self._match_texts(("STRUCT", "VALUE")) 2315 and self._prev.text.upper() 2316 ) 2317 2318 if distinct: 2319 distinct = self.expression( 2320 exp.Distinct, 2321 on=self._parse_value() if self._match(TokenType.ON) else None, 2322 ) 2323 2324 if all_ and distinct: 2325 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2326 2327 limit = self._parse_limit(top=True) 2328 projections = self._parse_projections() 2329 2330 this = self.expression( 2331 exp.Select, 2332 kind=kind, 2333 hint=hint, 2334 distinct=distinct, 2335 expressions=projections, 2336 limit=limit, 2337 ) 2338 this.comments = comments 2339 2340 into = self._parse_into() 2341 if into: 2342 this.set("into", into) 2343 2344 if not from_: 2345 from_ = self._parse_from() 2346 2347 if from_: 2348 this.set("from", from_) 2349 2350 this = self._parse_query_modifiers(this) 2351 elif (table or nested) and self._match(TokenType.L_PAREN): 2352 if self._match(TokenType.PIVOT): 2353 this = self._parse_simplified_pivot() 2354 elif self._match(TokenType.FROM): 2355 this = exp.select("*").from_( 2356 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2357 ) 2358 else: 2359 this = ( 2360 self._parse_table() 2361 if table 2362 else self._parse_select(nested=True, parse_set_operation=False) 2363 ) 2364 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2365 2366 self._match_r_paren() 2367 2368 # We return early here so that the UNION isn't attached to the subquery by the 2369 # following call to _parse_set_operations, but instead becomes the parent node 2370 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2371 elif self._match(TokenType.VALUES): 2372 this = self.expression( 2373 exp.Values, 2374 expressions=self._parse_csv(self._parse_value), 2375 alias=self._parse_table_alias(), 2376 ) 2377 elif from_: 2378 this = exp.select("*").from_(from_.this, copy=False) 2379 else: 2380 this = None 2381 2382 if parse_set_operation: 2383 return self._parse_set_operations(this) 2384 return this 2385 2386 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2387 if not skip_with_token and not self._match(TokenType.WITH): 2388 return None 2389 2390 comments = self._prev_comments 2391 recursive = self._match(TokenType.RECURSIVE) 2392 2393 expressions = [] 2394 while True: 2395 expressions.append(self._parse_cte()) 2396 2397 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2398 break 2399 else: 2400 self._match(TokenType.WITH) 2401 2402 return self.expression( 2403 exp.With, comments=comments, expressions=expressions, recursive=recursive 2404 ) 2405 2406 def _parse_cte(self) -> exp.CTE: 2407 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2408 if not alias or not alias.this: 2409 self.raise_error("Expected CTE to have alias") 2410 2411 self._match(TokenType.ALIAS) 2412 return self.expression( 2413 exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias 2414 ) 2415 2416 def _parse_table_alias( 2417 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2418 ) -> t.Optional[exp.TableAlias]: 2419 any_token = self._match(TokenType.ALIAS) 2420 alias = ( 2421 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2422 or self._parse_string_as_identifier() 2423 ) 2424 2425 index = self._index 2426 if self._match(TokenType.L_PAREN): 2427 columns = self._parse_csv(self._parse_function_parameter) 2428 self._match_r_paren() if columns else self._retreat(index) 2429 else: 2430 columns = None 2431 2432 if not alias and not columns: 2433 return None 2434 2435 return self.expression(exp.TableAlias, this=alias, columns=columns) 2436 2437 def _parse_subquery( 2438 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2439 ) -> t.Optional[exp.Subquery]: 2440 if not this: 2441 return None 2442 2443 return self.expression( 2444 exp.Subquery, 2445 this=this, 2446 pivots=self._parse_pivots(), 2447 alias=self._parse_table_alias() if parse_alias else None, 2448 ) 2449 2450 def _parse_query_modifiers( 2451 self, this: t.Optional[exp.Expression] 2452 ) -> t.Optional[exp.Expression]: 2453 if isinstance(this, self.MODIFIABLES): 2454 for join in iter(self._parse_join, None): 2455 this.append("joins", join) 2456 for lateral in iter(self._parse_lateral, None): 2457 this.append("laterals", lateral) 2458 2459 while True: 2460 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2461 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2462 key, expression = parser(self) 2463 2464 if expression: 2465 this.set(key, expression) 2466 if key == "limit": 2467 offset = expression.args.pop("offset", None) 2468 if offset: 2469 this.set("offset", exp.Offset(expression=offset)) 2470 continue 2471 break 2472 return this 2473 2474 def _parse_hint(self) -> t.Optional[exp.Hint]: 2475 if self._match(TokenType.HINT): 2476 hints = [] 2477 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2478 hints.extend(hint) 2479 2480 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2481 self.raise_error("Expected */ after HINT") 2482 2483 return self.expression(exp.Hint, expressions=hints) 2484 2485 return None 2486 2487 def _parse_into(self) -> t.Optional[exp.Into]: 2488 if not self._match(TokenType.INTO): 2489 return None 2490 2491 temp = self._match(TokenType.TEMPORARY) 2492 unlogged = self._match_text_seq("UNLOGGED") 2493 self._match(TokenType.TABLE) 2494 2495 return self.expression( 2496 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2497 ) 2498 2499 def _parse_from( 2500 self, joins: bool = False, skip_from_token: bool = False 2501 ) -> t.Optional[exp.From]: 2502 if not skip_from_token and not self._match(TokenType.FROM): 2503 return None 2504 2505 return self.expression( 2506 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2507 ) 2508 2509 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2510 if not self._match(TokenType.MATCH_RECOGNIZE): 2511 return None 2512 2513 self._match_l_paren() 2514 2515 partition = self._parse_partition_by() 2516 order = self._parse_order() 2517 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2518 2519 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2520 rows = exp.var("ONE ROW PER MATCH") 2521 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2522 text = "ALL ROWS PER MATCH" 2523 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2524 text += " SHOW EMPTY MATCHES" 2525 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2526 text += " OMIT EMPTY MATCHES" 2527 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2528 text += " WITH UNMATCHED ROWS" 2529 rows = exp.var(text) 2530 else: 2531 rows = None 2532 2533 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2534 text = "AFTER MATCH SKIP" 2535 if self._match_text_seq("PAST", "LAST", "ROW"): 2536 text += " PAST LAST ROW" 2537 elif self._match_text_seq("TO", "NEXT", "ROW"): 2538 text += " TO NEXT ROW" 2539 elif self._match_text_seq("TO", "FIRST"): 2540 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2541 elif self._match_text_seq("TO", "LAST"): 2542 text += f" TO LAST {self._advance_any().text}" # type: ignore 2543 after = exp.var(text) 2544 else: 2545 after = None 2546 2547 if self._match_text_seq("PATTERN"): 2548 self._match_l_paren() 2549 2550 if not self._curr: 2551 self.raise_error("Expecting )", self._curr) 2552 2553 paren = 1 2554 start = self._curr 2555 2556 while self._curr and paren > 0: 2557 if self._curr.token_type == TokenType.L_PAREN: 2558 paren += 1 2559 if self._curr.token_type == TokenType.R_PAREN: 2560 paren -= 1 2561 2562 end = self._prev 2563 self._advance() 2564 2565 if paren > 0: 2566 self.raise_error("Expecting )", self._curr) 2567 2568 pattern = exp.var(self._find_sql(start, end)) 2569 else: 2570 pattern = None 2571 2572 define = ( 2573 self._parse_csv(self._parse_name_as_expression) 2574 if self._match_text_seq("DEFINE") 2575 else None 2576 ) 2577 2578 self._match_r_paren() 2579 2580 return self.expression( 2581 exp.MatchRecognize, 2582 partition_by=partition, 2583 order=order, 2584 measures=measures, 2585 rows=rows, 2586 after=after, 2587 pattern=pattern, 2588 define=define, 2589 alias=self._parse_table_alias(), 2590 ) 2591 2592 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2593 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2594 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 2595 cross_apply = False 2596 2597 if cross_apply is not None: 2598 this = self._parse_select(table=True) 2599 view = None 2600 outer = None 2601 elif self._match(TokenType.LATERAL): 2602 this = self._parse_select(table=True) 2603 view = self._match(TokenType.VIEW) 2604 outer = self._match(TokenType.OUTER) 2605 else: 2606 return None 2607 2608 if not this: 2609 this = ( 2610 self._parse_unnest() 2611 or self._parse_function() 2612 or self._parse_id_var(any_token=False) 2613 ) 2614 2615 while self._match(TokenType.DOT): 2616 this = exp.Dot( 2617 this=this, 2618 expression=self._parse_function() or self._parse_id_var(any_token=False), 2619 ) 2620 2621 if view: 2622 table = self._parse_id_var(any_token=False) 2623 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2624 table_alias: t.Optional[exp.TableAlias] = self.expression( 2625 exp.TableAlias, this=table, columns=columns 2626 ) 2627 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 2628 # We move the alias from the lateral's child node to the lateral itself 2629 table_alias = this.args["alias"].pop() 2630 else: 2631 table_alias = self._parse_table_alias() 2632 2633 return self.expression( 2634 exp.Lateral, 2635 this=this, 2636 view=view, 2637 outer=outer, 2638 alias=table_alias, 2639 cross_apply=cross_apply, 2640 ) 2641 2642 def _parse_join_parts( 2643 self, 2644 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2645 return ( 2646 self._match_set(self.JOIN_METHODS) and self._prev, 2647 self._match_set(self.JOIN_SIDES) and self._prev, 2648 self._match_set(self.JOIN_KINDS) and self._prev, 2649 ) 2650 2651 def _parse_join( 2652 self, skip_join_token: bool = False, parse_bracket: bool = False 2653 ) -> t.Optional[exp.Join]: 2654 if self._match(TokenType.COMMA): 2655 return self.expression(exp.Join, this=self._parse_table()) 2656 2657 index = self._index 2658 method, side, kind = self._parse_join_parts() 2659 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2660 join = self._match(TokenType.JOIN) 2661 2662 if not skip_join_token and not join: 2663 self._retreat(index) 2664 kind = None 2665 method = None 2666 side = None 2667 2668 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2669 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2670 2671 if not skip_join_token and not join and not outer_apply and not cross_apply: 2672 return None 2673 2674 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2675 2676 if method: 2677 kwargs["method"] = method.text 2678 if side: 2679 kwargs["side"] = side.text 2680 if kind: 2681 kwargs["kind"] = kind.text 2682 if hint: 2683 kwargs["hint"] = hint 2684 2685 if self._match(TokenType.ON): 2686 kwargs["on"] = self._parse_conjunction() 2687 elif self._match(TokenType.USING): 2688 kwargs["using"] = self._parse_wrapped_id_vars() 2689 elif not (kind and kind.token_type == TokenType.CROSS): 2690 index = self._index 2691 join = self._parse_join() 2692 2693 if join and self._match(TokenType.ON): 2694 kwargs["on"] = self._parse_conjunction() 2695 elif join and self._match(TokenType.USING): 2696 kwargs["using"] = self._parse_wrapped_id_vars() 2697 else: 2698 join = None 2699 self._retreat(index) 2700 2701 kwargs["this"].set("joins", [join] if join else None) 2702 2703 comments = [c for token in (method, side, kind) if token for c in token.comments] 2704 return self.expression(exp.Join, comments=comments, **kwargs) 2705 2706 def _parse_opclass(self) -> t.Optional[exp.Expression]: 2707 this = self._parse_conjunction() 2708 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 2709 return this 2710 2711 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 2712 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 2713 2714 return this 2715 2716 def _parse_index( 2717 self, 2718 index: t.Optional[exp.Expression] = None, 2719 ) -> t.Optional[exp.Index]: 2720 if index: 2721 unique = None 2722 primary = None 2723 amp = None 2724 2725 self._match(TokenType.ON) 2726 self._match(TokenType.TABLE) # hive 2727 table = self._parse_table_parts(schema=True) 2728 else: 2729 unique = self._match(TokenType.UNIQUE) 2730 primary = self._match_text_seq("PRIMARY") 2731 amp = self._match_text_seq("AMP") 2732 2733 if not self._match(TokenType.INDEX): 2734 return None 2735 2736 index = self._parse_id_var() 2737 table = None 2738 2739 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 2740 2741 if self._match(TokenType.L_PAREN, advance=False): 2742 columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass)) 2743 else: 2744 columns = None 2745 2746 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 2747 2748 return self.expression( 2749 exp.Index, 2750 this=index, 2751 table=table, 2752 using=using, 2753 columns=columns, 2754 unique=unique, 2755 primary=primary, 2756 amp=amp, 2757 include=include, 2758 partition_by=self._parse_partition_by(), 2759 where=self._parse_where(), 2760 ) 2761 2762 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2763 hints: t.List[exp.Expression] = [] 2764 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2765 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2766 hints.append( 2767 self.expression( 2768 exp.WithTableHint, 2769 expressions=self._parse_csv( 2770 lambda: self._parse_function() or self._parse_var(any_token=True) 2771 ), 2772 ) 2773 ) 2774 self._match_r_paren() 2775 else: 2776 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2777 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2778 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2779 2780 self._match_texts(("INDEX", "KEY")) 2781 if self._match(TokenType.FOR): 2782 hint.set("target", self._advance_any() and self._prev.text.upper()) 2783 2784 hint.set("expressions", self._parse_wrapped_id_vars()) 2785 hints.append(hint) 2786 2787 return hints or None 2788 2789 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2790 return ( 2791 (not schema and self._parse_function(optional_parens=False)) 2792 or self._parse_id_var(any_token=False) 2793 or self._parse_string_as_identifier() 2794 or self._parse_placeholder() 2795 ) 2796 2797 def _parse_table_parts(self, schema: bool = False, is_db_reference: bool = False) -> exp.Table: 2798 catalog = None 2799 db = None 2800 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 2801 2802 while self._match(TokenType.DOT): 2803 if catalog: 2804 # This allows nesting the table in arbitrarily many dot expressions if needed 2805 table = self.expression( 2806 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2807 ) 2808 else: 2809 catalog = db 2810 db = table 2811 table = self._parse_table_part(schema=schema) or "" 2812 2813 if is_db_reference: 2814 catalog = db 2815 db = table 2816 table = None 2817 2818 if not table and not is_db_reference: 2819 self.raise_error(f"Expected table name but got {self._curr}") 2820 if not db and is_db_reference: 2821 self.raise_error(f"Expected database name but got {self._curr}") 2822 2823 return self.expression( 2824 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2825 ) 2826 2827 def _parse_table( 2828 self, 2829 schema: bool = False, 2830 joins: bool = False, 2831 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 2832 parse_bracket: bool = False, 2833 is_db_reference: bool = False, 2834 ) -> t.Optional[exp.Expression]: 2835 lateral = self._parse_lateral() 2836 if lateral: 2837 return lateral 2838 2839 unnest = self._parse_unnest() 2840 if unnest: 2841 return unnest 2842 2843 values = self._parse_derived_table_values() 2844 if values: 2845 return values 2846 2847 subquery = self._parse_select(table=True) 2848 if subquery: 2849 if not subquery.args.get("pivots"): 2850 subquery.set("pivots", self._parse_pivots()) 2851 return subquery 2852 2853 bracket = parse_bracket and self._parse_bracket(None) 2854 bracket = self.expression(exp.Table, this=bracket) if bracket else None 2855 this = t.cast( 2856 exp.Expression, 2857 bracket 2858 or self._parse_bracket( 2859 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 2860 ), 2861 ) 2862 2863 if schema: 2864 return self._parse_schema(this=this) 2865 2866 version = self._parse_version() 2867 2868 if version: 2869 this.set("version", version) 2870 2871 if self.dialect.ALIAS_POST_TABLESAMPLE: 2872 table_sample = self._parse_table_sample() 2873 2874 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2875 if alias: 2876 this.set("alias", alias) 2877 2878 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 2879 return self.expression( 2880 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 2881 ) 2882 2883 this.set("hints", self._parse_table_hints()) 2884 2885 if not this.args.get("pivots"): 2886 this.set("pivots", self._parse_pivots()) 2887 2888 if not self.dialect.ALIAS_POST_TABLESAMPLE: 2889 table_sample = self._parse_table_sample() 2890 2891 if table_sample: 2892 table_sample.set("this", this) 2893 this = table_sample 2894 2895 if joins: 2896 for join in iter(self._parse_join, None): 2897 this.append("joins", join) 2898 2899 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 2900 this.set("ordinality", True) 2901 this.set("alias", self._parse_table_alias()) 2902 2903 return this 2904 2905 def _parse_version(self) -> t.Optional[exp.Version]: 2906 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 2907 this = "TIMESTAMP" 2908 elif self._match(TokenType.VERSION_SNAPSHOT): 2909 this = "VERSION" 2910 else: 2911 return None 2912 2913 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 2914 kind = self._prev.text.upper() 2915 start = self._parse_bitwise() 2916 self._match_texts(("TO", "AND")) 2917 end = self._parse_bitwise() 2918 expression: t.Optional[exp.Expression] = self.expression( 2919 exp.Tuple, expressions=[start, end] 2920 ) 2921 elif self._match_text_seq("CONTAINED", "IN"): 2922 kind = "CONTAINED IN" 2923 expression = self.expression( 2924 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 2925 ) 2926 elif self._match(TokenType.ALL): 2927 kind = "ALL" 2928 expression = None 2929 else: 2930 self._match_text_seq("AS", "OF") 2931 kind = "AS OF" 2932 expression = self._parse_type() 2933 2934 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 2935 2936 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2937 if not self._match(TokenType.UNNEST): 2938 return None 2939 2940 expressions = self._parse_wrapped_csv(self._parse_equality) 2941 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2942 2943 alias = self._parse_table_alias() if with_alias else None 2944 2945 if alias: 2946 if self.dialect.UNNEST_COLUMN_ONLY: 2947 if alias.args.get("columns"): 2948 self.raise_error("Unexpected extra column alias in unnest.") 2949 2950 alias.set("columns", [alias.this]) 2951 alias.set("this", None) 2952 2953 columns = alias.args.get("columns") or [] 2954 if offset and len(expressions) < len(columns): 2955 offset = columns.pop() 2956 2957 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 2958 self._match(TokenType.ALIAS) 2959 offset = self._parse_id_var( 2960 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 2961 ) or exp.to_identifier("offset") 2962 2963 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 2964 2965 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2966 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2967 if not is_derived and not self._match(TokenType.VALUES): 2968 return None 2969 2970 expressions = self._parse_csv(self._parse_value) 2971 alias = self._parse_table_alias() 2972 2973 if is_derived: 2974 self._match_r_paren() 2975 2976 return self.expression( 2977 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2978 ) 2979 2980 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2981 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2982 as_modifier and self._match_text_seq("USING", "SAMPLE") 2983 ): 2984 return None 2985 2986 bucket_numerator = None 2987 bucket_denominator = None 2988 bucket_field = None 2989 percent = None 2990 size = None 2991 seed = None 2992 2993 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 2994 matched_l_paren = self._match(TokenType.L_PAREN) 2995 2996 if self.TABLESAMPLE_CSV: 2997 num = None 2998 expressions = self._parse_csv(self._parse_primary) 2999 else: 3000 expressions = None 3001 num = ( 3002 self._parse_factor() 3003 if self._match(TokenType.NUMBER, advance=False) 3004 else self._parse_primary() or self._parse_placeholder() 3005 ) 3006 3007 if self._match_text_seq("BUCKET"): 3008 bucket_numerator = self._parse_number() 3009 self._match_text_seq("OUT", "OF") 3010 bucket_denominator = bucket_denominator = self._parse_number() 3011 self._match(TokenType.ON) 3012 bucket_field = self._parse_field() 3013 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3014 percent = num 3015 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3016 size = num 3017 else: 3018 percent = num 3019 3020 if matched_l_paren: 3021 self._match_r_paren() 3022 3023 if self._match(TokenType.L_PAREN): 3024 method = self._parse_var(upper=True) 3025 seed = self._match(TokenType.COMMA) and self._parse_number() 3026 self._match_r_paren() 3027 elif self._match_texts(("SEED", "REPEATABLE")): 3028 seed = self._parse_wrapped(self._parse_number) 3029 3030 return self.expression( 3031 exp.TableSample, 3032 expressions=expressions, 3033 method=method, 3034 bucket_numerator=bucket_numerator, 3035 bucket_denominator=bucket_denominator, 3036 bucket_field=bucket_field, 3037 percent=percent, 3038 size=size, 3039 seed=seed, 3040 ) 3041 3042 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3043 return list(iter(self._parse_pivot, None)) or None 3044 3045 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 3046 return list(iter(self._parse_join, None)) or None 3047 3048 # https://duckdb.org/docs/sql/statements/pivot 3049 def _parse_simplified_pivot(self) -> exp.Pivot: 3050 def _parse_on() -> t.Optional[exp.Expression]: 3051 this = self._parse_bitwise() 3052 return self._parse_in(this) if self._match(TokenType.IN) else this 3053 3054 this = self._parse_table() 3055 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3056 using = self._match(TokenType.USING) and self._parse_csv( 3057 lambda: self._parse_alias(self._parse_function()) 3058 ) 3059 group = self._parse_group() 3060 return self.expression( 3061 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3062 ) 3063 3064 def _parse_pivot_in(self) -> exp.In: 3065 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3066 this = self._parse_conjunction() 3067 3068 self._match(TokenType.ALIAS) 3069 alias = self._parse_field() 3070 if alias: 3071 return self.expression(exp.PivotAlias, this=this, alias=alias) 3072 3073 return this 3074 3075 value = self._parse_column() 3076 3077 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3078 self.raise_error("Expecting IN (") 3079 3080 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3081 3082 self._match_r_paren() 3083 return self.expression(exp.In, this=value, expressions=aliased_expressions) 3084 3085 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3086 index = self._index 3087 include_nulls = None 3088 3089 if self._match(TokenType.PIVOT): 3090 unpivot = False 3091 elif self._match(TokenType.UNPIVOT): 3092 unpivot = True 3093 3094 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3095 if self._match_text_seq("INCLUDE", "NULLS"): 3096 include_nulls = True 3097 elif self._match_text_seq("EXCLUDE", "NULLS"): 3098 include_nulls = False 3099 else: 3100 return None 3101 3102 expressions = [] 3103 3104 if not self._match(TokenType.L_PAREN): 3105 self._retreat(index) 3106 return None 3107 3108 if unpivot: 3109 expressions = self._parse_csv(self._parse_column) 3110 else: 3111 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3112 3113 if not expressions: 3114 self.raise_error("Failed to parse PIVOT's aggregation list") 3115 3116 if not self._match(TokenType.FOR): 3117 self.raise_error("Expecting FOR") 3118 3119 field = self._parse_pivot_in() 3120 3121 self._match_r_paren() 3122 3123 pivot = self.expression( 3124 exp.Pivot, 3125 expressions=expressions, 3126 field=field, 3127 unpivot=unpivot, 3128 include_nulls=include_nulls, 3129 ) 3130 3131 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3132 pivot.set("alias", self._parse_table_alias()) 3133 3134 if not unpivot: 3135 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3136 3137 columns: t.List[exp.Expression] = [] 3138 for fld in pivot.args["field"].expressions: 3139 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3140 for name in names: 3141 if self.PREFIXED_PIVOT_COLUMNS: 3142 name = f"{name}_{field_name}" if name else field_name 3143 else: 3144 name = f"{field_name}_{name}" if name else field_name 3145 3146 columns.append(exp.to_identifier(name)) 3147 3148 pivot.set("columns", columns) 3149 3150 return pivot 3151 3152 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3153 return [agg.alias for agg in aggregations] 3154 3155 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3156 if not skip_where_token and not self._match(TokenType.WHERE): 3157 return None 3158 3159 return self.expression( 3160 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 3161 ) 3162 3163 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3164 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3165 return None 3166 3167 elements = defaultdict(list) 3168 3169 if self._match(TokenType.ALL): 3170 return self.expression(exp.Group, all=True) 3171 3172 while True: 3173 expressions = self._parse_csv(self._parse_conjunction) 3174 if expressions: 3175 elements["expressions"].extend(expressions) 3176 3177 grouping_sets = self._parse_grouping_sets() 3178 if grouping_sets: 3179 elements["grouping_sets"].extend(grouping_sets) 3180 3181 rollup = None 3182 cube = None 3183 totals = None 3184 3185 index = self._index 3186 with_ = self._match(TokenType.WITH) 3187 if self._match(TokenType.ROLLUP): 3188 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3189 elements["rollup"].extend(ensure_list(rollup)) 3190 3191 if self._match(TokenType.CUBE): 3192 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3193 elements["cube"].extend(ensure_list(cube)) 3194 3195 if self._match_text_seq("TOTALS"): 3196 totals = True 3197 elements["totals"] = True # type: ignore 3198 3199 if not (grouping_sets or rollup or cube or totals): 3200 if with_: 3201 self._retreat(index) 3202 break 3203 3204 return self.expression(exp.Group, **elements) # type: ignore 3205 3206 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3207 if not self._match(TokenType.GROUPING_SETS): 3208 return None 3209 3210 return self._parse_wrapped_csv(self._parse_grouping_set) 3211 3212 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3213 if self._match(TokenType.L_PAREN): 3214 grouping_set = self._parse_csv(self._parse_column) 3215 self._match_r_paren() 3216 return self.expression(exp.Tuple, expressions=grouping_set) 3217 3218 return self._parse_column() 3219 3220 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3221 if not skip_having_token and not self._match(TokenType.HAVING): 3222 return None 3223 return self.expression(exp.Having, this=self._parse_conjunction()) 3224 3225 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3226 if not self._match(TokenType.QUALIFY): 3227 return None 3228 return self.expression(exp.Qualify, this=self._parse_conjunction()) 3229 3230 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3231 if skip_start_token: 3232 start = None 3233 elif self._match(TokenType.START_WITH): 3234 start = self._parse_conjunction() 3235 else: 3236 return None 3237 3238 self._match(TokenType.CONNECT_BY) 3239 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3240 exp.Prior, this=self._parse_bitwise() 3241 ) 3242 connect = self._parse_conjunction() 3243 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3244 3245 if not start and self._match(TokenType.START_WITH): 3246 start = self._parse_conjunction() 3247 3248 return self.expression(exp.Connect, start=start, connect=connect) 3249 3250 def _parse_name_as_expression(self) -> exp.Alias: 3251 return self.expression( 3252 exp.Alias, 3253 alias=self._parse_id_var(any_token=True), 3254 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 3255 ) 3256 3257 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 3258 if self._match_text_seq("INTERPOLATE"): 3259 return self._parse_wrapped_csv(self._parse_name_as_expression) 3260 return None 3261 3262 def _parse_order( 3263 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3264 ) -> t.Optional[exp.Expression]: 3265 siblings = None 3266 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3267 if not self._match(TokenType.ORDER_SIBLINGS_BY): 3268 return this 3269 3270 siblings = True 3271 3272 return self.expression( 3273 exp.Order, 3274 this=this, 3275 expressions=self._parse_csv(self._parse_ordered), 3276 interpolate=self._parse_interpolate(), 3277 siblings=siblings, 3278 ) 3279 3280 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3281 if not self._match(token): 3282 return None 3283 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3284 3285 def _parse_ordered(self, parse_method: t.Optional[t.Callable] = None) -> exp.Ordered: 3286 this = parse_method() if parse_method else self._parse_conjunction() 3287 3288 asc = self._match(TokenType.ASC) 3289 desc = self._match(TokenType.DESC) or (asc and False) 3290 3291 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3292 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3293 3294 nulls_first = is_nulls_first or False 3295 explicitly_null_ordered = is_nulls_first or is_nulls_last 3296 3297 if ( 3298 not explicitly_null_ordered 3299 and ( 3300 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 3301 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 3302 ) 3303 and self.dialect.NULL_ORDERING != "nulls_are_last" 3304 ): 3305 nulls_first = True 3306 3307 if self._match_text_seq("WITH", "FILL"): 3308 with_fill = self.expression( 3309 exp.WithFill, 3310 **{ # type: ignore 3311 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 3312 "to": self._match_text_seq("TO") and self._parse_bitwise(), 3313 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 3314 }, 3315 ) 3316 else: 3317 with_fill = None 3318 3319 return self.expression( 3320 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 3321 ) 3322 3323 def _parse_limit( 3324 self, this: t.Optional[exp.Expression] = None, top: bool = False 3325 ) -> t.Optional[exp.Expression]: 3326 if self._match(TokenType.TOP if top else TokenType.LIMIT): 3327 comments = self._prev_comments 3328 if top: 3329 limit_paren = self._match(TokenType.L_PAREN) 3330 expression = self._parse_term() if limit_paren else self._parse_number() 3331 3332 if limit_paren: 3333 self._match_r_paren() 3334 else: 3335 expression = self._parse_term() 3336 3337 if self._match(TokenType.COMMA): 3338 offset = expression 3339 expression = self._parse_term() 3340 else: 3341 offset = None 3342 3343 limit_exp = self.expression( 3344 exp.Limit, this=this, expression=expression, offset=offset, comments=comments 3345 ) 3346 3347 return limit_exp 3348 3349 if self._match(TokenType.FETCH): 3350 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3351 direction = self._prev.text.upper() if direction else "FIRST" 3352 3353 count = self._parse_field(tokens=self.FETCH_TOKENS) 3354 percent = self._match(TokenType.PERCENT) 3355 3356 self._match_set((TokenType.ROW, TokenType.ROWS)) 3357 3358 only = self._match_text_seq("ONLY") 3359 with_ties = self._match_text_seq("WITH", "TIES") 3360 3361 if only and with_ties: 3362 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3363 3364 return self.expression( 3365 exp.Fetch, 3366 direction=direction, 3367 count=count, 3368 percent=percent, 3369 with_ties=with_ties, 3370 ) 3371 3372 return this 3373 3374 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3375 if not self._match(TokenType.OFFSET): 3376 return this 3377 3378 count = self._parse_term() 3379 self._match_set((TokenType.ROW, TokenType.ROWS)) 3380 return self.expression(exp.Offset, this=this, expression=count) 3381 3382 def _parse_locks(self) -> t.List[exp.Lock]: 3383 locks = [] 3384 while True: 3385 if self._match_text_seq("FOR", "UPDATE"): 3386 update = True 3387 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3388 "LOCK", "IN", "SHARE", "MODE" 3389 ): 3390 update = False 3391 else: 3392 break 3393 3394 expressions = None 3395 if self._match_text_seq("OF"): 3396 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3397 3398 wait: t.Optional[bool | exp.Expression] = None 3399 if self._match_text_seq("NOWAIT"): 3400 wait = True 3401 elif self._match_text_seq("WAIT"): 3402 wait = self._parse_primary() 3403 elif self._match_text_seq("SKIP", "LOCKED"): 3404 wait = False 3405 3406 locks.append( 3407 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3408 ) 3409 3410 return locks 3411 3412 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3413 while this and self._match_set(self.SET_OPERATIONS): 3414 token_type = self._prev.token_type 3415 3416 if token_type == TokenType.UNION: 3417 operation = exp.Union 3418 elif token_type == TokenType.EXCEPT: 3419 operation = exp.Except 3420 else: 3421 operation = exp.Intersect 3422 3423 comments = self._prev.comments 3424 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 3425 by_name = self._match_text_seq("BY", "NAME") 3426 expression = self._parse_select(nested=True, parse_set_operation=False) 3427 3428 this = self.expression( 3429 operation, 3430 comments=comments, 3431 this=this, 3432 distinct=distinct, 3433 by_name=by_name, 3434 expression=expression, 3435 ) 3436 3437 if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION: 3438 expression = this.expression 3439 3440 if expression: 3441 for arg in self.UNION_MODIFIERS: 3442 expr = expression.args.get(arg) 3443 if expr: 3444 this.set(arg, expr.pop()) 3445 3446 return this 3447 3448 def _parse_expression(self) -> t.Optional[exp.Expression]: 3449 return self._parse_alias(self._parse_conjunction()) 3450 3451 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3452 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 3453 3454 def _parse_equality(self) -> t.Optional[exp.Expression]: 3455 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3456 3457 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3458 return self._parse_tokens(self._parse_range, self.COMPARISON) 3459 3460 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3461 this = this or self._parse_bitwise() 3462 negate = self._match(TokenType.NOT) 3463 3464 if self._match_set(self.RANGE_PARSERS): 3465 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 3466 if not expression: 3467 return this 3468 3469 this = expression 3470 elif self._match(TokenType.ISNULL): 3471 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3472 3473 # Postgres supports ISNULL and NOTNULL for conditions. 3474 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 3475 if self._match(TokenType.NOTNULL): 3476 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3477 this = self.expression(exp.Not, this=this) 3478 3479 if negate: 3480 this = self.expression(exp.Not, this=this) 3481 3482 if self._match(TokenType.IS): 3483 this = self._parse_is(this) 3484 3485 return this 3486 3487 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3488 index = self._index - 1 3489 negate = self._match(TokenType.NOT) 3490 3491 if self._match_text_seq("DISTINCT", "FROM"): 3492 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 3493 return self.expression(klass, this=this, expression=self._parse_conjunction()) 3494 3495 expression = self._parse_null() or self._parse_boolean() 3496 if not expression: 3497 self._retreat(index) 3498 return None 3499 3500 this = self.expression(exp.Is, this=this, expression=expression) 3501 return self.expression(exp.Not, this=this) if negate else this 3502 3503 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 3504 unnest = self._parse_unnest(with_alias=False) 3505 if unnest: 3506 this = self.expression(exp.In, this=this, unnest=unnest) 3507 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 3508 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 3509 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 3510 3511 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 3512 this = self.expression(exp.In, this=this, query=expressions[0]) 3513 else: 3514 this = self.expression(exp.In, this=this, expressions=expressions) 3515 3516 if matched_l_paren: 3517 self._match_r_paren(this) 3518 elif not self._match(TokenType.R_BRACKET, expression=this): 3519 self.raise_error("Expecting ]") 3520 else: 3521 this = self.expression(exp.In, this=this, field=self._parse_field()) 3522 3523 return this 3524 3525 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 3526 low = self._parse_bitwise() 3527 self._match(TokenType.AND) 3528 high = self._parse_bitwise() 3529 return self.expression(exp.Between, this=this, low=low, high=high) 3530 3531 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3532 if not self._match(TokenType.ESCAPE): 3533 return this 3534 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3535 3536 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Interval]: 3537 index = self._index 3538 3539 if not self._match(TokenType.INTERVAL) and match_interval: 3540 return None 3541 3542 if self._match(TokenType.STRING, advance=False): 3543 this = self._parse_primary() 3544 else: 3545 this = self._parse_term() 3546 3547 if not this or ( 3548 isinstance(this, exp.Column) 3549 and not this.table 3550 and not this.this.quoted 3551 and this.name.upper() == "IS" 3552 ): 3553 self._retreat(index) 3554 return None 3555 3556 unit = self._parse_function() or ( 3557 not self._match(TokenType.ALIAS, advance=False) 3558 and self._parse_var(any_token=True, upper=True) 3559 ) 3560 3561 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 3562 # each INTERVAL expression into this canonical form so it's easy to transpile 3563 if this and this.is_number: 3564 this = exp.Literal.string(this.name) 3565 elif this and this.is_string: 3566 parts = this.name.split() 3567 3568 if len(parts) == 2: 3569 if unit: 3570 # This is not actually a unit, it's something else (e.g. a "window side") 3571 unit = None 3572 self._retreat(self._index - 1) 3573 3574 this = exp.Literal.string(parts[0]) 3575 unit = self.expression(exp.Var, this=parts[1].upper()) 3576 3577 return self.expression(exp.Interval, this=this, unit=unit) 3578 3579 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3580 this = self._parse_term() 3581 3582 while True: 3583 if self._match_set(self.BITWISE): 3584 this = self.expression( 3585 self.BITWISE[self._prev.token_type], 3586 this=this, 3587 expression=self._parse_term(), 3588 ) 3589 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 3590 this = self.expression( 3591 exp.DPipe, 3592 this=this, 3593 expression=self._parse_term(), 3594 safe=not self.dialect.STRICT_STRING_CONCAT, 3595 ) 3596 elif self._match(TokenType.DQMARK): 3597 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3598 elif self._match_pair(TokenType.LT, TokenType.LT): 3599 this = self.expression( 3600 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3601 ) 3602 elif self._match_pair(TokenType.GT, TokenType.GT): 3603 this = self.expression( 3604 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3605 ) 3606 else: 3607 break 3608 3609 return this 3610 3611 def _parse_term(self) -> t.Optional[exp.Expression]: 3612 return self._parse_tokens(self._parse_factor, self.TERM) 3613 3614 def _parse_factor(self) -> t.Optional[exp.Expression]: 3615 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 3616 this = parse_method() 3617 3618 while self._match_set(self.FACTOR): 3619 this = self.expression( 3620 self.FACTOR[self._prev.token_type], 3621 this=this, 3622 comments=self._prev_comments, 3623 expression=parse_method(), 3624 ) 3625 if isinstance(this, exp.Div): 3626 this.args["typed"] = self.dialect.TYPED_DIVISION 3627 this.args["safe"] = self.dialect.SAFE_DIVISION 3628 3629 return this 3630 3631 def _parse_exponent(self) -> t.Optional[exp.Expression]: 3632 return self._parse_tokens(self._parse_unary, self.EXPONENT) 3633 3634 def _parse_unary(self) -> t.Optional[exp.Expression]: 3635 if self._match_set(self.UNARY_PARSERS): 3636 return self.UNARY_PARSERS[self._prev.token_type](self) 3637 return self._parse_at_time_zone(self._parse_type()) 3638 3639 def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]: 3640 interval = parse_interval and self._parse_interval() 3641 if interval: 3642 # Convert INTERVAL 'val_1' unit_1 ... 'val_n' unit_n into a sum of intervals 3643 while self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 3644 interval = self.expression( # type: ignore 3645 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 3646 ) 3647 3648 return interval 3649 3650 index = self._index 3651 data_type = self._parse_types(check_func=True, allow_identifiers=False) 3652 this = self._parse_column() 3653 3654 if data_type: 3655 if isinstance(this, exp.Literal): 3656 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3657 if parser: 3658 return parser(self, this, data_type) 3659 return self.expression(exp.Cast, this=this, to=data_type) 3660 if not data_type.expressions: 3661 self._retreat(index) 3662 return self._parse_column() 3663 return self._parse_column_ops(data_type) 3664 3665 return this and self._parse_column_ops(this) 3666 3667 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 3668 this = self._parse_type() 3669 if not this: 3670 return None 3671 3672 return self.expression( 3673 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 3674 ) 3675 3676 def _parse_types( 3677 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 3678 ) -> t.Optional[exp.Expression]: 3679 index = self._index 3680 3681 prefix = self._match_text_seq("SYSUDTLIB", ".") 3682 3683 if not self._match_set(self.TYPE_TOKENS): 3684 identifier = allow_identifiers and self._parse_id_var( 3685 any_token=False, tokens=(TokenType.VAR,) 3686 ) 3687 if identifier: 3688 tokens = self.dialect.tokenize(identifier.name) 3689 3690 if len(tokens) != 1: 3691 self.raise_error("Unexpected identifier", self._prev) 3692 3693 if tokens[0].token_type in self.TYPE_TOKENS: 3694 self._prev = tokens[0] 3695 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 3696 type_name = identifier.name 3697 3698 while self._match(TokenType.DOT): 3699 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 3700 3701 return exp.DataType.build(type_name, udt=True) 3702 else: 3703 self._retreat(self._index - 1) 3704 return None 3705 else: 3706 return None 3707 3708 type_token = self._prev.token_type 3709 3710 if type_token == TokenType.PSEUDO_TYPE: 3711 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 3712 3713 if type_token == TokenType.OBJECT_IDENTIFIER: 3714 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 3715 3716 nested = type_token in self.NESTED_TYPE_TOKENS 3717 is_struct = type_token in self.STRUCT_TYPE_TOKENS 3718 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 3719 expressions = None 3720 maybe_func = False 3721 3722 if self._match(TokenType.L_PAREN): 3723 if is_struct: 3724 expressions = self._parse_csv(self._parse_struct_types) 3725 elif nested: 3726 expressions = self._parse_csv( 3727 lambda: self._parse_types( 3728 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3729 ) 3730 ) 3731 elif type_token in self.ENUM_TYPE_TOKENS: 3732 expressions = self._parse_csv(self._parse_equality) 3733 elif is_aggregate: 3734 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 3735 any_token=False, tokens=(TokenType.VAR,) 3736 ) 3737 if not func_or_ident or not self._match(TokenType.COMMA): 3738 return None 3739 expressions = self._parse_csv( 3740 lambda: self._parse_types( 3741 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3742 ) 3743 ) 3744 expressions.insert(0, func_or_ident) 3745 else: 3746 expressions = self._parse_csv(self._parse_type_size) 3747 3748 if not expressions or not self._match(TokenType.R_PAREN): 3749 self._retreat(index) 3750 return None 3751 3752 maybe_func = True 3753 3754 this: t.Optional[exp.Expression] = None 3755 values: t.Optional[t.List[exp.Expression]] = None 3756 3757 if nested and self._match(TokenType.LT): 3758 if is_struct: 3759 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 3760 else: 3761 expressions = self._parse_csv( 3762 lambda: self._parse_types( 3763 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3764 ) 3765 ) 3766 3767 if not self._match(TokenType.GT): 3768 self.raise_error("Expecting >") 3769 3770 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 3771 values = self._parse_csv(self._parse_conjunction) 3772 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 3773 3774 if type_token in self.TIMESTAMPS: 3775 if self._match_text_seq("WITH", "TIME", "ZONE"): 3776 maybe_func = False 3777 tz_type = ( 3778 exp.DataType.Type.TIMETZ 3779 if type_token in self.TIMES 3780 else exp.DataType.Type.TIMESTAMPTZ 3781 ) 3782 this = exp.DataType(this=tz_type, expressions=expressions) 3783 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 3784 maybe_func = False 3785 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 3786 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 3787 maybe_func = False 3788 elif type_token == TokenType.INTERVAL: 3789 unit = self._parse_var() 3790 3791 if self._match_text_seq("TO"): 3792 span = [exp.IntervalSpan(this=unit, expression=self._parse_var())] 3793 else: 3794 span = None 3795 3796 if span or not unit: 3797 this = self.expression( 3798 exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span 3799 ) 3800 else: 3801 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 3802 3803 if maybe_func and check_func: 3804 index2 = self._index 3805 peek = self._parse_string() 3806 3807 if not peek: 3808 self._retreat(index) 3809 return None 3810 3811 self._retreat(index2) 3812 3813 if not this: 3814 if self._match_text_seq("UNSIGNED"): 3815 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 3816 if not unsigned_type_token: 3817 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 3818 3819 type_token = unsigned_type_token or type_token 3820 3821 this = exp.DataType( 3822 this=exp.DataType.Type[type_token.value], 3823 expressions=expressions, 3824 nested=nested, 3825 values=values, 3826 prefix=prefix, 3827 ) 3828 3829 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3830 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 3831 3832 return this 3833 3834 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 3835 index = self._index 3836 this = self._parse_type(parse_interval=False) or self._parse_id_var() 3837 self._match(TokenType.COLON) 3838 column_def = self._parse_column_def(this) 3839 3840 if type_required and ( 3841 (isinstance(this, exp.Column) and this.this is column_def) or this is column_def 3842 ): 3843 self._retreat(index) 3844 return self._parse_types() 3845 3846 return column_def 3847 3848 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3849 if not self._match_text_seq("AT", "TIME", "ZONE"): 3850 return this 3851 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 3852 3853 def _parse_column(self) -> t.Optional[exp.Expression]: 3854 this = self._parse_field() 3855 if isinstance(this, exp.Identifier): 3856 this = self.expression(exp.Column, this=this) 3857 elif not this: 3858 return self._parse_bracket(this) 3859 return self._parse_column_ops(this) 3860 3861 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3862 this = self._parse_bracket(this) 3863 3864 while self._match_set(self.COLUMN_OPERATORS): 3865 op_token = self._prev.token_type 3866 op = self.COLUMN_OPERATORS.get(op_token) 3867 3868 if op_token == TokenType.DCOLON: 3869 field = self._parse_types() 3870 if not field: 3871 self.raise_error("Expected type") 3872 elif op and self._curr: 3873 self._advance() 3874 value = self._prev.text 3875 field = ( 3876 exp.Literal.number(value) 3877 if self._prev.token_type == TokenType.NUMBER 3878 else exp.Literal.string(value) 3879 ) 3880 else: 3881 field = self._parse_field(anonymous_func=True, any_token=True) 3882 3883 if isinstance(field, exp.Func): 3884 # bigquery allows function calls like x.y.count(...) 3885 # SAFE.SUBSTR(...) 3886 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3887 this = self._replace_columns_with_dots(this) 3888 3889 if op: 3890 this = op(self, this, field) 3891 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3892 this = self.expression( 3893 exp.Column, 3894 this=field, 3895 table=this.this, 3896 db=this.args.get("table"), 3897 catalog=this.args.get("db"), 3898 ) 3899 else: 3900 this = self.expression(exp.Dot, this=this, expression=field) 3901 this = self._parse_bracket(this) 3902 return this 3903 3904 def _parse_primary(self) -> t.Optional[exp.Expression]: 3905 if self._match_set(self.PRIMARY_PARSERS): 3906 token_type = self._prev.token_type 3907 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3908 3909 if token_type == TokenType.STRING: 3910 expressions = [primary] 3911 while self._match(TokenType.STRING): 3912 expressions.append(exp.Literal.string(self._prev.text)) 3913 3914 if len(expressions) > 1: 3915 return self.expression(exp.Concat, expressions=expressions) 3916 3917 return primary 3918 3919 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3920 return exp.Literal.number(f"0.{self._prev.text}") 3921 3922 if self._match(TokenType.L_PAREN): 3923 comments = self._prev_comments 3924 query = self._parse_select() 3925 3926 if query: 3927 expressions = [query] 3928 else: 3929 expressions = self._parse_expressions() 3930 3931 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3932 3933 if isinstance(this, exp.Subqueryable): 3934 this = self._parse_set_operations( 3935 self._parse_subquery(this=this, parse_alias=False) 3936 ) 3937 elif len(expressions) > 1: 3938 this = self.expression(exp.Tuple, expressions=expressions) 3939 else: 3940 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 3941 3942 if this: 3943 this.add_comments(comments) 3944 3945 self._match_r_paren(expression=this) 3946 return this 3947 3948 return None 3949 3950 def _parse_field( 3951 self, 3952 any_token: bool = False, 3953 tokens: t.Optional[t.Collection[TokenType]] = None, 3954 anonymous_func: bool = False, 3955 ) -> t.Optional[exp.Expression]: 3956 return ( 3957 self._parse_primary() 3958 or self._parse_function(anonymous=anonymous_func) 3959 or self._parse_id_var(any_token=any_token, tokens=tokens) 3960 ) 3961 3962 def _parse_function( 3963 self, 3964 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3965 anonymous: bool = False, 3966 optional_parens: bool = True, 3967 ) -> t.Optional[exp.Expression]: 3968 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 3969 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 3970 fn_syntax = False 3971 if ( 3972 self._match(TokenType.L_BRACE, advance=False) 3973 and self._next 3974 and self._next.text.upper() == "FN" 3975 ): 3976 self._advance(2) 3977 fn_syntax = True 3978 3979 func = self._parse_function_call( 3980 functions=functions, anonymous=anonymous, optional_parens=optional_parens 3981 ) 3982 3983 if fn_syntax: 3984 self._match(TokenType.R_BRACE) 3985 3986 return func 3987 3988 def _parse_function_call( 3989 self, 3990 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3991 anonymous: bool = False, 3992 optional_parens: bool = True, 3993 ) -> t.Optional[exp.Expression]: 3994 if not self._curr: 3995 return None 3996 3997 comments = self._curr.comments 3998 token_type = self._curr.token_type 3999 this = self._curr.text 4000 upper = this.upper() 4001 4002 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 4003 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 4004 self._advance() 4005 return parser(self) 4006 4007 if not self._next or self._next.token_type != TokenType.L_PAREN: 4008 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 4009 self._advance() 4010 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 4011 4012 return None 4013 4014 if token_type not in self.FUNC_TOKENS: 4015 return None 4016 4017 self._advance(2) 4018 4019 parser = self.FUNCTION_PARSERS.get(upper) 4020 if parser and not anonymous: 4021 this = parser(self) 4022 else: 4023 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 4024 4025 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 4026 this = self.expression(subquery_predicate, this=self._parse_select()) 4027 self._match_r_paren() 4028 return this 4029 4030 if functions is None: 4031 functions = self.FUNCTIONS 4032 4033 function = functions.get(upper) 4034 4035 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 4036 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 4037 4038 if function and not anonymous: 4039 if "dialect" in function.__code__.co_varnames: 4040 func = function(args, dialect=self.dialect) 4041 else: 4042 func = function(args) 4043 4044 func = self.validate_expression(func, args) 4045 if not self.dialect.NORMALIZE_FUNCTIONS: 4046 func.meta["name"] = this 4047 4048 this = func 4049 else: 4050 this = self.expression(exp.Anonymous, this=this, expressions=args) 4051 4052 if isinstance(this, exp.Expression): 4053 this.add_comments(comments) 4054 4055 self._match_r_paren(this) 4056 return self._parse_window(this) 4057 4058 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 4059 return self._parse_column_def(self._parse_id_var()) 4060 4061 def _parse_user_defined_function( 4062 self, kind: t.Optional[TokenType] = None 4063 ) -> t.Optional[exp.Expression]: 4064 this = self._parse_id_var() 4065 4066 while self._match(TokenType.DOT): 4067 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 4068 4069 if not self._match(TokenType.L_PAREN): 4070 return this 4071 4072 expressions = self._parse_csv(self._parse_function_parameter) 4073 self._match_r_paren() 4074 return self.expression( 4075 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 4076 ) 4077 4078 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 4079 literal = self._parse_primary() 4080 if literal: 4081 return self.expression(exp.Introducer, this=token.text, expression=literal) 4082 4083 return self.expression(exp.Identifier, this=token.text) 4084 4085 def _parse_session_parameter(self) -> exp.SessionParameter: 4086 kind = None 4087 this = self._parse_id_var() or self._parse_primary() 4088 4089 if this and self._match(TokenType.DOT): 4090 kind = this.name 4091 this = self._parse_var() or self._parse_primary() 4092 4093 return self.expression(exp.SessionParameter, this=this, kind=kind) 4094 4095 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 4096 index = self._index 4097 4098 if self._match(TokenType.L_PAREN): 4099 expressions = t.cast( 4100 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var) 4101 ) 4102 4103 if not self._match(TokenType.R_PAREN): 4104 self._retreat(index) 4105 else: 4106 expressions = [self._parse_id_var()] 4107 4108 if self._match_set(self.LAMBDAS): 4109 return self.LAMBDAS[self._prev.token_type](self, expressions) 4110 4111 self._retreat(index) 4112 4113 this: t.Optional[exp.Expression] 4114 4115 if self._match(TokenType.DISTINCT): 4116 this = self.expression( 4117 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 4118 ) 4119 else: 4120 this = self._parse_select_or_expression(alias=alias) 4121 4122 return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this))) 4123 4124 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4125 index = self._index 4126 4127 if not self.errors: 4128 try: 4129 if self._parse_select(nested=True): 4130 return this 4131 except ParseError: 4132 pass 4133 finally: 4134 self.errors.clear() 4135 self._retreat(index) 4136 4137 if not self._match(TokenType.L_PAREN): 4138 return this 4139 4140 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 4141 4142 self._match_r_paren() 4143 return self.expression(exp.Schema, this=this, expressions=args) 4144 4145 def _parse_field_def(self) -> t.Optional[exp.Expression]: 4146 return self._parse_column_def(self._parse_field(any_token=True)) 4147 4148 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4149 # column defs are not really columns, they're identifiers 4150 if isinstance(this, exp.Column): 4151 this = this.this 4152 4153 kind = self._parse_types(schema=True) 4154 4155 if self._match_text_seq("FOR", "ORDINALITY"): 4156 return self.expression(exp.ColumnDef, this=this, ordinality=True) 4157 4158 constraints: t.List[exp.Expression] = [] 4159 4160 if not kind and self._match(TokenType.ALIAS): 4161 constraints.append( 4162 self.expression( 4163 exp.ComputedColumnConstraint, 4164 this=self._parse_conjunction(), 4165 persisted=self._match_text_seq("PERSISTED"), 4166 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 4167 ) 4168 ) 4169 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 4170 self._match(TokenType.ALIAS) 4171 constraints.append( 4172 self.expression(exp.TransformColumnConstraint, this=self._parse_field()) 4173 ) 4174 4175 while True: 4176 constraint = self._parse_column_constraint() 4177 if not constraint: 4178 break 4179 constraints.append(constraint) 4180 4181 if not kind and not constraints: 4182 return this 4183 4184 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 4185 4186 def _parse_auto_increment( 4187 self, 4188 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 4189 start = None 4190 increment = None 4191 4192 if self._match(TokenType.L_PAREN, advance=False): 4193 args = self._parse_wrapped_csv(self._parse_bitwise) 4194 start = seq_get(args, 0) 4195 increment = seq_get(args, 1) 4196 elif self._match_text_seq("START"): 4197 start = self._parse_bitwise() 4198 self._match_text_seq("INCREMENT") 4199 increment = self._parse_bitwise() 4200 4201 if start and increment: 4202 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 4203 4204 return exp.AutoIncrementColumnConstraint() 4205 4206 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 4207 if not self._match_text_seq("REFRESH"): 4208 self._retreat(self._index - 1) 4209 return None 4210 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 4211 4212 def _parse_compress(self) -> exp.CompressColumnConstraint: 4213 if self._match(TokenType.L_PAREN, advance=False): 4214 return self.expression( 4215 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 4216 ) 4217 4218 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 4219 4220 def _parse_generated_as_identity( 4221 self, 4222 ) -> ( 4223 exp.GeneratedAsIdentityColumnConstraint 4224 | exp.ComputedColumnConstraint 4225 | exp.GeneratedAsRowColumnConstraint 4226 ): 4227 if self._match_text_seq("BY", "DEFAULT"): 4228 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 4229 this = self.expression( 4230 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 4231 ) 4232 else: 4233 self._match_text_seq("ALWAYS") 4234 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 4235 4236 self._match(TokenType.ALIAS) 4237 4238 if self._match_text_seq("ROW"): 4239 start = self._match_text_seq("START") 4240 if not start: 4241 self._match(TokenType.END) 4242 hidden = self._match_text_seq("HIDDEN") 4243 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 4244 4245 identity = self._match_text_seq("IDENTITY") 4246 4247 if self._match(TokenType.L_PAREN): 4248 if self._match(TokenType.START_WITH): 4249 this.set("start", self._parse_bitwise()) 4250 if self._match_text_seq("INCREMENT", "BY"): 4251 this.set("increment", self._parse_bitwise()) 4252 if self._match_text_seq("MINVALUE"): 4253 this.set("minvalue", self._parse_bitwise()) 4254 if self._match_text_seq("MAXVALUE"): 4255 this.set("maxvalue", self._parse_bitwise()) 4256 4257 if self._match_text_seq("CYCLE"): 4258 this.set("cycle", True) 4259 elif self._match_text_seq("NO", "CYCLE"): 4260 this.set("cycle", False) 4261 4262 if not identity: 4263 this.set("expression", self._parse_bitwise()) 4264 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 4265 args = self._parse_csv(self._parse_bitwise) 4266 this.set("start", seq_get(args, 0)) 4267 this.set("increment", seq_get(args, 1)) 4268 4269 self._match_r_paren() 4270 4271 return this 4272 4273 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 4274 self._match_text_seq("LENGTH") 4275 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 4276 4277 def _parse_not_constraint( 4278 self, 4279 ) -> t.Optional[exp.Expression]: 4280 if self._match_text_seq("NULL"): 4281 return self.expression(exp.NotNullColumnConstraint) 4282 if self._match_text_seq("CASESPECIFIC"): 4283 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 4284 if self._match_text_seq("FOR", "REPLICATION"): 4285 return self.expression(exp.NotForReplicationColumnConstraint) 4286 return None 4287 4288 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 4289 if self._match(TokenType.CONSTRAINT): 4290 this = self._parse_id_var() 4291 else: 4292 this = None 4293 4294 if self._match_texts(self.CONSTRAINT_PARSERS): 4295 return self.expression( 4296 exp.ColumnConstraint, 4297 this=this, 4298 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 4299 ) 4300 4301 return this 4302 4303 def _parse_constraint(self) -> t.Optional[exp.Expression]: 4304 if not self._match(TokenType.CONSTRAINT): 4305 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 4306 4307 this = self._parse_id_var() 4308 expressions = [] 4309 4310 while True: 4311 constraint = self._parse_unnamed_constraint() or self._parse_function() 4312 if not constraint: 4313 break 4314 expressions.append(constraint) 4315 4316 return self.expression(exp.Constraint, this=this, expressions=expressions) 4317 4318 def _parse_unnamed_constraint( 4319 self, constraints: t.Optional[t.Collection[str]] = None 4320 ) -> t.Optional[exp.Expression]: 4321 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 4322 constraints or self.CONSTRAINT_PARSERS 4323 ): 4324 return None 4325 4326 constraint = self._prev.text.upper() 4327 if constraint not in self.CONSTRAINT_PARSERS: 4328 self.raise_error(f"No parser found for schema constraint {constraint}.") 4329 4330 return self.CONSTRAINT_PARSERS[constraint](self) 4331 4332 def _parse_unique(self) -> exp.UniqueColumnConstraint: 4333 self._match_text_seq("KEY") 4334 return self.expression( 4335 exp.UniqueColumnConstraint, 4336 this=self._parse_schema(self._parse_id_var(any_token=False)), 4337 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 4338 ) 4339 4340 def _parse_key_constraint_options(self) -> t.List[str]: 4341 options = [] 4342 while True: 4343 if not self._curr: 4344 break 4345 4346 if self._match(TokenType.ON): 4347 action = None 4348 on = self._advance_any() and self._prev.text 4349 4350 if self._match_text_seq("NO", "ACTION"): 4351 action = "NO ACTION" 4352 elif self._match_text_seq("CASCADE"): 4353 action = "CASCADE" 4354 elif self._match_text_seq("RESTRICT"): 4355 action = "RESTRICT" 4356 elif self._match_pair(TokenType.SET, TokenType.NULL): 4357 action = "SET NULL" 4358 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 4359 action = "SET DEFAULT" 4360 else: 4361 self.raise_error("Invalid key constraint") 4362 4363 options.append(f"ON {on} {action}") 4364 elif self._match_text_seq("NOT", "ENFORCED"): 4365 options.append("NOT ENFORCED") 4366 elif self._match_text_seq("DEFERRABLE"): 4367 options.append("DEFERRABLE") 4368 elif self._match_text_seq("INITIALLY", "DEFERRED"): 4369 options.append("INITIALLY DEFERRED") 4370 elif self._match_text_seq("NORELY"): 4371 options.append("NORELY") 4372 elif self._match_text_seq("MATCH", "FULL"): 4373 options.append("MATCH FULL") 4374 else: 4375 break 4376 4377 return options 4378 4379 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 4380 if match and not self._match(TokenType.REFERENCES): 4381 return None 4382 4383 expressions = None 4384 this = self._parse_table(schema=True) 4385 options = self._parse_key_constraint_options() 4386 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 4387 4388 def _parse_foreign_key(self) -> exp.ForeignKey: 4389 expressions = self._parse_wrapped_id_vars() 4390 reference = self._parse_references() 4391 options = {} 4392 4393 while self._match(TokenType.ON): 4394 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 4395 self.raise_error("Expected DELETE or UPDATE") 4396 4397 kind = self._prev.text.lower() 4398 4399 if self._match_text_seq("NO", "ACTION"): 4400 action = "NO ACTION" 4401 elif self._match(TokenType.SET): 4402 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 4403 action = "SET " + self._prev.text.upper() 4404 else: 4405 self._advance() 4406 action = self._prev.text.upper() 4407 4408 options[kind] = action 4409 4410 return self.expression( 4411 exp.ForeignKey, 4412 expressions=expressions, 4413 reference=reference, 4414 **options, # type: ignore 4415 ) 4416 4417 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 4418 return self._parse_field() 4419 4420 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 4421 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 4422 self._retreat(self._index - 1) 4423 return None 4424 4425 id_vars = self._parse_wrapped_id_vars() 4426 return self.expression( 4427 exp.PeriodForSystemTimeConstraint, 4428 this=seq_get(id_vars, 0), 4429 expression=seq_get(id_vars, 1), 4430 ) 4431 4432 def _parse_primary_key( 4433 self, wrapped_optional: bool = False, in_props: bool = False 4434 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 4435 desc = ( 4436 self._match_set((TokenType.ASC, TokenType.DESC)) 4437 and self._prev.token_type == TokenType.DESC 4438 ) 4439 4440 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 4441 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 4442 4443 expressions = self._parse_wrapped_csv( 4444 self._parse_primary_key_part, optional=wrapped_optional 4445 ) 4446 options = self._parse_key_constraint_options() 4447 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 4448 4449 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 4450 return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True)) 4451 4452 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4453 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 4454 return this 4455 4456 bracket_kind = self._prev.token_type 4457 expressions = self._parse_csv( 4458 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 4459 ) 4460 4461 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 4462 self.raise_error("Expected ]") 4463 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 4464 self.raise_error("Expected }") 4465 4466 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 4467 if bracket_kind == TokenType.L_BRACE: 4468 this = self.expression(exp.Struct, expressions=expressions) 4469 elif not this or this.name.upper() == "ARRAY": 4470 this = self.expression(exp.Array, expressions=expressions) 4471 else: 4472 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 4473 this = self.expression(exp.Bracket, this=this, expressions=expressions) 4474 4475 self._add_comments(this) 4476 return self._parse_bracket(this) 4477 4478 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4479 if self._match(TokenType.COLON): 4480 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 4481 return this 4482 4483 def _parse_case(self) -> t.Optional[exp.Expression]: 4484 ifs = [] 4485 default = None 4486 4487 comments = self._prev_comments 4488 expression = self._parse_conjunction() 4489 4490 while self._match(TokenType.WHEN): 4491 this = self._parse_conjunction() 4492 self._match(TokenType.THEN) 4493 then = self._parse_conjunction() 4494 ifs.append(self.expression(exp.If, this=this, true=then)) 4495 4496 if self._match(TokenType.ELSE): 4497 default = self._parse_conjunction() 4498 4499 if not self._match(TokenType.END): 4500 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 4501 default = exp.column("interval") 4502 else: 4503 self.raise_error("Expected END after CASE", self._prev) 4504 4505 return self._parse_window( 4506 self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default) 4507 ) 4508 4509 def _parse_if(self) -> t.Optional[exp.Expression]: 4510 if self._match(TokenType.L_PAREN): 4511 args = self._parse_csv(self._parse_conjunction) 4512 this = self.validate_expression(exp.If.from_arg_list(args), args) 4513 self._match_r_paren() 4514 else: 4515 index = self._index - 1 4516 4517 if self.NO_PAREN_IF_COMMANDS and index == 0: 4518 return self._parse_as_command(self._prev) 4519 4520 condition = self._parse_conjunction() 4521 4522 if not condition: 4523 self._retreat(index) 4524 return None 4525 4526 self._match(TokenType.THEN) 4527 true = self._parse_conjunction() 4528 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 4529 self._match(TokenType.END) 4530 this = self.expression(exp.If, this=condition, true=true, false=false) 4531 4532 return self._parse_window(this) 4533 4534 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 4535 if not self._match_text_seq("VALUE", "FOR"): 4536 self._retreat(self._index - 1) 4537 return None 4538 4539 return self.expression( 4540 exp.NextValueFor, 4541 this=self._parse_column(), 4542 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 4543 ) 4544 4545 def _parse_extract(self) -> exp.Extract: 4546 this = self._parse_function() or self._parse_var() or self._parse_type() 4547 4548 if self._match(TokenType.FROM): 4549 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4550 4551 if not self._match(TokenType.COMMA): 4552 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 4553 4554 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4555 4556 def _parse_any_value(self) -> exp.AnyValue: 4557 this = self._parse_lambda() 4558 is_max = None 4559 having = None 4560 4561 if self._match(TokenType.HAVING): 4562 self._match_texts(("MAX", "MIN")) 4563 is_max = self._prev.text == "MAX" 4564 having = self._parse_column() 4565 4566 return self.expression(exp.AnyValue, this=this, having=having, max=is_max) 4567 4568 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 4569 this = self._parse_conjunction() 4570 4571 if not self._match(TokenType.ALIAS): 4572 if self._match(TokenType.COMMA): 4573 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 4574 4575 self.raise_error("Expected AS after CAST") 4576 4577 fmt = None 4578 to = self._parse_types() 4579 4580 if self._match(TokenType.FORMAT): 4581 fmt_string = self._parse_string() 4582 fmt = self._parse_at_time_zone(fmt_string) 4583 4584 if not to: 4585 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 4586 if to.this in exp.DataType.TEMPORAL_TYPES: 4587 this = self.expression( 4588 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 4589 this=this, 4590 format=exp.Literal.string( 4591 format_time( 4592 fmt_string.this if fmt_string else "", 4593 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 4594 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 4595 ) 4596 ), 4597 ) 4598 4599 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 4600 this.set("zone", fmt.args["zone"]) 4601 return this 4602 elif not to: 4603 self.raise_error("Expected TYPE after CAST") 4604 elif isinstance(to, exp.Identifier): 4605 to = exp.DataType.build(to.name, udt=True) 4606 elif to.this == exp.DataType.Type.CHAR: 4607 if self._match(TokenType.CHARACTER_SET): 4608 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 4609 4610 return self.expression( 4611 exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt, safe=safe 4612 ) 4613 4614 def _parse_string_agg(self) -> exp.Expression: 4615 if self._match(TokenType.DISTINCT): 4616 args: t.List[t.Optional[exp.Expression]] = [ 4617 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 4618 ] 4619 if self._match(TokenType.COMMA): 4620 args.extend(self._parse_csv(self._parse_conjunction)) 4621 else: 4622 args = self._parse_csv(self._parse_conjunction) # type: ignore 4623 4624 index = self._index 4625 if not self._match(TokenType.R_PAREN) and args: 4626 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 4627 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 4628 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 4629 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 4630 4631 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 4632 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 4633 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 4634 if not self._match_text_seq("WITHIN", "GROUP"): 4635 self._retreat(index) 4636 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 4637 4638 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 4639 order = self._parse_order(this=seq_get(args, 0)) 4640 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 4641 4642 def _parse_convert( 4643 self, strict: bool, safe: t.Optional[bool] = None 4644 ) -> t.Optional[exp.Expression]: 4645 this = self._parse_bitwise() 4646 4647 if self._match(TokenType.USING): 4648 to: t.Optional[exp.Expression] = self.expression( 4649 exp.CharacterSet, this=self._parse_var() 4650 ) 4651 elif self._match(TokenType.COMMA): 4652 to = self._parse_types() 4653 else: 4654 to = None 4655 4656 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 4657 4658 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 4659 """ 4660 There are generally two variants of the DECODE function: 4661 4662 - DECODE(bin, charset) 4663 - DECODE(expression, search, result [, search, result] ... [, default]) 4664 4665 The second variant will always be parsed into a CASE expression. Note that NULL 4666 needs special treatment, since we need to explicitly check for it with `IS NULL`, 4667 instead of relying on pattern matching. 4668 """ 4669 args = self._parse_csv(self._parse_conjunction) 4670 4671 if len(args) < 3: 4672 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 4673 4674 expression, *expressions = args 4675 if not expression: 4676 return None 4677 4678 ifs = [] 4679 for search, result in zip(expressions[::2], expressions[1::2]): 4680 if not search or not result: 4681 return None 4682 4683 if isinstance(search, exp.Literal): 4684 ifs.append( 4685 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 4686 ) 4687 elif isinstance(search, exp.Null): 4688 ifs.append( 4689 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 4690 ) 4691 else: 4692 cond = exp.or_( 4693 exp.EQ(this=expression.copy(), expression=search), 4694 exp.and_( 4695 exp.Is(this=expression.copy(), expression=exp.Null()), 4696 exp.Is(this=search.copy(), expression=exp.Null()), 4697 copy=False, 4698 ), 4699 copy=False, 4700 ) 4701 ifs.append(exp.If(this=cond, true=result)) 4702 4703 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 4704 4705 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 4706 self._match_text_seq("KEY") 4707 key = self._parse_column() 4708 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 4709 self._match_text_seq("VALUE") 4710 value = self._parse_bitwise() 4711 4712 if not key and not value: 4713 return None 4714 return self.expression(exp.JSONKeyValue, this=key, expression=value) 4715 4716 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4717 if not this or not self._match_text_seq("FORMAT", "JSON"): 4718 return this 4719 4720 return self.expression(exp.FormatJson, this=this) 4721 4722 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 4723 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 4724 for value in values: 4725 if self._match_text_seq(value, "ON", on): 4726 return f"{value} ON {on}" 4727 4728 return None 4729 4730 @t.overload 4731 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: 4732 ... 4733 4734 @t.overload 4735 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: 4736 ... 4737 4738 def _parse_json_object(self, agg=False): 4739 star = self._parse_star() 4740 expressions = ( 4741 [star] 4742 if star 4743 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 4744 ) 4745 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 4746 4747 unique_keys = None 4748 if self._match_text_seq("WITH", "UNIQUE"): 4749 unique_keys = True 4750 elif self._match_text_seq("WITHOUT", "UNIQUE"): 4751 unique_keys = False 4752 4753 self._match_text_seq("KEYS") 4754 4755 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 4756 self._parse_type() 4757 ) 4758 encoding = self._match_text_seq("ENCODING") and self._parse_var() 4759 4760 return self.expression( 4761 exp.JSONObjectAgg if agg else exp.JSONObject, 4762 expressions=expressions, 4763 null_handling=null_handling, 4764 unique_keys=unique_keys, 4765 return_type=return_type, 4766 encoding=encoding, 4767 ) 4768 4769 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 4770 def _parse_json_column_def(self) -> exp.JSONColumnDef: 4771 if not self._match_text_seq("NESTED"): 4772 this = self._parse_id_var() 4773 kind = self._parse_types(allow_identifiers=False) 4774 nested = None 4775 else: 4776 this = None 4777 kind = None 4778 nested = True 4779 4780 path = self._match_text_seq("PATH") and self._parse_string() 4781 nested_schema = nested and self._parse_json_schema() 4782 4783 return self.expression( 4784 exp.JSONColumnDef, 4785 this=this, 4786 kind=kind, 4787 path=path, 4788 nested_schema=nested_schema, 4789 ) 4790 4791 def _parse_json_schema(self) -> exp.JSONSchema: 4792 self._match_text_seq("COLUMNS") 4793 return self.expression( 4794 exp.JSONSchema, 4795 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 4796 ) 4797 4798 def _parse_json_table(self) -> exp.JSONTable: 4799 this = self._parse_format_json(self._parse_bitwise()) 4800 path = self._match(TokenType.COMMA) and self._parse_string() 4801 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 4802 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 4803 schema = self._parse_json_schema() 4804 4805 return exp.JSONTable( 4806 this=this, 4807 schema=schema, 4808 path=path, 4809 error_handling=error_handling, 4810 empty_handling=empty_handling, 4811 ) 4812 4813 def _parse_match_against(self) -> exp.MatchAgainst: 4814 expressions = self._parse_csv(self._parse_column) 4815 4816 self._match_text_seq(")", "AGAINST", "(") 4817 4818 this = self._parse_string() 4819 4820 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 4821 modifier = "IN NATURAL LANGUAGE MODE" 4822 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4823 modifier = f"{modifier} WITH QUERY EXPANSION" 4824 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 4825 modifier = "IN BOOLEAN MODE" 4826 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4827 modifier = "WITH QUERY EXPANSION" 4828 else: 4829 modifier = None 4830 4831 return self.expression( 4832 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 4833 ) 4834 4835 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 4836 def _parse_open_json(self) -> exp.OpenJSON: 4837 this = self._parse_bitwise() 4838 path = self._match(TokenType.COMMA) and self._parse_string() 4839 4840 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 4841 this = self._parse_field(any_token=True) 4842 kind = self._parse_types() 4843 path = self._parse_string() 4844 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 4845 4846 return self.expression( 4847 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 4848 ) 4849 4850 expressions = None 4851 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 4852 self._match_l_paren() 4853 expressions = self._parse_csv(_parse_open_json_column_def) 4854 4855 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 4856 4857 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 4858 args = self._parse_csv(self._parse_bitwise) 4859 4860 if self._match(TokenType.IN): 4861 return self.expression( 4862 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 4863 ) 4864 4865 if haystack_first: 4866 haystack = seq_get(args, 0) 4867 needle = seq_get(args, 1) 4868 else: 4869 needle = seq_get(args, 0) 4870 haystack = seq_get(args, 1) 4871 4872 return self.expression( 4873 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 4874 ) 4875 4876 def _parse_predict(self) -> exp.Predict: 4877 self._match_text_seq("MODEL") 4878 this = self._parse_table() 4879 4880 self._match(TokenType.COMMA) 4881 self._match_text_seq("TABLE") 4882 4883 return self.expression( 4884 exp.Predict, 4885 this=this, 4886 expression=self._parse_table(), 4887 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 4888 ) 4889 4890 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 4891 args = self._parse_csv(self._parse_table) 4892 return exp.JoinHint(this=func_name.upper(), expressions=args) 4893 4894 def _parse_substring(self) -> exp.Substring: 4895 # Postgres supports the form: substring(string [from int] [for int]) 4896 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 4897 4898 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 4899 4900 if self._match(TokenType.FROM): 4901 args.append(self._parse_bitwise()) 4902 if self._match(TokenType.FOR): 4903 args.append(self._parse_bitwise()) 4904 4905 return self.validate_expression(exp.Substring.from_arg_list(args), args) 4906 4907 def _parse_trim(self) -> exp.Trim: 4908 # https://www.w3resource.com/sql/character-functions/trim.php 4909 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 4910 4911 position = None 4912 collation = None 4913 expression = None 4914 4915 if self._match_texts(self.TRIM_TYPES): 4916 position = self._prev.text.upper() 4917 4918 this = self._parse_bitwise() 4919 if self._match_set((TokenType.FROM, TokenType.COMMA)): 4920 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 4921 expression = self._parse_bitwise() 4922 4923 if invert_order: 4924 this, expression = expression, this 4925 4926 if self._match(TokenType.COLLATE): 4927 collation = self._parse_bitwise() 4928 4929 return self.expression( 4930 exp.Trim, this=this, position=position, expression=expression, collation=collation 4931 ) 4932 4933 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 4934 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 4935 4936 def _parse_named_window(self) -> t.Optional[exp.Expression]: 4937 return self._parse_window(self._parse_id_var(), alias=True) 4938 4939 def _parse_respect_or_ignore_nulls( 4940 self, this: t.Optional[exp.Expression] 4941 ) -> t.Optional[exp.Expression]: 4942 if self._match_text_seq("IGNORE", "NULLS"): 4943 return self.expression(exp.IgnoreNulls, this=this) 4944 if self._match_text_seq("RESPECT", "NULLS"): 4945 return self.expression(exp.RespectNulls, this=this) 4946 return this 4947 4948 def _parse_window( 4949 self, this: t.Optional[exp.Expression], alias: bool = False 4950 ) -> t.Optional[exp.Expression]: 4951 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 4952 self._match(TokenType.WHERE) 4953 this = self.expression( 4954 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 4955 ) 4956 self._match_r_paren() 4957 4958 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 4959 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 4960 if self._match_text_seq("WITHIN", "GROUP"): 4961 order = self._parse_wrapped(self._parse_order) 4962 this = self.expression(exp.WithinGroup, this=this, expression=order) 4963 4964 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 4965 # Some dialects choose to implement and some do not. 4966 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 4967 4968 # There is some code above in _parse_lambda that handles 4969 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 4970 4971 # The below changes handle 4972 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 4973 4974 # Oracle allows both formats 4975 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 4976 # and Snowflake chose to do the same for familiarity 4977 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 4978 if isinstance(this, exp.AggFunc): 4979 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 4980 4981 if ignore_respect and ignore_respect is not this: 4982 ignore_respect.replace(ignore_respect.this) 4983 this = self.expression(ignore_respect.__class__, this=this) 4984 4985 this = self._parse_respect_or_ignore_nulls(this) 4986 4987 # bigquery select from window x AS (partition by ...) 4988 if alias: 4989 over = None 4990 self._match(TokenType.ALIAS) 4991 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 4992 return this 4993 else: 4994 over = self._prev.text.upper() 4995 4996 if not self._match(TokenType.L_PAREN): 4997 return self.expression( 4998 exp.Window, this=this, alias=self._parse_id_var(False), over=over 4999 ) 5000 5001 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 5002 5003 first = self._match(TokenType.FIRST) 5004 if self._match_text_seq("LAST"): 5005 first = False 5006 5007 partition, order = self._parse_partition_and_order() 5008 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 5009 5010 if kind: 5011 self._match(TokenType.BETWEEN) 5012 start = self._parse_window_spec() 5013 self._match(TokenType.AND) 5014 end = self._parse_window_spec() 5015 5016 spec = self.expression( 5017 exp.WindowSpec, 5018 kind=kind, 5019 start=start["value"], 5020 start_side=start["side"], 5021 end=end["value"], 5022 end_side=end["side"], 5023 ) 5024 else: 5025 spec = None 5026 5027 self._match_r_paren() 5028 5029 window = self.expression( 5030 exp.Window, 5031 this=this, 5032 partition_by=partition, 5033 order=order, 5034 spec=spec, 5035 alias=window_alias, 5036 over=over, 5037 first=first, 5038 ) 5039 5040 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 5041 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 5042 return self._parse_window(window, alias=alias) 5043 5044 return window 5045 5046 def _parse_partition_and_order( 5047 self, 5048 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 5049 return self._parse_partition_by(), self._parse_order() 5050 5051 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 5052 self._match(TokenType.BETWEEN) 5053 5054 return { 5055 "value": ( 5056 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 5057 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 5058 or self._parse_bitwise() 5059 ), 5060 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 5061 } 5062 5063 def _parse_alias( 5064 self, this: t.Optional[exp.Expression], explicit: bool = False 5065 ) -> t.Optional[exp.Expression]: 5066 any_token = self._match(TokenType.ALIAS) 5067 comments = self._prev_comments 5068 5069 if explicit and not any_token: 5070 return this 5071 5072 if self._match(TokenType.L_PAREN): 5073 aliases = self.expression( 5074 exp.Aliases, 5075 comments=comments, 5076 this=this, 5077 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 5078 ) 5079 self._match_r_paren(aliases) 5080 return aliases 5081 5082 alias = self._parse_id_var(any_token) or ( 5083 self.STRING_ALIASES and self._parse_string_as_identifier() 5084 ) 5085 5086 if alias: 5087 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 5088 column = this.this 5089 5090 # Moves the comment next to the alias in `expr /* comment */ AS alias` 5091 if not this.comments and column and column.comments: 5092 this.comments = column.comments 5093 column.comments = None 5094 5095 return this 5096 5097 def _parse_id_var( 5098 self, 5099 any_token: bool = True, 5100 tokens: t.Optional[t.Collection[TokenType]] = None, 5101 ) -> t.Optional[exp.Expression]: 5102 identifier = self._parse_identifier() 5103 5104 if identifier: 5105 return identifier 5106 5107 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 5108 quoted = self._prev.token_type == TokenType.STRING 5109 return exp.Identifier(this=self._prev.text, quoted=quoted) 5110 5111 return None 5112 5113 def _parse_string(self) -> t.Optional[exp.Expression]: 5114 if self._match_set((TokenType.STRING, TokenType.RAW_STRING)): 5115 return self.PRIMARY_PARSERS[self._prev.token_type](self, self._prev) 5116 return self._parse_placeholder() 5117 5118 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 5119 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 5120 5121 def _parse_number(self) -> t.Optional[exp.Expression]: 5122 if self._match(TokenType.NUMBER): 5123 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 5124 return self._parse_placeholder() 5125 5126 def _parse_identifier(self) -> t.Optional[exp.Expression]: 5127 if self._match(TokenType.IDENTIFIER): 5128 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 5129 return self._parse_placeholder() 5130 5131 def _parse_var( 5132 self, 5133 any_token: bool = False, 5134 tokens: t.Optional[t.Collection[TokenType]] = None, 5135 upper: bool = False, 5136 ) -> t.Optional[exp.Expression]: 5137 if ( 5138 (any_token and self._advance_any()) 5139 or self._match(TokenType.VAR) 5140 or (self._match_set(tokens) if tokens else False) 5141 ): 5142 return self.expression( 5143 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 5144 ) 5145 return self._parse_placeholder() 5146 5147 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 5148 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 5149 self._advance() 5150 return self._prev 5151 return None 5152 5153 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 5154 return self._parse_var() or self._parse_string() 5155 5156 def _parse_null(self) -> t.Optional[exp.Expression]: 5157 if self._match_set(self.NULL_TOKENS): 5158 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 5159 return self._parse_placeholder() 5160 5161 def _parse_boolean(self) -> t.Optional[exp.Expression]: 5162 if self._match(TokenType.TRUE): 5163 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 5164 if self._match(TokenType.FALSE): 5165 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 5166 return self._parse_placeholder() 5167 5168 def _parse_star(self) -> t.Optional[exp.Expression]: 5169 if self._match(TokenType.STAR): 5170 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 5171 return self._parse_placeholder() 5172 5173 def _parse_parameter(self) -> exp.Parameter: 5174 def _parse_parameter_part() -> t.Optional[exp.Expression]: 5175 return ( 5176 self._parse_identifier() or self._parse_primary() or self._parse_var(any_token=True) 5177 ) 5178 5179 self._match(TokenType.L_BRACE) 5180 this = _parse_parameter_part() 5181 expression = self._match(TokenType.COLON) and _parse_parameter_part() 5182 self._match(TokenType.R_BRACE) 5183 5184 return self.expression(exp.Parameter, this=this, expression=expression) 5185 5186 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 5187 if self._match_set(self.PLACEHOLDER_PARSERS): 5188 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 5189 if placeholder: 5190 return placeholder 5191 self._advance(-1) 5192 return None 5193 5194 def _parse_except(self) -> t.Optional[t.List[exp.Expression]]: 5195 if not self._match(TokenType.EXCEPT): 5196 return None 5197 if self._match(TokenType.L_PAREN, advance=False): 5198 return self._parse_wrapped_csv(self._parse_column) 5199 5200 except_column = self._parse_column() 5201 return [except_column] if except_column else None 5202 5203 def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]: 5204 if not self._match(TokenType.REPLACE): 5205 return None 5206 if self._match(TokenType.L_PAREN, advance=False): 5207 return self._parse_wrapped_csv(self._parse_expression) 5208 5209 replace_expression = self._parse_expression() 5210 return [replace_expression] if replace_expression else None 5211 5212 def _parse_csv( 5213 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 5214 ) -> t.List[exp.Expression]: 5215 parse_result = parse_method() 5216 items = [parse_result] if parse_result is not None else [] 5217 5218 while self._match(sep): 5219 self._add_comments(parse_result) 5220 parse_result = parse_method() 5221 if parse_result is not None: 5222 items.append(parse_result) 5223 5224 return items 5225 5226 def _parse_tokens( 5227 self, parse_method: t.Callable, expressions: t.Dict 5228 ) -> t.Optional[exp.Expression]: 5229 this = parse_method() 5230 5231 while self._match_set(expressions): 5232 this = self.expression( 5233 expressions[self._prev.token_type], 5234 this=this, 5235 comments=self._prev_comments, 5236 expression=parse_method(), 5237 ) 5238 5239 return this 5240 5241 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 5242 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 5243 5244 def _parse_wrapped_csv( 5245 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 5246 ) -> t.List[exp.Expression]: 5247 return self._parse_wrapped( 5248 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 5249 ) 5250 5251 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 5252 wrapped = self._match(TokenType.L_PAREN) 5253 if not wrapped and not optional: 5254 self.raise_error("Expecting (") 5255 parse_result = parse_method() 5256 if wrapped: 5257 self._match_r_paren() 5258 return parse_result 5259 5260 def _parse_expressions(self) -> t.List[exp.Expression]: 5261 return self._parse_csv(self._parse_expression) 5262 5263 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 5264 return self._parse_select() or self._parse_set_operations( 5265 self._parse_expression() if alias else self._parse_conjunction() 5266 ) 5267 5268 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 5269 return self._parse_query_modifiers( 5270 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 5271 ) 5272 5273 def _parse_transaction(self) -> exp.Transaction | exp.Command: 5274 this = None 5275 if self._match_texts(self.TRANSACTION_KIND): 5276 this = self._prev.text 5277 5278 self._match_texts(("TRANSACTION", "WORK")) 5279 5280 modes = [] 5281 while True: 5282 mode = [] 5283 while self._match(TokenType.VAR): 5284 mode.append(self._prev.text) 5285 5286 if mode: 5287 modes.append(" ".join(mode)) 5288 if not self._match(TokenType.COMMA): 5289 break 5290 5291 return self.expression(exp.Transaction, this=this, modes=modes) 5292 5293 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 5294 chain = None 5295 savepoint = None 5296 is_rollback = self._prev.token_type == TokenType.ROLLBACK 5297 5298 self._match_texts(("TRANSACTION", "WORK")) 5299 5300 if self._match_text_seq("TO"): 5301 self._match_text_seq("SAVEPOINT") 5302 savepoint = self._parse_id_var() 5303 5304 if self._match(TokenType.AND): 5305 chain = not self._match_text_seq("NO") 5306 self._match_text_seq("CHAIN") 5307 5308 if is_rollback: 5309 return self.expression(exp.Rollback, savepoint=savepoint) 5310 5311 return self.expression(exp.Commit, chain=chain) 5312 5313 def _parse_refresh(self) -> exp.Refresh: 5314 self._match(TokenType.TABLE) 5315 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 5316 5317 def _parse_add_column(self) -> t.Optional[exp.Expression]: 5318 if not self._match_text_seq("ADD"): 5319 return None 5320 5321 self._match(TokenType.COLUMN) 5322 exists_column = self._parse_exists(not_=True) 5323 expression = self._parse_field_def() 5324 5325 if expression: 5326 expression.set("exists", exists_column) 5327 5328 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 5329 if self._match_texts(("FIRST", "AFTER")): 5330 position = self._prev.text 5331 column_position = self.expression( 5332 exp.ColumnPosition, this=self._parse_column(), position=position 5333 ) 5334 expression.set("position", column_position) 5335 5336 return expression 5337 5338 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 5339 drop = self._match(TokenType.DROP) and self._parse_drop() 5340 if drop and not isinstance(drop, exp.Command): 5341 drop.set("kind", drop.args.get("kind", "COLUMN")) 5342 return drop 5343 5344 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 5345 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 5346 return self.expression( 5347 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 5348 ) 5349 5350 def _parse_add_constraint(self) -> exp.AddConstraint: 5351 this = None 5352 kind = self._prev.token_type 5353 5354 if kind == TokenType.CONSTRAINT: 5355 this = self._parse_id_var() 5356 5357 if self._match_text_seq("CHECK"): 5358 expression = self._parse_wrapped(self._parse_conjunction) 5359 enforced = self._match_text_seq("ENFORCED") or False 5360 5361 return self.expression( 5362 exp.AddConstraint, this=this, expression=expression, enforced=enforced 5363 ) 5364 5365 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 5366 expression = self._parse_foreign_key() 5367 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 5368 expression = self._parse_primary_key() 5369 else: 5370 expression = None 5371 5372 return self.expression(exp.AddConstraint, this=this, expression=expression) 5373 5374 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 5375 index = self._index - 1 5376 5377 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 5378 return self._parse_csv(self._parse_add_constraint) 5379 5380 self._retreat(index) 5381 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 5382 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 5383 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 5384 5385 def _parse_alter_table_alter(self) -> exp.AlterColumn: 5386 self._match(TokenType.COLUMN) 5387 column = self._parse_field(any_token=True) 5388 5389 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 5390 return self.expression(exp.AlterColumn, this=column, drop=True) 5391 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 5392 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 5393 if self._match(TokenType.COMMENT): 5394 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 5395 5396 self._match_text_seq("SET", "DATA") 5397 return self.expression( 5398 exp.AlterColumn, 5399 this=column, 5400 dtype=self._match_text_seq("TYPE") and self._parse_types(), 5401 collate=self._match(TokenType.COLLATE) and self._parse_term(), 5402 using=self._match(TokenType.USING) and self._parse_conjunction(), 5403 ) 5404 5405 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 5406 index = self._index - 1 5407 5408 partition_exists = self._parse_exists() 5409 if self._match(TokenType.PARTITION, advance=False): 5410 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 5411 5412 self._retreat(index) 5413 return self._parse_csv(self._parse_drop_column) 5414 5415 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 5416 if self._match(TokenType.COLUMN): 5417 exists = self._parse_exists() 5418 old_column = self._parse_column() 5419 to = self._match_text_seq("TO") 5420 new_column = self._parse_column() 5421 5422 if old_column is None or to is None or new_column is None: 5423 return None 5424 5425 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 5426 5427 self._match_text_seq("TO") 5428 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 5429 5430 def _parse_alter(self) -> exp.AlterTable | exp.Command: 5431 start = self._prev 5432 5433 if not self._match(TokenType.TABLE): 5434 return self._parse_as_command(start) 5435 5436 exists = self._parse_exists() 5437 only = self._match_text_seq("ONLY") 5438 this = self._parse_table(schema=True) 5439 5440 if self._next: 5441 self._advance() 5442 5443 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 5444 if parser: 5445 actions = ensure_list(parser(self)) 5446 5447 if not self._curr and actions: 5448 return self.expression( 5449 exp.AlterTable, 5450 this=this, 5451 exists=exists, 5452 actions=actions, 5453 only=only, 5454 ) 5455 5456 return self._parse_as_command(start) 5457 5458 def _parse_merge(self) -> exp.Merge: 5459 self._match(TokenType.INTO) 5460 target = self._parse_table() 5461 5462 if target and self._match(TokenType.ALIAS, advance=False): 5463 target.set("alias", self._parse_table_alias()) 5464 5465 self._match(TokenType.USING) 5466 using = self._parse_table() 5467 5468 self._match(TokenType.ON) 5469 on = self._parse_conjunction() 5470 5471 return self.expression( 5472 exp.Merge, 5473 this=target, 5474 using=using, 5475 on=on, 5476 expressions=self._parse_when_matched(), 5477 ) 5478 5479 def _parse_when_matched(self) -> t.List[exp.When]: 5480 whens = [] 5481 5482 while self._match(TokenType.WHEN): 5483 matched = not self._match(TokenType.NOT) 5484 self._match_text_seq("MATCHED") 5485 source = ( 5486 False 5487 if self._match_text_seq("BY", "TARGET") 5488 else self._match_text_seq("BY", "SOURCE") 5489 ) 5490 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 5491 5492 self._match(TokenType.THEN) 5493 5494 if self._match(TokenType.INSERT): 5495 _this = self._parse_star() 5496 if _this: 5497 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 5498 else: 5499 then = self.expression( 5500 exp.Insert, 5501 this=self._parse_value(), 5502 expression=self._match(TokenType.VALUES) and self._parse_value(), 5503 ) 5504 elif self._match(TokenType.UPDATE): 5505 expressions = self._parse_star() 5506 if expressions: 5507 then = self.expression(exp.Update, expressions=expressions) 5508 else: 5509 then = self.expression( 5510 exp.Update, 5511 expressions=self._match(TokenType.SET) 5512 and self._parse_csv(self._parse_equality), 5513 ) 5514 elif self._match(TokenType.DELETE): 5515 then = self.expression(exp.Var, this=self._prev.text) 5516 else: 5517 then = None 5518 5519 whens.append( 5520 self.expression( 5521 exp.When, 5522 matched=matched, 5523 source=source, 5524 condition=condition, 5525 then=then, 5526 ) 5527 ) 5528 return whens 5529 5530 def _parse_show(self) -> t.Optional[exp.Expression]: 5531 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 5532 if parser: 5533 return parser(self) 5534 return self._parse_as_command(self._prev) 5535 5536 def _parse_set_item_assignment( 5537 self, kind: t.Optional[str] = None 5538 ) -> t.Optional[exp.Expression]: 5539 index = self._index 5540 5541 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 5542 return self._parse_set_transaction(global_=kind == "GLOBAL") 5543 5544 left = self._parse_primary() or self._parse_id_var() 5545 assignment_delimiter = self._match_texts(("=", "TO")) 5546 5547 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 5548 self._retreat(index) 5549 return None 5550 5551 right = self._parse_statement() or self._parse_id_var() 5552 this = self.expression(exp.EQ, this=left, expression=right) 5553 5554 return self.expression(exp.SetItem, this=this, kind=kind) 5555 5556 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 5557 self._match_text_seq("TRANSACTION") 5558 characteristics = self._parse_csv( 5559 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 5560 ) 5561 return self.expression( 5562 exp.SetItem, 5563 expressions=characteristics, 5564 kind="TRANSACTION", 5565 **{"global": global_}, # type: ignore 5566 ) 5567 5568 def _parse_set_item(self) -> t.Optional[exp.Expression]: 5569 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 5570 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 5571 5572 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 5573 index = self._index 5574 set_ = self.expression( 5575 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 5576 ) 5577 5578 if self._curr: 5579 self._retreat(index) 5580 return self._parse_as_command(self._prev) 5581 5582 return set_ 5583 5584 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 5585 for option in options: 5586 if self._match_text_seq(*option.split(" ")): 5587 return exp.var(option) 5588 return None 5589 5590 def _parse_as_command(self, start: Token) -> exp.Command: 5591 while self._curr: 5592 self._advance() 5593 text = self._find_sql(start, self._prev) 5594 size = len(start.text) 5595 self._warn_unsupported() 5596 return exp.Command(this=text[:size], expression=text[size:]) 5597 5598 def _parse_dict_property(self, this: str) -> exp.DictProperty: 5599 settings = [] 5600 5601 self._match_l_paren() 5602 kind = self._parse_id_var() 5603 5604 if self._match(TokenType.L_PAREN): 5605 while True: 5606 key = self._parse_id_var() 5607 value = self._parse_primary() 5608 5609 if not key and value is None: 5610 break 5611 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 5612 self._match(TokenType.R_PAREN) 5613 5614 self._match_r_paren() 5615 5616 return self.expression( 5617 exp.DictProperty, 5618 this=this, 5619 kind=kind.this if kind else None, 5620 settings=settings, 5621 ) 5622 5623 def _parse_dict_range(self, this: str) -> exp.DictRange: 5624 self._match_l_paren() 5625 has_min = self._match_text_seq("MIN") 5626 if has_min: 5627 min = self._parse_var() or self._parse_primary() 5628 self._match_text_seq("MAX") 5629 max = self._parse_var() or self._parse_primary() 5630 else: 5631 max = self._parse_var() or self._parse_primary() 5632 min = exp.Literal.number(0) 5633 self._match_r_paren() 5634 return self.expression(exp.DictRange, this=this, min=min, max=max) 5635 5636 def _parse_comprehension( 5637 self, this: t.Optional[exp.Expression] 5638 ) -> t.Optional[exp.Comprehension]: 5639 index = self._index 5640 expression = self._parse_column() 5641 if not self._match(TokenType.IN): 5642 self._retreat(index - 1) 5643 return None 5644 iterator = self._parse_column() 5645 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 5646 return self.expression( 5647 exp.Comprehension, 5648 this=this, 5649 expression=expression, 5650 iterator=iterator, 5651 condition=condition, 5652 ) 5653 5654 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 5655 if self._match(TokenType.HEREDOC_STRING): 5656 return self.expression(exp.Heredoc, this=self._prev.text) 5657 5658 if not self._match_text_seq("$"): 5659 return None 5660 5661 tags = ["$"] 5662 tag_text = None 5663 5664 if self._is_connected(): 5665 self._advance() 5666 tags.append(self._prev.text.upper()) 5667 else: 5668 self.raise_error("No closing $ found") 5669 5670 if tags[-1] != "$": 5671 if self._is_connected() and self._match_text_seq("$"): 5672 tag_text = tags[-1] 5673 tags.append("$") 5674 else: 5675 self.raise_error("No closing $ found") 5676 5677 heredoc_start = self._curr 5678 5679 while self._curr: 5680 if self._match_text_seq(*tags, advance=False): 5681 this = self._find_sql(heredoc_start, self._prev) 5682 self._advance(len(tags)) 5683 return self.expression(exp.Heredoc, this=this, tag=tag_text) 5684 5685 self._advance() 5686 5687 self.raise_error(f"No closing {''.join(tags)} found") 5688 return None 5689 5690 def _find_parser( 5691 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 5692 ) -> t.Optional[t.Callable]: 5693 if not self._curr: 5694 return None 5695 5696 index = self._index 5697 this = [] 5698 while True: 5699 # The current token might be multiple words 5700 curr = self._curr.text.upper() 5701 key = curr.split(" ") 5702 this.append(curr) 5703 5704 self._advance() 5705 result, trie = in_trie(trie, key) 5706 if result == TrieResult.FAILED: 5707 break 5708 5709 if result == TrieResult.EXISTS: 5710 subparser = parsers[" ".join(this)] 5711 return subparser 5712 5713 self._retreat(index) 5714 return None 5715 5716 def _match(self, token_type, advance=True, expression=None): 5717 if not self._curr: 5718 return None 5719 5720 if self._curr.token_type == token_type: 5721 if advance: 5722 self._advance() 5723 self._add_comments(expression) 5724 return True 5725 5726 return None 5727 5728 def _match_set(self, types, advance=True): 5729 if not self._curr: 5730 return None 5731 5732 if self._curr.token_type in types: 5733 if advance: 5734 self._advance() 5735 return True 5736 5737 return None 5738 5739 def _match_pair(self, token_type_a, token_type_b, advance=True): 5740 if not self._curr or not self._next: 5741 return None 5742 5743 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 5744 if advance: 5745 self._advance(2) 5746 return True 5747 5748 return None 5749 5750 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5751 if not self._match(TokenType.L_PAREN, expression=expression): 5752 self.raise_error("Expecting (") 5753 5754 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5755 if not self._match(TokenType.R_PAREN, expression=expression): 5756 self.raise_error("Expecting )") 5757 5758 def _match_texts(self, texts, advance=True): 5759 if self._curr and self._curr.text.upper() in texts: 5760 if advance: 5761 self._advance() 5762 return True 5763 return None 5764 5765 def _match_text_seq(self, *texts, advance=True): 5766 index = self._index 5767 for text in texts: 5768 if self._curr and self._curr.text.upper() == text: 5769 self._advance() 5770 else: 5771 self._retreat(index) 5772 return None 5773 5774 if not advance: 5775 self._retreat(index) 5776 5777 return True 5778 5779 @t.overload 5780 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 5781 ... 5782 5783 @t.overload 5784 def _replace_columns_with_dots( 5785 self, this: t.Optional[exp.Expression] 5786 ) -> t.Optional[exp.Expression]: 5787 ... 5788 5789 def _replace_columns_with_dots(self, this): 5790 if isinstance(this, exp.Dot): 5791 exp.replace_children(this, self._replace_columns_with_dots) 5792 elif isinstance(this, exp.Column): 5793 exp.replace_children(this, self._replace_columns_with_dots) 5794 table = this.args.get("table") 5795 this = ( 5796 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 5797 ) 5798 5799 return this 5800 5801 def _replace_lambda( 5802 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 5803 ) -> t.Optional[exp.Expression]: 5804 if not node: 5805 return node 5806 5807 for column in node.find_all(exp.Column): 5808 if column.parts[0].name in lambda_variables: 5809 dot_or_id = column.to_dot() if column.table else column.this 5810 parent = column.parent 5811 5812 while isinstance(parent, exp.Dot): 5813 if not isinstance(parent.parent, exp.Dot): 5814 parent.replace(dot_or_id) 5815 break 5816 parent = parent.parent 5817 else: 5818 if column is node: 5819 node = dot_or_id 5820 else: 5821 column.replace(dot_or_id) 5822 return node
22def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 23 if len(args) == 1 and args[0].is_star: 24 return exp.StarMap(this=args[0]) 25 26 keys = [] 27 values = [] 28 for i in range(0, len(args), 2): 29 keys.append(args[i]) 30 values.append(args[i + 1]) 31 32 return exp.VarMap( 33 keys=exp.Array(expressions=keys), 34 values=exp.Array(expressions=values), 35 )
51def parse_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 52 # Default argument order is base, expression 53 this = seq_get(args, 0) 54 expression = seq_get(args, 1) 55 56 if expression: 57 if not dialect.LOG_BASE_FIRST: 58 this, expression = expression, this 59 return exp.Log(this=this, expression=expression) 60 61 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)
64def parse_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 65 def _parser(args: t.List, dialect: Dialect) -> E: 66 expression = expr_type( 67 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 68 ) 69 if len(args) > 2 and expr_type is exp.JSONExtract: 70 expression.set("expressions", args[2:]) 71 72 return expression 73 74 return _parser
87class Parser(metaclass=_Parser): 88 """ 89 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 90 91 Args: 92 error_level: The desired error level. 93 Default: ErrorLevel.IMMEDIATE 94 error_message_context: Determines the amount of context to capture from a 95 query string when displaying the error message (in number of characters). 96 Default: 100 97 max_errors: Maximum number of error messages to include in a raised ParseError. 98 This is only relevant if error_level is ErrorLevel.RAISE. 99 Default: 3 100 """ 101 102 FUNCTIONS: t.Dict[str, t.Callable] = { 103 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 104 "CONCAT": lambda args, dialect: exp.Concat( 105 expressions=args, 106 safe=not dialect.STRICT_STRING_CONCAT, 107 coalesce=dialect.CONCAT_COALESCE, 108 ), 109 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 110 expressions=args, 111 safe=not dialect.STRICT_STRING_CONCAT, 112 coalesce=dialect.CONCAT_COALESCE, 113 ), 114 "DATE_TO_DATE_STR": lambda args: exp.Cast( 115 this=seq_get(args, 0), 116 to=exp.DataType(this=exp.DataType.Type.TEXT), 117 ), 118 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 119 "JSON_EXTRACT": parse_extract_json_with_path(exp.JSONExtract), 120 "JSON_EXTRACT_SCALAR": parse_extract_json_with_path(exp.JSONExtractScalar), 121 "JSON_EXTRACT_PATH_TEXT": parse_extract_json_with_path(exp.JSONExtractScalar), 122 "LIKE": parse_like, 123 "LOG": parse_logarithm, 124 "TIME_TO_TIME_STR": lambda args: exp.Cast( 125 this=seq_get(args, 0), 126 to=exp.DataType(this=exp.DataType.Type.TEXT), 127 ), 128 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 129 this=exp.Cast( 130 this=seq_get(args, 0), 131 to=exp.DataType(this=exp.DataType.Type.TEXT), 132 ), 133 start=exp.Literal.number(1), 134 length=exp.Literal.number(10), 135 ), 136 "VAR_MAP": parse_var_map, 137 } 138 139 NO_PAREN_FUNCTIONS = { 140 TokenType.CURRENT_DATE: exp.CurrentDate, 141 TokenType.CURRENT_DATETIME: exp.CurrentDate, 142 TokenType.CURRENT_TIME: exp.CurrentTime, 143 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 144 TokenType.CURRENT_USER: exp.CurrentUser, 145 } 146 147 STRUCT_TYPE_TOKENS = { 148 TokenType.NESTED, 149 TokenType.STRUCT, 150 } 151 152 NESTED_TYPE_TOKENS = { 153 TokenType.ARRAY, 154 TokenType.LOWCARDINALITY, 155 TokenType.MAP, 156 TokenType.NULLABLE, 157 *STRUCT_TYPE_TOKENS, 158 } 159 160 ENUM_TYPE_TOKENS = { 161 TokenType.ENUM, 162 TokenType.ENUM8, 163 TokenType.ENUM16, 164 } 165 166 AGGREGATE_TYPE_TOKENS = { 167 TokenType.AGGREGATEFUNCTION, 168 TokenType.SIMPLEAGGREGATEFUNCTION, 169 } 170 171 TYPE_TOKENS = { 172 TokenType.BIT, 173 TokenType.BOOLEAN, 174 TokenType.TINYINT, 175 TokenType.UTINYINT, 176 TokenType.SMALLINT, 177 TokenType.USMALLINT, 178 TokenType.INT, 179 TokenType.UINT, 180 TokenType.BIGINT, 181 TokenType.UBIGINT, 182 TokenType.INT128, 183 TokenType.UINT128, 184 TokenType.INT256, 185 TokenType.UINT256, 186 TokenType.MEDIUMINT, 187 TokenType.UMEDIUMINT, 188 TokenType.FIXEDSTRING, 189 TokenType.FLOAT, 190 TokenType.DOUBLE, 191 TokenType.CHAR, 192 TokenType.NCHAR, 193 TokenType.VARCHAR, 194 TokenType.NVARCHAR, 195 TokenType.BPCHAR, 196 TokenType.TEXT, 197 TokenType.MEDIUMTEXT, 198 TokenType.LONGTEXT, 199 TokenType.MEDIUMBLOB, 200 TokenType.LONGBLOB, 201 TokenType.BINARY, 202 TokenType.VARBINARY, 203 TokenType.JSON, 204 TokenType.JSONB, 205 TokenType.INTERVAL, 206 TokenType.TINYBLOB, 207 TokenType.TINYTEXT, 208 TokenType.TIME, 209 TokenType.TIMETZ, 210 TokenType.TIMESTAMP, 211 TokenType.TIMESTAMP_S, 212 TokenType.TIMESTAMP_MS, 213 TokenType.TIMESTAMP_NS, 214 TokenType.TIMESTAMPTZ, 215 TokenType.TIMESTAMPLTZ, 216 TokenType.DATETIME, 217 TokenType.DATETIME64, 218 TokenType.DATE, 219 TokenType.DATE32, 220 TokenType.INT4RANGE, 221 TokenType.INT4MULTIRANGE, 222 TokenType.INT8RANGE, 223 TokenType.INT8MULTIRANGE, 224 TokenType.NUMRANGE, 225 TokenType.NUMMULTIRANGE, 226 TokenType.TSRANGE, 227 TokenType.TSMULTIRANGE, 228 TokenType.TSTZRANGE, 229 TokenType.TSTZMULTIRANGE, 230 TokenType.DATERANGE, 231 TokenType.DATEMULTIRANGE, 232 TokenType.DECIMAL, 233 TokenType.UDECIMAL, 234 TokenType.BIGDECIMAL, 235 TokenType.UUID, 236 TokenType.GEOGRAPHY, 237 TokenType.GEOMETRY, 238 TokenType.HLLSKETCH, 239 TokenType.HSTORE, 240 TokenType.PSEUDO_TYPE, 241 TokenType.SUPER, 242 TokenType.SERIAL, 243 TokenType.SMALLSERIAL, 244 TokenType.BIGSERIAL, 245 TokenType.XML, 246 TokenType.YEAR, 247 TokenType.UNIQUEIDENTIFIER, 248 TokenType.USERDEFINED, 249 TokenType.MONEY, 250 TokenType.SMALLMONEY, 251 TokenType.ROWVERSION, 252 TokenType.IMAGE, 253 TokenType.VARIANT, 254 TokenType.OBJECT, 255 TokenType.OBJECT_IDENTIFIER, 256 TokenType.INET, 257 TokenType.IPADDRESS, 258 TokenType.IPPREFIX, 259 TokenType.IPV4, 260 TokenType.IPV6, 261 TokenType.UNKNOWN, 262 TokenType.NULL, 263 *ENUM_TYPE_TOKENS, 264 *NESTED_TYPE_TOKENS, 265 *AGGREGATE_TYPE_TOKENS, 266 } 267 268 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 269 TokenType.BIGINT: TokenType.UBIGINT, 270 TokenType.INT: TokenType.UINT, 271 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 272 TokenType.SMALLINT: TokenType.USMALLINT, 273 TokenType.TINYINT: TokenType.UTINYINT, 274 TokenType.DECIMAL: TokenType.UDECIMAL, 275 } 276 277 SUBQUERY_PREDICATES = { 278 TokenType.ANY: exp.Any, 279 TokenType.ALL: exp.All, 280 TokenType.EXISTS: exp.Exists, 281 TokenType.SOME: exp.Any, 282 } 283 284 RESERVED_TOKENS = { 285 *Tokenizer.SINGLE_TOKENS.values(), 286 TokenType.SELECT, 287 } 288 289 DB_CREATABLES = { 290 TokenType.DATABASE, 291 TokenType.SCHEMA, 292 TokenType.TABLE, 293 TokenType.VIEW, 294 TokenType.MODEL, 295 TokenType.DICTIONARY, 296 } 297 298 CREATABLES = { 299 TokenType.COLUMN, 300 TokenType.CONSTRAINT, 301 TokenType.FUNCTION, 302 TokenType.INDEX, 303 TokenType.PROCEDURE, 304 TokenType.FOREIGN_KEY, 305 *DB_CREATABLES, 306 } 307 308 # Tokens that can represent identifiers 309 ID_VAR_TOKENS = { 310 TokenType.VAR, 311 TokenType.ANTI, 312 TokenType.APPLY, 313 TokenType.ASC, 314 TokenType.AUTO_INCREMENT, 315 TokenType.BEGIN, 316 TokenType.BPCHAR, 317 TokenType.CACHE, 318 TokenType.CASE, 319 TokenType.COLLATE, 320 TokenType.COMMAND, 321 TokenType.COMMENT, 322 TokenType.COMMIT, 323 TokenType.CONSTRAINT, 324 TokenType.DEFAULT, 325 TokenType.DELETE, 326 TokenType.DESC, 327 TokenType.DESCRIBE, 328 TokenType.DICTIONARY, 329 TokenType.DIV, 330 TokenType.END, 331 TokenType.EXECUTE, 332 TokenType.ESCAPE, 333 TokenType.FALSE, 334 TokenType.FIRST, 335 TokenType.FILTER, 336 TokenType.FINAL, 337 TokenType.FORMAT, 338 TokenType.FULL, 339 TokenType.IS, 340 TokenType.ISNULL, 341 TokenType.INTERVAL, 342 TokenType.KEEP, 343 TokenType.KILL, 344 TokenType.LEFT, 345 TokenType.LOAD, 346 TokenType.MERGE, 347 TokenType.NATURAL, 348 TokenType.NEXT, 349 TokenType.OFFSET, 350 TokenType.OPERATOR, 351 TokenType.ORDINALITY, 352 TokenType.OVERLAPS, 353 TokenType.OVERWRITE, 354 TokenType.PARTITION, 355 TokenType.PERCENT, 356 TokenType.PIVOT, 357 TokenType.PRAGMA, 358 TokenType.RANGE, 359 TokenType.RECURSIVE, 360 TokenType.REFERENCES, 361 TokenType.REFRESH, 362 TokenType.REPLACE, 363 TokenType.RIGHT, 364 TokenType.ROW, 365 TokenType.ROWS, 366 TokenType.SEMI, 367 TokenType.SET, 368 TokenType.SETTINGS, 369 TokenType.SHOW, 370 TokenType.TEMPORARY, 371 TokenType.TOP, 372 TokenType.TRUE, 373 TokenType.UNIQUE, 374 TokenType.UNPIVOT, 375 TokenType.UPDATE, 376 TokenType.USE, 377 TokenType.VOLATILE, 378 TokenType.WINDOW, 379 *CREATABLES, 380 *SUBQUERY_PREDICATES, 381 *TYPE_TOKENS, 382 *NO_PAREN_FUNCTIONS, 383 } 384 385 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 386 387 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 388 TokenType.ANTI, 389 TokenType.APPLY, 390 TokenType.ASOF, 391 TokenType.FULL, 392 TokenType.LEFT, 393 TokenType.LOCK, 394 TokenType.NATURAL, 395 TokenType.OFFSET, 396 TokenType.RIGHT, 397 TokenType.SEMI, 398 TokenType.WINDOW, 399 } 400 401 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 402 403 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 404 405 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 406 407 FUNC_TOKENS = { 408 TokenType.COLLATE, 409 TokenType.COMMAND, 410 TokenType.CURRENT_DATE, 411 TokenType.CURRENT_DATETIME, 412 TokenType.CURRENT_TIMESTAMP, 413 TokenType.CURRENT_TIME, 414 TokenType.CURRENT_USER, 415 TokenType.FILTER, 416 TokenType.FIRST, 417 TokenType.FORMAT, 418 TokenType.GLOB, 419 TokenType.IDENTIFIER, 420 TokenType.INDEX, 421 TokenType.ISNULL, 422 TokenType.ILIKE, 423 TokenType.INSERT, 424 TokenType.LIKE, 425 TokenType.MERGE, 426 TokenType.OFFSET, 427 TokenType.PRIMARY_KEY, 428 TokenType.RANGE, 429 TokenType.REPLACE, 430 TokenType.RLIKE, 431 TokenType.ROW, 432 TokenType.UNNEST, 433 TokenType.VAR, 434 TokenType.LEFT, 435 TokenType.RIGHT, 436 TokenType.DATE, 437 TokenType.DATETIME, 438 TokenType.TABLE, 439 TokenType.TIMESTAMP, 440 TokenType.TIMESTAMPTZ, 441 TokenType.WINDOW, 442 TokenType.XOR, 443 *TYPE_TOKENS, 444 *SUBQUERY_PREDICATES, 445 } 446 447 CONJUNCTION = { 448 TokenType.AND: exp.And, 449 TokenType.OR: exp.Or, 450 } 451 452 EQUALITY = { 453 TokenType.COLON_EQ: exp.PropertyEQ, 454 TokenType.EQ: exp.EQ, 455 TokenType.NEQ: exp.NEQ, 456 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 457 } 458 459 COMPARISON = { 460 TokenType.GT: exp.GT, 461 TokenType.GTE: exp.GTE, 462 TokenType.LT: exp.LT, 463 TokenType.LTE: exp.LTE, 464 } 465 466 BITWISE = { 467 TokenType.AMP: exp.BitwiseAnd, 468 TokenType.CARET: exp.BitwiseXor, 469 TokenType.PIPE: exp.BitwiseOr, 470 } 471 472 TERM = { 473 TokenType.DASH: exp.Sub, 474 TokenType.PLUS: exp.Add, 475 TokenType.MOD: exp.Mod, 476 TokenType.COLLATE: exp.Collate, 477 } 478 479 FACTOR = { 480 TokenType.DIV: exp.IntDiv, 481 TokenType.LR_ARROW: exp.Distance, 482 TokenType.SLASH: exp.Div, 483 TokenType.STAR: exp.Mul, 484 } 485 486 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 487 488 TIMES = { 489 TokenType.TIME, 490 TokenType.TIMETZ, 491 } 492 493 TIMESTAMPS = { 494 TokenType.TIMESTAMP, 495 TokenType.TIMESTAMPTZ, 496 TokenType.TIMESTAMPLTZ, 497 *TIMES, 498 } 499 500 SET_OPERATIONS = { 501 TokenType.UNION, 502 TokenType.INTERSECT, 503 TokenType.EXCEPT, 504 } 505 506 JOIN_METHODS = { 507 TokenType.NATURAL, 508 TokenType.ASOF, 509 } 510 511 JOIN_SIDES = { 512 TokenType.LEFT, 513 TokenType.RIGHT, 514 TokenType.FULL, 515 } 516 517 JOIN_KINDS = { 518 TokenType.INNER, 519 TokenType.OUTER, 520 TokenType.CROSS, 521 TokenType.SEMI, 522 TokenType.ANTI, 523 } 524 525 JOIN_HINTS: t.Set[str] = set() 526 527 LAMBDAS = { 528 TokenType.ARROW: lambda self, expressions: self.expression( 529 exp.Lambda, 530 this=self._replace_lambda( 531 self._parse_conjunction(), 532 {node.name for node in expressions}, 533 ), 534 expressions=expressions, 535 ), 536 TokenType.FARROW: lambda self, expressions: self.expression( 537 exp.Kwarg, 538 this=exp.var(expressions[0].name), 539 expression=self._parse_conjunction(), 540 ), 541 } 542 543 COLUMN_OPERATORS = { 544 TokenType.DOT: None, 545 TokenType.DCOLON: lambda self, this, to: self.expression( 546 exp.Cast if self.STRICT_CAST else exp.TryCast, 547 this=this, 548 to=to, 549 ), 550 TokenType.ARROW: lambda self, this, path: self.expression( 551 exp.JSONExtract, 552 this=this, 553 expression=self.dialect.to_json_path(path), 554 ), 555 TokenType.DARROW: lambda self, this, path: self.expression( 556 exp.JSONExtractScalar, 557 this=this, 558 expression=self.dialect.to_json_path(path), 559 ), 560 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 561 exp.JSONBExtract, 562 this=this, 563 expression=path, 564 ), 565 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 566 exp.JSONBExtractScalar, 567 this=this, 568 expression=path, 569 ), 570 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 571 exp.JSONBContains, 572 this=this, 573 expression=key, 574 ), 575 } 576 577 EXPRESSION_PARSERS = { 578 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 579 exp.Column: lambda self: self._parse_column(), 580 exp.Condition: lambda self: self._parse_conjunction(), 581 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 582 exp.Expression: lambda self: self._parse_statement(), 583 exp.From: lambda self: self._parse_from(), 584 exp.Group: lambda self: self._parse_group(), 585 exp.Having: lambda self: self._parse_having(), 586 exp.Identifier: lambda self: self._parse_id_var(), 587 exp.Join: lambda self: self._parse_join(), 588 exp.Lambda: lambda self: self._parse_lambda(), 589 exp.Lateral: lambda self: self._parse_lateral(), 590 exp.Limit: lambda self: self._parse_limit(), 591 exp.Offset: lambda self: self._parse_offset(), 592 exp.Order: lambda self: self._parse_order(), 593 exp.Ordered: lambda self: self._parse_ordered(), 594 exp.Properties: lambda self: self._parse_properties(), 595 exp.Qualify: lambda self: self._parse_qualify(), 596 exp.Returning: lambda self: self._parse_returning(), 597 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 598 exp.Table: lambda self: self._parse_table_parts(), 599 exp.TableAlias: lambda self: self._parse_table_alias(), 600 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 601 exp.Where: lambda self: self._parse_where(), 602 exp.Window: lambda self: self._parse_named_window(), 603 exp.With: lambda self: self._parse_with(), 604 "JOIN_TYPE": lambda self: self._parse_join_parts(), 605 } 606 607 STATEMENT_PARSERS = { 608 TokenType.ALTER: lambda self: self._parse_alter(), 609 TokenType.BEGIN: lambda self: self._parse_transaction(), 610 TokenType.CACHE: lambda self: self._parse_cache(), 611 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 612 TokenType.COMMENT: lambda self: self._parse_comment(), 613 TokenType.CREATE: lambda self: self._parse_create(), 614 TokenType.DELETE: lambda self: self._parse_delete(), 615 TokenType.DESC: lambda self: self._parse_describe(), 616 TokenType.DESCRIBE: lambda self: self._parse_describe(), 617 TokenType.DROP: lambda self: self._parse_drop(), 618 TokenType.INSERT: lambda self: self._parse_insert(), 619 TokenType.KILL: lambda self: self._parse_kill(), 620 TokenType.LOAD: lambda self: self._parse_load(), 621 TokenType.MERGE: lambda self: self._parse_merge(), 622 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 623 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 624 TokenType.REFRESH: lambda self: self._parse_refresh(), 625 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 626 TokenType.SET: lambda self: self._parse_set(), 627 TokenType.UNCACHE: lambda self: self._parse_uncache(), 628 TokenType.UPDATE: lambda self: self._parse_update(), 629 TokenType.USE: lambda self: self.expression( 630 exp.Use, 631 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 632 and exp.var(self._prev.text), 633 this=self._parse_table(schema=False), 634 ), 635 } 636 637 UNARY_PARSERS = { 638 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 639 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 640 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 641 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 642 } 643 644 PRIMARY_PARSERS = { 645 TokenType.STRING: lambda self, token: self.expression( 646 exp.Literal, this=token.text, is_string=True 647 ), 648 TokenType.NUMBER: lambda self, token: self.expression( 649 exp.Literal, this=token.text, is_string=False 650 ), 651 TokenType.STAR: lambda self, _: self.expression( 652 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 653 ), 654 TokenType.NULL: lambda self, _: self.expression(exp.Null), 655 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 656 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 657 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 658 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 659 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 660 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 661 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 662 exp.National, this=token.text 663 ), 664 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 665 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 666 exp.RawString, this=token.text 667 ), 668 TokenType.UNICODE_STRING: lambda self, token: self.expression( 669 exp.UnicodeString, 670 this=token.text, 671 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 672 ), 673 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 674 } 675 676 PLACEHOLDER_PARSERS = { 677 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 678 TokenType.PARAMETER: lambda self: self._parse_parameter(), 679 TokenType.COLON: lambda self: ( 680 self.expression(exp.Placeholder, this=self._prev.text) 681 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 682 else None 683 ), 684 } 685 686 RANGE_PARSERS = { 687 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 688 TokenType.GLOB: binary_range_parser(exp.Glob), 689 TokenType.ILIKE: binary_range_parser(exp.ILike), 690 TokenType.IN: lambda self, this: self._parse_in(this), 691 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 692 TokenType.IS: lambda self, this: self._parse_is(this), 693 TokenType.LIKE: binary_range_parser(exp.Like), 694 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 695 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 696 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 697 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 698 } 699 700 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 701 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 702 "AUTO": lambda self: self._parse_auto_property(), 703 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 704 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 705 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 706 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 707 "CHECKSUM": lambda self: self._parse_checksum(), 708 "CLUSTER BY": lambda self: self._parse_cluster(), 709 "CLUSTERED": lambda self: self._parse_clustered_by(), 710 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 711 exp.CollateProperty, **kwargs 712 ), 713 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 714 "CONTAINS": lambda self: self._parse_contains_property(), 715 "COPY": lambda self: self._parse_copy_property(), 716 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 717 "DEFINER": lambda self: self._parse_definer(), 718 "DETERMINISTIC": lambda self: self.expression( 719 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 720 ), 721 "DISTKEY": lambda self: self._parse_distkey(), 722 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 723 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 724 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 725 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 726 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 727 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 728 "FREESPACE": lambda self: self._parse_freespace(), 729 "HEAP": lambda self: self.expression(exp.HeapProperty), 730 "IMMUTABLE": lambda self: self.expression( 731 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 732 ), 733 "INHERITS": lambda self: self.expression( 734 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 735 ), 736 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 737 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 738 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 739 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 740 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 741 "LIKE": lambda self: self._parse_create_like(), 742 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 743 "LOCK": lambda self: self._parse_locking(), 744 "LOCKING": lambda self: self._parse_locking(), 745 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 746 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 747 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 748 "MODIFIES": lambda self: self._parse_modifies_property(), 749 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 750 "NO": lambda self: self._parse_no_property(), 751 "ON": lambda self: self._parse_on_property(), 752 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 753 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 754 "PARTITION": lambda self: self._parse_partitioned_of(), 755 "PARTITION BY": lambda self: self._parse_partitioned_by(), 756 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 757 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 758 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 759 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 760 "READS": lambda self: self._parse_reads_property(), 761 "REMOTE": lambda self: self._parse_remote_with_connection(), 762 "RETURNS": lambda self: self._parse_returns(), 763 "ROW": lambda self: self._parse_row(), 764 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 765 "SAMPLE": lambda self: self.expression( 766 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 767 ), 768 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 769 "SETTINGS": lambda self: self.expression( 770 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 771 ), 772 "SORTKEY": lambda self: self._parse_sortkey(), 773 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 774 "STABLE": lambda self: self.expression( 775 exp.StabilityProperty, this=exp.Literal.string("STABLE") 776 ), 777 "STORED": lambda self: self._parse_stored(), 778 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 779 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 780 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 781 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 782 "TO": lambda self: self._parse_to_table(), 783 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 784 "TRANSFORM": lambda self: self.expression( 785 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 786 ), 787 "TTL": lambda self: self._parse_ttl(), 788 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 789 "VOLATILE": lambda self: self._parse_volatile_property(), 790 "WITH": lambda self: self._parse_with_property(), 791 } 792 793 CONSTRAINT_PARSERS = { 794 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 795 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 796 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 797 "CHARACTER SET": lambda self: self.expression( 798 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 799 ), 800 "CHECK": lambda self: self.expression( 801 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 802 ), 803 "COLLATE": lambda self: self.expression( 804 exp.CollateColumnConstraint, this=self._parse_var() 805 ), 806 "COMMENT": lambda self: self.expression( 807 exp.CommentColumnConstraint, this=self._parse_string() 808 ), 809 "COMPRESS": lambda self: self._parse_compress(), 810 "CLUSTERED": lambda self: self.expression( 811 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 812 ), 813 "NONCLUSTERED": lambda self: self.expression( 814 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 815 ), 816 "DEFAULT": lambda self: self.expression( 817 exp.DefaultColumnConstraint, this=self._parse_bitwise() 818 ), 819 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 820 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 821 "FORMAT": lambda self: self.expression( 822 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 823 ), 824 "GENERATED": lambda self: self._parse_generated_as_identity(), 825 "IDENTITY": lambda self: self._parse_auto_increment(), 826 "INLINE": lambda self: self._parse_inline(), 827 "LIKE": lambda self: self._parse_create_like(), 828 "NOT": lambda self: self._parse_not_constraint(), 829 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 830 "ON": lambda self: ( 831 self._match(TokenType.UPDATE) 832 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 833 ) 834 or self.expression(exp.OnProperty, this=self._parse_id_var()), 835 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 836 "PERIOD": lambda self: self._parse_period_for_system_time(), 837 "PRIMARY KEY": lambda self: self._parse_primary_key(), 838 "REFERENCES": lambda self: self._parse_references(match=False), 839 "TITLE": lambda self: self.expression( 840 exp.TitleColumnConstraint, this=self._parse_var_or_string() 841 ), 842 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 843 "UNIQUE": lambda self: self._parse_unique(), 844 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 845 "WITH": lambda self: self.expression( 846 exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property) 847 ), 848 } 849 850 ALTER_PARSERS = { 851 "ADD": lambda self: self._parse_alter_table_add(), 852 "ALTER": lambda self: self._parse_alter_table_alter(), 853 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 854 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 855 "DROP": lambda self: self._parse_alter_table_drop(), 856 "RENAME": lambda self: self._parse_alter_table_rename(), 857 } 858 859 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE", "PERIOD"} 860 861 NO_PAREN_FUNCTION_PARSERS = { 862 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 863 "CASE": lambda self: self._parse_case(), 864 "IF": lambda self: self._parse_if(), 865 "NEXT": lambda self: self._parse_next_value_for(), 866 } 867 868 INVALID_FUNC_NAME_TOKENS = { 869 TokenType.IDENTIFIER, 870 TokenType.STRING, 871 } 872 873 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 874 875 FUNCTION_PARSERS = { 876 "ANY_VALUE": lambda self: self._parse_any_value(), 877 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 878 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 879 "DECODE": lambda self: self._parse_decode(), 880 "EXTRACT": lambda self: self._parse_extract(), 881 "JSON_OBJECT": lambda self: self._parse_json_object(), 882 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 883 "JSON_TABLE": lambda self: self._parse_json_table(), 884 "MATCH": lambda self: self._parse_match_against(), 885 "OPENJSON": lambda self: self._parse_open_json(), 886 "POSITION": lambda self: self._parse_position(), 887 "PREDICT": lambda self: self._parse_predict(), 888 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 889 "STRING_AGG": lambda self: self._parse_string_agg(), 890 "SUBSTRING": lambda self: self._parse_substring(), 891 "TRIM": lambda self: self._parse_trim(), 892 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 893 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 894 } 895 896 QUERY_MODIFIER_PARSERS = { 897 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 898 TokenType.WHERE: lambda self: ("where", self._parse_where()), 899 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 900 TokenType.HAVING: lambda self: ("having", self._parse_having()), 901 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 902 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 903 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 904 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 905 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 906 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 907 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 908 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 909 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 910 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 911 TokenType.CLUSTER_BY: lambda self: ( 912 "cluster", 913 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 914 ), 915 TokenType.DISTRIBUTE_BY: lambda self: ( 916 "distribute", 917 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 918 ), 919 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 920 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 921 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 922 } 923 924 SET_PARSERS = { 925 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 926 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 927 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 928 "TRANSACTION": lambda self: self._parse_set_transaction(), 929 } 930 931 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 932 933 TYPE_LITERAL_PARSERS = { 934 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 935 } 936 937 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 938 939 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 940 941 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 942 943 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 944 TRANSACTION_CHARACTERISTICS = { 945 "ISOLATION LEVEL REPEATABLE READ", 946 "ISOLATION LEVEL READ COMMITTED", 947 "ISOLATION LEVEL READ UNCOMMITTED", 948 "ISOLATION LEVEL SERIALIZABLE", 949 "READ WRITE", 950 "READ ONLY", 951 } 952 953 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 954 955 CLONE_KEYWORDS = {"CLONE", "COPY"} 956 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 957 958 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"} 959 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 960 961 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 962 963 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 964 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 965 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 966 967 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 968 969 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 970 971 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 972 973 DISTINCT_TOKENS = {TokenType.DISTINCT} 974 975 NULL_TOKENS = {TokenType.NULL} 976 977 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 978 979 STRICT_CAST = True 980 981 PREFIXED_PIVOT_COLUMNS = False 982 IDENTIFY_PIVOT_STRINGS = False 983 984 LOG_DEFAULTS_TO_LN = False 985 986 # Whether or not ADD is present for each column added by ALTER TABLE 987 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 988 989 # Whether or not the table sample clause expects CSV syntax 990 TABLESAMPLE_CSV = False 991 992 # Whether or not the SET command needs a delimiter (e.g. "=") for assignments 993 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 994 995 # Whether the TRIM function expects the characters to trim as its first argument 996 TRIM_PATTERN_FIRST = False 997 998 # Whether or not string aliases are supported `SELECT COUNT(*) 'count'` 999 STRING_ALIASES = False 1000 1001 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1002 MODIFIERS_ATTACHED_TO_UNION = True 1003 UNION_MODIFIERS = {"order", "limit", "offset"} 1004 1005 # parses no parenthesis if statements as commands 1006 NO_PAREN_IF_COMMANDS = True 1007 1008 __slots__ = ( 1009 "error_level", 1010 "error_message_context", 1011 "max_errors", 1012 "dialect", 1013 "sql", 1014 "errors", 1015 "_tokens", 1016 "_index", 1017 "_curr", 1018 "_next", 1019 "_prev", 1020 "_prev_comments", 1021 ) 1022 1023 # Autofilled 1024 SHOW_TRIE: t.Dict = {} 1025 SET_TRIE: t.Dict = {} 1026 1027 def __init__( 1028 self, 1029 error_level: t.Optional[ErrorLevel] = None, 1030 error_message_context: int = 100, 1031 max_errors: int = 3, 1032 dialect: DialectType = None, 1033 ): 1034 from sqlglot.dialects import Dialect 1035 1036 self.error_level = error_level or ErrorLevel.IMMEDIATE 1037 self.error_message_context = error_message_context 1038 self.max_errors = max_errors 1039 self.dialect = Dialect.get_or_raise(dialect) 1040 self.reset() 1041 1042 def reset(self): 1043 self.sql = "" 1044 self.errors = [] 1045 self._tokens = [] 1046 self._index = 0 1047 self._curr = None 1048 self._next = None 1049 self._prev = None 1050 self._prev_comments = None 1051 1052 def parse( 1053 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1054 ) -> t.List[t.Optional[exp.Expression]]: 1055 """ 1056 Parses a list of tokens and returns a list of syntax trees, one tree 1057 per parsed SQL statement. 1058 1059 Args: 1060 raw_tokens: The list of tokens. 1061 sql: The original SQL string, used to produce helpful debug messages. 1062 1063 Returns: 1064 The list of the produced syntax trees. 1065 """ 1066 return self._parse( 1067 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1068 ) 1069 1070 def parse_into( 1071 self, 1072 expression_types: exp.IntoType, 1073 raw_tokens: t.List[Token], 1074 sql: t.Optional[str] = None, 1075 ) -> t.List[t.Optional[exp.Expression]]: 1076 """ 1077 Parses a list of tokens into a given Expression type. If a collection of Expression 1078 types is given instead, this method will try to parse the token list into each one 1079 of them, stopping at the first for which the parsing succeeds. 1080 1081 Args: 1082 expression_types: The expression type(s) to try and parse the token list into. 1083 raw_tokens: The list of tokens. 1084 sql: The original SQL string, used to produce helpful debug messages. 1085 1086 Returns: 1087 The target Expression. 1088 """ 1089 errors = [] 1090 for expression_type in ensure_list(expression_types): 1091 parser = self.EXPRESSION_PARSERS.get(expression_type) 1092 if not parser: 1093 raise TypeError(f"No parser registered for {expression_type}") 1094 1095 try: 1096 return self._parse(parser, raw_tokens, sql) 1097 except ParseError as e: 1098 e.errors[0]["into_expression"] = expression_type 1099 errors.append(e) 1100 1101 raise ParseError( 1102 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1103 errors=merge_errors(errors), 1104 ) from errors[-1] 1105 1106 def _parse( 1107 self, 1108 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1109 raw_tokens: t.List[Token], 1110 sql: t.Optional[str] = None, 1111 ) -> t.List[t.Optional[exp.Expression]]: 1112 self.reset() 1113 self.sql = sql or "" 1114 1115 total = len(raw_tokens) 1116 chunks: t.List[t.List[Token]] = [[]] 1117 1118 for i, token in enumerate(raw_tokens): 1119 if token.token_type == TokenType.SEMICOLON: 1120 if i < total - 1: 1121 chunks.append([]) 1122 else: 1123 chunks[-1].append(token) 1124 1125 expressions = [] 1126 1127 for tokens in chunks: 1128 self._index = -1 1129 self._tokens = tokens 1130 self._advance() 1131 1132 expressions.append(parse_method(self)) 1133 1134 if self._index < len(self._tokens): 1135 self.raise_error("Invalid expression / Unexpected token") 1136 1137 self.check_errors() 1138 1139 return expressions 1140 1141 def check_errors(self) -> None: 1142 """Logs or raises any found errors, depending on the chosen error level setting.""" 1143 if self.error_level == ErrorLevel.WARN: 1144 for error in self.errors: 1145 logger.error(str(error)) 1146 elif self.error_level == ErrorLevel.RAISE and self.errors: 1147 raise ParseError( 1148 concat_messages(self.errors, self.max_errors), 1149 errors=merge_errors(self.errors), 1150 ) 1151 1152 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1153 """ 1154 Appends an error in the list of recorded errors or raises it, depending on the chosen 1155 error level setting. 1156 """ 1157 token = token or self._curr or self._prev or Token.string("") 1158 start = token.start 1159 end = token.end + 1 1160 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1161 highlight = self.sql[start:end] 1162 end_context = self.sql[end : end + self.error_message_context] 1163 1164 error = ParseError.new( 1165 f"{message}. Line {token.line}, Col: {token.col}.\n" 1166 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1167 description=message, 1168 line=token.line, 1169 col=token.col, 1170 start_context=start_context, 1171 highlight=highlight, 1172 end_context=end_context, 1173 ) 1174 1175 if self.error_level == ErrorLevel.IMMEDIATE: 1176 raise error 1177 1178 self.errors.append(error) 1179 1180 def expression( 1181 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1182 ) -> E: 1183 """ 1184 Creates a new, validated Expression. 1185 1186 Args: 1187 exp_class: The expression class to instantiate. 1188 comments: An optional list of comments to attach to the expression. 1189 kwargs: The arguments to set for the expression along with their respective values. 1190 1191 Returns: 1192 The target expression. 1193 """ 1194 instance = exp_class(**kwargs) 1195 instance.add_comments(comments) if comments else self._add_comments(instance) 1196 return self.validate_expression(instance) 1197 1198 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1199 if expression and self._prev_comments: 1200 expression.add_comments(self._prev_comments) 1201 self._prev_comments = None 1202 1203 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1204 """ 1205 Validates an Expression, making sure that all its mandatory arguments are set. 1206 1207 Args: 1208 expression: The expression to validate. 1209 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1210 1211 Returns: 1212 The validated expression. 1213 """ 1214 if self.error_level != ErrorLevel.IGNORE: 1215 for error_message in expression.error_messages(args): 1216 self.raise_error(error_message) 1217 1218 return expression 1219 1220 def _find_sql(self, start: Token, end: Token) -> str: 1221 return self.sql[start.start : end.end + 1] 1222 1223 def _is_connected(self) -> bool: 1224 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1225 1226 def _advance(self, times: int = 1) -> None: 1227 self._index += times 1228 self._curr = seq_get(self._tokens, self._index) 1229 self._next = seq_get(self._tokens, self._index + 1) 1230 1231 if self._index > 0: 1232 self._prev = self._tokens[self._index - 1] 1233 self._prev_comments = self._prev.comments 1234 else: 1235 self._prev = None 1236 self._prev_comments = None 1237 1238 def _retreat(self, index: int) -> None: 1239 if index != self._index: 1240 self._advance(index - self._index) 1241 1242 def _warn_unsupported(self) -> None: 1243 if len(self._tokens) <= 1: 1244 return 1245 1246 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1247 # interested in emitting a warning for the one being currently processed. 1248 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1249 1250 logger.warning( 1251 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1252 ) 1253 1254 def _parse_command(self) -> exp.Command: 1255 self._warn_unsupported() 1256 return self.expression( 1257 exp.Command, this=self._prev.text.upper(), expression=self._parse_string() 1258 ) 1259 1260 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1261 start = self._prev 1262 exists = self._parse_exists() if allow_exists else None 1263 1264 self._match(TokenType.ON) 1265 1266 kind = self._match_set(self.CREATABLES) and self._prev 1267 if not kind: 1268 return self._parse_as_command(start) 1269 1270 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1271 this = self._parse_user_defined_function(kind=kind.token_type) 1272 elif kind.token_type == TokenType.TABLE: 1273 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1274 elif kind.token_type == TokenType.COLUMN: 1275 this = self._parse_column() 1276 else: 1277 this = self._parse_id_var() 1278 1279 self._match(TokenType.IS) 1280 1281 return self.expression( 1282 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1283 ) 1284 1285 def _parse_to_table( 1286 self, 1287 ) -> exp.ToTableProperty: 1288 table = self._parse_table_parts(schema=True) 1289 return self.expression(exp.ToTableProperty, this=table) 1290 1291 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1292 def _parse_ttl(self) -> exp.Expression: 1293 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1294 this = self._parse_bitwise() 1295 1296 if self._match_text_seq("DELETE"): 1297 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1298 if self._match_text_seq("RECOMPRESS"): 1299 return self.expression( 1300 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1301 ) 1302 if self._match_text_seq("TO", "DISK"): 1303 return self.expression( 1304 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1305 ) 1306 if self._match_text_seq("TO", "VOLUME"): 1307 return self.expression( 1308 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1309 ) 1310 1311 return this 1312 1313 expressions = self._parse_csv(_parse_ttl_action) 1314 where = self._parse_where() 1315 group = self._parse_group() 1316 1317 aggregates = None 1318 if group and self._match(TokenType.SET): 1319 aggregates = self._parse_csv(self._parse_set_item) 1320 1321 return self.expression( 1322 exp.MergeTreeTTL, 1323 expressions=expressions, 1324 where=where, 1325 group=group, 1326 aggregates=aggregates, 1327 ) 1328 1329 def _parse_statement(self) -> t.Optional[exp.Expression]: 1330 if self._curr is None: 1331 return None 1332 1333 if self._match_set(self.STATEMENT_PARSERS): 1334 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1335 1336 if self._match_set(Tokenizer.COMMANDS): 1337 return self._parse_command() 1338 1339 expression = self._parse_expression() 1340 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1341 return self._parse_query_modifiers(expression) 1342 1343 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1344 start = self._prev 1345 temporary = self._match(TokenType.TEMPORARY) 1346 materialized = self._match_text_seq("MATERIALIZED") 1347 1348 kind = self._match_set(self.CREATABLES) and self._prev.text 1349 if not kind: 1350 return self._parse_as_command(start) 1351 1352 return self.expression( 1353 exp.Drop, 1354 comments=start.comments, 1355 exists=exists or self._parse_exists(), 1356 this=self._parse_table( 1357 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1358 ), 1359 kind=kind, 1360 temporary=temporary, 1361 materialized=materialized, 1362 cascade=self._match_text_seq("CASCADE"), 1363 constraints=self._match_text_seq("CONSTRAINTS"), 1364 purge=self._match_text_seq("PURGE"), 1365 ) 1366 1367 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1368 return ( 1369 self._match_text_seq("IF") 1370 and (not not_ or self._match(TokenType.NOT)) 1371 and self._match(TokenType.EXISTS) 1372 ) 1373 1374 def _parse_create(self) -> exp.Create | exp.Command: 1375 # Note: this can't be None because we've matched a statement parser 1376 start = self._prev 1377 comments = self._prev_comments 1378 1379 replace = ( 1380 start.token_type == TokenType.REPLACE 1381 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1382 or self._match_pair(TokenType.OR, TokenType.ALTER) 1383 ) 1384 unique = self._match(TokenType.UNIQUE) 1385 1386 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1387 self._advance() 1388 1389 properties = None 1390 create_token = self._match_set(self.CREATABLES) and self._prev 1391 1392 if not create_token: 1393 # exp.Properties.Location.POST_CREATE 1394 properties = self._parse_properties() 1395 create_token = self._match_set(self.CREATABLES) and self._prev 1396 1397 if not properties or not create_token: 1398 return self._parse_as_command(start) 1399 1400 exists = self._parse_exists(not_=True) 1401 this = None 1402 expression: t.Optional[exp.Expression] = None 1403 indexes = None 1404 no_schema_binding = None 1405 begin = None 1406 end = None 1407 clone = None 1408 1409 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1410 nonlocal properties 1411 if properties and temp_props: 1412 properties.expressions.extend(temp_props.expressions) 1413 elif temp_props: 1414 properties = temp_props 1415 1416 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1417 this = self._parse_user_defined_function(kind=create_token.token_type) 1418 1419 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1420 extend_props(self._parse_properties()) 1421 1422 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1423 1424 if not expression: 1425 if self._match(TokenType.COMMAND): 1426 expression = self._parse_as_command(self._prev) 1427 else: 1428 begin = self._match(TokenType.BEGIN) 1429 return_ = self._match_text_seq("RETURN") 1430 1431 if self._match(TokenType.STRING, advance=False): 1432 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1433 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1434 expression = self._parse_string() 1435 extend_props(self._parse_properties()) 1436 else: 1437 expression = self._parse_statement() 1438 1439 end = self._match_text_seq("END") 1440 1441 if return_: 1442 expression = self.expression(exp.Return, this=expression) 1443 elif create_token.token_type == TokenType.INDEX: 1444 this = self._parse_index(index=self._parse_id_var()) 1445 elif create_token.token_type in self.DB_CREATABLES: 1446 table_parts = self._parse_table_parts( 1447 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1448 ) 1449 1450 # exp.Properties.Location.POST_NAME 1451 self._match(TokenType.COMMA) 1452 extend_props(self._parse_properties(before=True)) 1453 1454 this = self._parse_schema(this=table_parts) 1455 1456 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1457 extend_props(self._parse_properties()) 1458 1459 self._match(TokenType.ALIAS) 1460 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1461 # exp.Properties.Location.POST_ALIAS 1462 extend_props(self._parse_properties()) 1463 1464 expression = self._parse_ddl_select() 1465 1466 if create_token.token_type == TokenType.TABLE: 1467 # exp.Properties.Location.POST_EXPRESSION 1468 extend_props(self._parse_properties()) 1469 1470 indexes = [] 1471 while True: 1472 index = self._parse_index() 1473 1474 # exp.Properties.Location.POST_INDEX 1475 extend_props(self._parse_properties()) 1476 1477 if not index: 1478 break 1479 else: 1480 self._match(TokenType.COMMA) 1481 indexes.append(index) 1482 elif create_token.token_type == TokenType.VIEW: 1483 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1484 no_schema_binding = True 1485 1486 shallow = self._match_text_seq("SHALLOW") 1487 1488 if self._match_texts(self.CLONE_KEYWORDS): 1489 copy = self._prev.text.lower() == "copy" 1490 clone = self.expression( 1491 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1492 ) 1493 1494 if self._curr: 1495 return self._parse_as_command(start) 1496 1497 return self.expression( 1498 exp.Create, 1499 comments=comments, 1500 this=this, 1501 kind=create_token.text.upper(), 1502 replace=replace, 1503 unique=unique, 1504 expression=expression, 1505 exists=exists, 1506 properties=properties, 1507 indexes=indexes, 1508 no_schema_binding=no_schema_binding, 1509 begin=begin, 1510 end=end, 1511 clone=clone, 1512 ) 1513 1514 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1515 # only used for teradata currently 1516 self._match(TokenType.COMMA) 1517 1518 kwargs = { 1519 "no": self._match_text_seq("NO"), 1520 "dual": self._match_text_seq("DUAL"), 1521 "before": self._match_text_seq("BEFORE"), 1522 "default": self._match_text_seq("DEFAULT"), 1523 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1524 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1525 "after": self._match_text_seq("AFTER"), 1526 "minimum": self._match_texts(("MIN", "MINIMUM")), 1527 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1528 } 1529 1530 if self._match_texts(self.PROPERTY_PARSERS): 1531 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1532 try: 1533 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1534 except TypeError: 1535 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1536 1537 return None 1538 1539 def _parse_property(self) -> t.Optional[exp.Expression]: 1540 if self._match_texts(self.PROPERTY_PARSERS): 1541 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1542 1543 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1544 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1545 1546 if self._match_text_seq("COMPOUND", "SORTKEY"): 1547 return self._parse_sortkey(compound=True) 1548 1549 if self._match_text_seq("SQL", "SECURITY"): 1550 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1551 1552 index = self._index 1553 key = self._parse_column() 1554 1555 if not self._match(TokenType.EQ): 1556 self._retreat(index) 1557 return None 1558 1559 return self.expression( 1560 exp.Property, 1561 this=key.to_dot() if isinstance(key, exp.Column) else key, 1562 value=self._parse_column() or self._parse_var(any_token=True), 1563 ) 1564 1565 def _parse_stored(self) -> exp.FileFormatProperty: 1566 self._match(TokenType.ALIAS) 1567 1568 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1569 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1570 1571 return self.expression( 1572 exp.FileFormatProperty, 1573 this=( 1574 self.expression( 1575 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1576 ) 1577 if input_format or output_format 1578 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1579 ), 1580 ) 1581 1582 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1583 self._match(TokenType.EQ) 1584 self._match(TokenType.ALIAS) 1585 return self.expression(exp_class, this=self._parse_field(), **kwargs) 1586 1587 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1588 properties = [] 1589 while True: 1590 if before: 1591 prop = self._parse_property_before() 1592 else: 1593 prop = self._parse_property() 1594 1595 if not prop: 1596 break 1597 for p in ensure_list(prop): 1598 properties.append(p) 1599 1600 if properties: 1601 return self.expression(exp.Properties, expressions=properties) 1602 1603 return None 1604 1605 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1606 return self.expression( 1607 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1608 ) 1609 1610 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1611 if self._index >= 2: 1612 pre_volatile_token = self._tokens[self._index - 2] 1613 else: 1614 pre_volatile_token = None 1615 1616 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1617 return exp.VolatileProperty() 1618 1619 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1620 1621 def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty: 1622 self._match_pair(TokenType.EQ, TokenType.ON) 1623 1624 prop = self.expression(exp.WithSystemVersioningProperty) 1625 if self._match(TokenType.L_PAREN): 1626 self._match_text_seq("HISTORY_TABLE", "=") 1627 prop.set("this", self._parse_table_parts()) 1628 1629 if self._match(TokenType.COMMA): 1630 self._match_text_seq("DATA_CONSISTENCY_CHECK", "=") 1631 prop.set("expression", self._advance_any() and self._prev.text.upper()) 1632 1633 self._match_r_paren() 1634 1635 return prop 1636 1637 def _parse_with_property( 1638 self, 1639 ) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1640 if self._match(TokenType.L_PAREN, advance=False): 1641 return self._parse_wrapped_csv(self._parse_property) 1642 1643 if self._match_text_seq("JOURNAL"): 1644 return self._parse_withjournaltable() 1645 1646 if self._match_text_seq("DATA"): 1647 return self._parse_withdata(no=False) 1648 elif self._match_text_seq("NO", "DATA"): 1649 return self._parse_withdata(no=True) 1650 1651 if not self._next: 1652 return None 1653 1654 return self._parse_withisolatedloading() 1655 1656 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1657 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1658 self._match(TokenType.EQ) 1659 1660 user = self._parse_id_var() 1661 self._match(TokenType.PARAMETER) 1662 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1663 1664 if not user or not host: 1665 return None 1666 1667 return exp.DefinerProperty(this=f"{user}@{host}") 1668 1669 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1670 self._match(TokenType.TABLE) 1671 self._match(TokenType.EQ) 1672 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1673 1674 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1675 return self.expression(exp.LogProperty, no=no) 1676 1677 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1678 return self.expression(exp.JournalProperty, **kwargs) 1679 1680 def _parse_checksum(self) -> exp.ChecksumProperty: 1681 self._match(TokenType.EQ) 1682 1683 on = None 1684 if self._match(TokenType.ON): 1685 on = True 1686 elif self._match_text_seq("OFF"): 1687 on = False 1688 1689 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1690 1691 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 1692 return self.expression( 1693 exp.Cluster, 1694 expressions=( 1695 self._parse_wrapped_csv(self._parse_ordered) 1696 if wrapped 1697 else self._parse_csv(self._parse_ordered) 1698 ), 1699 ) 1700 1701 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 1702 self._match_text_seq("BY") 1703 1704 self._match_l_paren() 1705 expressions = self._parse_csv(self._parse_column) 1706 self._match_r_paren() 1707 1708 if self._match_text_seq("SORTED", "BY"): 1709 self._match_l_paren() 1710 sorted_by = self._parse_csv(self._parse_ordered) 1711 self._match_r_paren() 1712 else: 1713 sorted_by = None 1714 1715 self._match(TokenType.INTO) 1716 buckets = self._parse_number() 1717 self._match_text_seq("BUCKETS") 1718 1719 return self.expression( 1720 exp.ClusteredByProperty, 1721 expressions=expressions, 1722 sorted_by=sorted_by, 1723 buckets=buckets, 1724 ) 1725 1726 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1727 if not self._match_text_seq("GRANTS"): 1728 self._retreat(self._index - 1) 1729 return None 1730 1731 return self.expression(exp.CopyGrantsProperty) 1732 1733 def _parse_freespace(self) -> exp.FreespaceProperty: 1734 self._match(TokenType.EQ) 1735 return self.expression( 1736 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1737 ) 1738 1739 def _parse_mergeblockratio( 1740 self, no: bool = False, default: bool = False 1741 ) -> exp.MergeBlockRatioProperty: 1742 if self._match(TokenType.EQ): 1743 return self.expression( 1744 exp.MergeBlockRatioProperty, 1745 this=self._parse_number(), 1746 percent=self._match(TokenType.PERCENT), 1747 ) 1748 1749 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1750 1751 def _parse_datablocksize( 1752 self, 1753 default: t.Optional[bool] = None, 1754 minimum: t.Optional[bool] = None, 1755 maximum: t.Optional[bool] = None, 1756 ) -> exp.DataBlocksizeProperty: 1757 self._match(TokenType.EQ) 1758 size = self._parse_number() 1759 1760 units = None 1761 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1762 units = self._prev.text 1763 1764 return self.expression( 1765 exp.DataBlocksizeProperty, 1766 size=size, 1767 units=units, 1768 default=default, 1769 minimum=minimum, 1770 maximum=maximum, 1771 ) 1772 1773 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 1774 self._match(TokenType.EQ) 1775 always = self._match_text_seq("ALWAYS") 1776 manual = self._match_text_seq("MANUAL") 1777 never = self._match_text_seq("NEVER") 1778 default = self._match_text_seq("DEFAULT") 1779 1780 autotemp = None 1781 if self._match_text_seq("AUTOTEMP"): 1782 autotemp = self._parse_schema() 1783 1784 return self.expression( 1785 exp.BlockCompressionProperty, 1786 always=always, 1787 manual=manual, 1788 never=never, 1789 default=default, 1790 autotemp=autotemp, 1791 ) 1792 1793 def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty: 1794 no = self._match_text_seq("NO") 1795 concurrent = self._match_text_seq("CONCURRENT") 1796 self._match_text_seq("ISOLATED", "LOADING") 1797 for_all = self._match_text_seq("FOR", "ALL") 1798 for_insert = self._match_text_seq("FOR", "INSERT") 1799 for_none = self._match_text_seq("FOR", "NONE") 1800 return self.expression( 1801 exp.IsolatedLoadingProperty, 1802 no=no, 1803 concurrent=concurrent, 1804 for_all=for_all, 1805 for_insert=for_insert, 1806 for_none=for_none, 1807 ) 1808 1809 def _parse_locking(self) -> exp.LockingProperty: 1810 if self._match(TokenType.TABLE): 1811 kind = "TABLE" 1812 elif self._match(TokenType.VIEW): 1813 kind = "VIEW" 1814 elif self._match(TokenType.ROW): 1815 kind = "ROW" 1816 elif self._match_text_seq("DATABASE"): 1817 kind = "DATABASE" 1818 else: 1819 kind = None 1820 1821 if kind in ("DATABASE", "TABLE", "VIEW"): 1822 this = self._parse_table_parts() 1823 else: 1824 this = None 1825 1826 if self._match(TokenType.FOR): 1827 for_or_in = "FOR" 1828 elif self._match(TokenType.IN): 1829 for_or_in = "IN" 1830 else: 1831 for_or_in = None 1832 1833 if self._match_text_seq("ACCESS"): 1834 lock_type = "ACCESS" 1835 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1836 lock_type = "EXCLUSIVE" 1837 elif self._match_text_seq("SHARE"): 1838 lock_type = "SHARE" 1839 elif self._match_text_seq("READ"): 1840 lock_type = "READ" 1841 elif self._match_text_seq("WRITE"): 1842 lock_type = "WRITE" 1843 elif self._match_text_seq("CHECKSUM"): 1844 lock_type = "CHECKSUM" 1845 else: 1846 lock_type = None 1847 1848 override = self._match_text_seq("OVERRIDE") 1849 1850 return self.expression( 1851 exp.LockingProperty, 1852 this=this, 1853 kind=kind, 1854 for_or_in=for_or_in, 1855 lock_type=lock_type, 1856 override=override, 1857 ) 1858 1859 def _parse_partition_by(self) -> t.List[exp.Expression]: 1860 if self._match(TokenType.PARTITION_BY): 1861 return self._parse_csv(self._parse_conjunction) 1862 return [] 1863 1864 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 1865 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 1866 if self._match_text_seq("MINVALUE"): 1867 return exp.var("MINVALUE") 1868 if self._match_text_seq("MAXVALUE"): 1869 return exp.var("MAXVALUE") 1870 return self._parse_bitwise() 1871 1872 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 1873 expression = None 1874 from_expressions = None 1875 to_expressions = None 1876 1877 if self._match(TokenType.IN): 1878 this = self._parse_wrapped_csv(self._parse_bitwise) 1879 elif self._match(TokenType.FROM): 1880 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 1881 self._match_text_seq("TO") 1882 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 1883 elif self._match_text_seq("WITH", "(", "MODULUS"): 1884 this = self._parse_number() 1885 self._match_text_seq(",", "REMAINDER") 1886 expression = self._parse_number() 1887 self._match_r_paren() 1888 else: 1889 self.raise_error("Failed to parse partition bound spec.") 1890 1891 return self.expression( 1892 exp.PartitionBoundSpec, 1893 this=this, 1894 expression=expression, 1895 from_expressions=from_expressions, 1896 to_expressions=to_expressions, 1897 ) 1898 1899 # https://www.postgresql.org/docs/current/sql-createtable.html 1900 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 1901 if not self._match_text_seq("OF"): 1902 self._retreat(self._index - 1) 1903 return None 1904 1905 this = self._parse_table(schema=True) 1906 1907 if self._match(TokenType.DEFAULT): 1908 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 1909 elif self._match_text_seq("FOR", "VALUES"): 1910 expression = self._parse_partition_bound_spec() 1911 else: 1912 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 1913 1914 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 1915 1916 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 1917 self._match(TokenType.EQ) 1918 return self.expression( 1919 exp.PartitionedByProperty, 1920 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1921 ) 1922 1923 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 1924 if self._match_text_seq("AND", "STATISTICS"): 1925 statistics = True 1926 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1927 statistics = False 1928 else: 1929 statistics = None 1930 1931 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1932 1933 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 1934 if self._match_text_seq("SQL"): 1935 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 1936 return None 1937 1938 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 1939 if self._match_text_seq("SQL", "DATA"): 1940 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 1941 return None 1942 1943 def _parse_no_property(self) -> t.Optional[exp.Expression]: 1944 if self._match_text_seq("PRIMARY", "INDEX"): 1945 return exp.NoPrimaryIndexProperty() 1946 if self._match_text_seq("SQL"): 1947 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 1948 return None 1949 1950 def _parse_on_property(self) -> t.Optional[exp.Expression]: 1951 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1952 return exp.OnCommitProperty() 1953 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1954 return exp.OnCommitProperty(delete=True) 1955 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 1956 1957 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 1958 if self._match_text_seq("SQL", "DATA"): 1959 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 1960 return None 1961 1962 def _parse_distkey(self) -> exp.DistKeyProperty: 1963 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1964 1965 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 1966 table = self._parse_table(schema=True) 1967 1968 options = [] 1969 while self._match_texts(("INCLUDING", "EXCLUDING")): 1970 this = self._prev.text.upper() 1971 1972 id_var = self._parse_id_var() 1973 if not id_var: 1974 return None 1975 1976 options.append( 1977 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 1978 ) 1979 1980 return self.expression(exp.LikeProperty, this=table, expressions=options) 1981 1982 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 1983 return self.expression( 1984 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 1985 ) 1986 1987 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 1988 self._match(TokenType.EQ) 1989 return self.expression( 1990 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1991 ) 1992 1993 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 1994 self._match_text_seq("WITH", "CONNECTION") 1995 return self.expression( 1996 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 1997 ) 1998 1999 def _parse_returns(self) -> exp.ReturnsProperty: 2000 value: t.Optional[exp.Expression] 2001 is_table = self._match(TokenType.TABLE) 2002 2003 if is_table: 2004 if self._match(TokenType.LT): 2005 value = self.expression( 2006 exp.Schema, 2007 this="TABLE", 2008 expressions=self._parse_csv(self._parse_struct_types), 2009 ) 2010 if not self._match(TokenType.GT): 2011 self.raise_error("Expecting >") 2012 else: 2013 value = self._parse_schema(exp.var("TABLE")) 2014 else: 2015 value = self._parse_types() 2016 2017 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 2018 2019 def _parse_describe(self) -> exp.Describe: 2020 kind = self._match_set(self.CREATABLES) and self._prev.text 2021 extended = self._match_text_seq("EXTENDED") 2022 this = self._parse_table(schema=True) 2023 properties = self._parse_properties() 2024 expressions = properties.expressions if properties else None 2025 return self.expression( 2026 exp.Describe, this=this, extended=extended, kind=kind, expressions=expressions 2027 ) 2028 2029 def _parse_insert(self) -> exp.Insert: 2030 comments = ensure_list(self._prev_comments) 2031 overwrite = self._match(TokenType.OVERWRITE) 2032 ignore = self._match(TokenType.IGNORE) 2033 local = self._match_text_seq("LOCAL") 2034 alternative = None 2035 2036 if self._match_text_seq("DIRECTORY"): 2037 this: t.Optional[exp.Expression] = self.expression( 2038 exp.Directory, 2039 this=self._parse_var_or_string(), 2040 local=local, 2041 row_format=self._parse_row_format(match_row=True), 2042 ) 2043 else: 2044 if self._match(TokenType.OR): 2045 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2046 2047 self._match(TokenType.INTO) 2048 comments += ensure_list(self._prev_comments) 2049 self._match(TokenType.TABLE) 2050 this = self._parse_table(schema=True) 2051 2052 returning = self._parse_returning() 2053 2054 return self.expression( 2055 exp.Insert, 2056 comments=comments, 2057 this=this, 2058 by_name=self._match_text_seq("BY", "NAME"), 2059 exists=self._parse_exists(), 2060 partition=self._parse_partition(), 2061 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 2062 and self._parse_conjunction(), 2063 expression=self._parse_ddl_select(), 2064 conflict=self._parse_on_conflict(), 2065 returning=returning or self._parse_returning(), 2066 overwrite=overwrite, 2067 alternative=alternative, 2068 ignore=ignore, 2069 ) 2070 2071 def _parse_kill(self) -> exp.Kill: 2072 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2073 2074 return self.expression( 2075 exp.Kill, 2076 this=self._parse_primary(), 2077 kind=kind, 2078 ) 2079 2080 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2081 conflict = self._match_text_seq("ON", "CONFLICT") 2082 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2083 2084 if not conflict and not duplicate: 2085 return None 2086 2087 nothing = None 2088 expressions = None 2089 key = None 2090 constraint = None 2091 2092 if conflict: 2093 if self._match_text_seq("ON", "CONSTRAINT"): 2094 constraint = self._parse_id_var() 2095 else: 2096 key = self._parse_csv(self._parse_value) 2097 2098 self._match_text_seq("DO") 2099 if self._match_text_seq("NOTHING"): 2100 nothing = True 2101 else: 2102 self._match(TokenType.UPDATE) 2103 self._match(TokenType.SET) 2104 expressions = self._parse_csv(self._parse_equality) 2105 2106 return self.expression( 2107 exp.OnConflict, 2108 duplicate=duplicate, 2109 expressions=expressions, 2110 nothing=nothing, 2111 key=key, 2112 constraint=constraint, 2113 ) 2114 2115 def _parse_returning(self) -> t.Optional[exp.Returning]: 2116 if not self._match(TokenType.RETURNING): 2117 return None 2118 return self.expression( 2119 exp.Returning, 2120 expressions=self._parse_csv(self._parse_expression), 2121 into=self._match(TokenType.INTO) and self._parse_table_part(), 2122 ) 2123 2124 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2125 if not self._match(TokenType.FORMAT): 2126 return None 2127 return self._parse_row_format() 2128 2129 def _parse_row_format( 2130 self, match_row: bool = False 2131 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2132 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2133 return None 2134 2135 if self._match_text_seq("SERDE"): 2136 this = self._parse_string() 2137 2138 serde_properties = None 2139 if self._match(TokenType.SERDE_PROPERTIES): 2140 serde_properties = self.expression( 2141 exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property) 2142 ) 2143 2144 return self.expression( 2145 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2146 ) 2147 2148 self._match_text_seq("DELIMITED") 2149 2150 kwargs = {} 2151 2152 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2153 kwargs["fields"] = self._parse_string() 2154 if self._match_text_seq("ESCAPED", "BY"): 2155 kwargs["escaped"] = self._parse_string() 2156 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2157 kwargs["collection_items"] = self._parse_string() 2158 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2159 kwargs["map_keys"] = self._parse_string() 2160 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2161 kwargs["lines"] = self._parse_string() 2162 if self._match_text_seq("NULL", "DEFINED", "AS"): 2163 kwargs["null"] = self._parse_string() 2164 2165 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2166 2167 def _parse_load(self) -> exp.LoadData | exp.Command: 2168 if self._match_text_seq("DATA"): 2169 local = self._match_text_seq("LOCAL") 2170 self._match_text_seq("INPATH") 2171 inpath = self._parse_string() 2172 overwrite = self._match(TokenType.OVERWRITE) 2173 self._match_pair(TokenType.INTO, TokenType.TABLE) 2174 2175 return self.expression( 2176 exp.LoadData, 2177 this=self._parse_table(schema=True), 2178 local=local, 2179 overwrite=overwrite, 2180 inpath=inpath, 2181 partition=self._parse_partition(), 2182 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2183 serde=self._match_text_seq("SERDE") and self._parse_string(), 2184 ) 2185 return self._parse_as_command(self._prev) 2186 2187 def _parse_delete(self) -> exp.Delete: 2188 # This handles MySQL's "Multiple-Table Syntax" 2189 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2190 tables = None 2191 comments = self._prev_comments 2192 if not self._match(TokenType.FROM, advance=False): 2193 tables = self._parse_csv(self._parse_table) or None 2194 2195 returning = self._parse_returning() 2196 2197 return self.expression( 2198 exp.Delete, 2199 comments=comments, 2200 tables=tables, 2201 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2202 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2203 where=self._parse_where(), 2204 returning=returning or self._parse_returning(), 2205 limit=self._parse_limit(), 2206 ) 2207 2208 def _parse_update(self) -> exp.Update: 2209 comments = self._prev_comments 2210 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2211 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2212 returning = self._parse_returning() 2213 return self.expression( 2214 exp.Update, 2215 comments=comments, 2216 **{ # type: ignore 2217 "this": this, 2218 "expressions": expressions, 2219 "from": self._parse_from(joins=True), 2220 "where": self._parse_where(), 2221 "returning": returning or self._parse_returning(), 2222 "order": self._parse_order(), 2223 "limit": self._parse_limit(), 2224 }, 2225 ) 2226 2227 def _parse_uncache(self) -> exp.Uncache: 2228 if not self._match(TokenType.TABLE): 2229 self.raise_error("Expecting TABLE after UNCACHE") 2230 2231 return self.expression( 2232 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2233 ) 2234 2235 def _parse_cache(self) -> exp.Cache: 2236 lazy = self._match_text_seq("LAZY") 2237 self._match(TokenType.TABLE) 2238 table = self._parse_table(schema=True) 2239 2240 options = [] 2241 if self._match_text_seq("OPTIONS"): 2242 self._match_l_paren() 2243 k = self._parse_string() 2244 self._match(TokenType.EQ) 2245 v = self._parse_string() 2246 options = [k, v] 2247 self._match_r_paren() 2248 2249 self._match(TokenType.ALIAS) 2250 return self.expression( 2251 exp.Cache, 2252 this=table, 2253 lazy=lazy, 2254 options=options, 2255 expression=self._parse_select(nested=True), 2256 ) 2257 2258 def _parse_partition(self) -> t.Optional[exp.Partition]: 2259 if not self._match(TokenType.PARTITION): 2260 return None 2261 2262 return self.expression( 2263 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 2264 ) 2265 2266 def _parse_value(self) -> exp.Tuple: 2267 if self._match(TokenType.L_PAREN): 2268 expressions = self._parse_csv(self._parse_expression) 2269 self._match_r_paren() 2270 return self.expression(exp.Tuple, expressions=expressions) 2271 2272 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 2273 # https://prestodb.io/docs/current/sql/values.html 2274 return self.expression(exp.Tuple, expressions=[self._parse_expression()]) 2275 2276 def _parse_projections(self) -> t.List[exp.Expression]: 2277 return self._parse_expressions() 2278 2279 def _parse_select( 2280 self, 2281 nested: bool = False, 2282 table: bool = False, 2283 parse_subquery_alias: bool = True, 2284 parse_set_operation: bool = True, 2285 ) -> t.Optional[exp.Expression]: 2286 cte = self._parse_with() 2287 2288 if cte: 2289 this = self._parse_statement() 2290 2291 if not this: 2292 self.raise_error("Failed to parse any statement following CTE") 2293 return cte 2294 2295 if "with" in this.arg_types: 2296 this.set("with", cte) 2297 else: 2298 self.raise_error(f"{this.key} does not support CTE") 2299 this = cte 2300 2301 return this 2302 2303 # duckdb supports leading with FROM x 2304 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2305 2306 if self._match(TokenType.SELECT): 2307 comments = self._prev_comments 2308 2309 hint = self._parse_hint() 2310 all_ = self._match(TokenType.ALL) 2311 distinct = self._match_set(self.DISTINCT_TOKENS) 2312 2313 kind = ( 2314 self._match(TokenType.ALIAS) 2315 and self._match_texts(("STRUCT", "VALUE")) 2316 and self._prev.text.upper() 2317 ) 2318 2319 if distinct: 2320 distinct = self.expression( 2321 exp.Distinct, 2322 on=self._parse_value() if self._match(TokenType.ON) else None, 2323 ) 2324 2325 if all_ and distinct: 2326 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2327 2328 limit = self._parse_limit(top=True) 2329 projections = self._parse_projections() 2330 2331 this = self.expression( 2332 exp.Select, 2333 kind=kind, 2334 hint=hint, 2335 distinct=distinct, 2336 expressions=projections, 2337 limit=limit, 2338 ) 2339 this.comments = comments 2340 2341 into = self._parse_into() 2342 if into: 2343 this.set("into", into) 2344 2345 if not from_: 2346 from_ = self._parse_from() 2347 2348 if from_: 2349 this.set("from", from_) 2350 2351 this = self._parse_query_modifiers(this) 2352 elif (table or nested) and self._match(TokenType.L_PAREN): 2353 if self._match(TokenType.PIVOT): 2354 this = self._parse_simplified_pivot() 2355 elif self._match(TokenType.FROM): 2356 this = exp.select("*").from_( 2357 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2358 ) 2359 else: 2360 this = ( 2361 self._parse_table() 2362 if table 2363 else self._parse_select(nested=True, parse_set_operation=False) 2364 ) 2365 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2366 2367 self._match_r_paren() 2368 2369 # We return early here so that the UNION isn't attached to the subquery by the 2370 # following call to _parse_set_operations, but instead becomes the parent node 2371 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2372 elif self._match(TokenType.VALUES): 2373 this = self.expression( 2374 exp.Values, 2375 expressions=self._parse_csv(self._parse_value), 2376 alias=self._parse_table_alias(), 2377 ) 2378 elif from_: 2379 this = exp.select("*").from_(from_.this, copy=False) 2380 else: 2381 this = None 2382 2383 if parse_set_operation: 2384 return self._parse_set_operations(this) 2385 return this 2386 2387 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2388 if not skip_with_token and not self._match(TokenType.WITH): 2389 return None 2390 2391 comments = self._prev_comments 2392 recursive = self._match(TokenType.RECURSIVE) 2393 2394 expressions = [] 2395 while True: 2396 expressions.append(self._parse_cte()) 2397 2398 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2399 break 2400 else: 2401 self._match(TokenType.WITH) 2402 2403 return self.expression( 2404 exp.With, comments=comments, expressions=expressions, recursive=recursive 2405 ) 2406 2407 def _parse_cte(self) -> exp.CTE: 2408 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2409 if not alias or not alias.this: 2410 self.raise_error("Expected CTE to have alias") 2411 2412 self._match(TokenType.ALIAS) 2413 return self.expression( 2414 exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias 2415 ) 2416 2417 def _parse_table_alias( 2418 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2419 ) -> t.Optional[exp.TableAlias]: 2420 any_token = self._match(TokenType.ALIAS) 2421 alias = ( 2422 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2423 or self._parse_string_as_identifier() 2424 ) 2425 2426 index = self._index 2427 if self._match(TokenType.L_PAREN): 2428 columns = self._parse_csv(self._parse_function_parameter) 2429 self._match_r_paren() if columns else self._retreat(index) 2430 else: 2431 columns = None 2432 2433 if not alias and not columns: 2434 return None 2435 2436 return self.expression(exp.TableAlias, this=alias, columns=columns) 2437 2438 def _parse_subquery( 2439 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2440 ) -> t.Optional[exp.Subquery]: 2441 if not this: 2442 return None 2443 2444 return self.expression( 2445 exp.Subquery, 2446 this=this, 2447 pivots=self._parse_pivots(), 2448 alias=self._parse_table_alias() if parse_alias else None, 2449 ) 2450 2451 def _parse_query_modifiers( 2452 self, this: t.Optional[exp.Expression] 2453 ) -> t.Optional[exp.Expression]: 2454 if isinstance(this, self.MODIFIABLES): 2455 for join in iter(self._parse_join, None): 2456 this.append("joins", join) 2457 for lateral in iter(self._parse_lateral, None): 2458 this.append("laterals", lateral) 2459 2460 while True: 2461 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2462 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2463 key, expression = parser(self) 2464 2465 if expression: 2466 this.set(key, expression) 2467 if key == "limit": 2468 offset = expression.args.pop("offset", None) 2469 if offset: 2470 this.set("offset", exp.Offset(expression=offset)) 2471 continue 2472 break 2473 return this 2474 2475 def _parse_hint(self) -> t.Optional[exp.Hint]: 2476 if self._match(TokenType.HINT): 2477 hints = [] 2478 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2479 hints.extend(hint) 2480 2481 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2482 self.raise_error("Expected */ after HINT") 2483 2484 return self.expression(exp.Hint, expressions=hints) 2485 2486 return None 2487 2488 def _parse_into(self) -> t.Optional[exp.Into]: 2489 if not self._match(TokenType.INTO): 2490 return None 2491 2492 temp = self._match(TokenType.TEMPORARY) 2493 unlogged = self._match_text_seq("UNLOGGED") 2494 self._match(TokenType.TABLE) 2495 2496 return self.expression( 2497 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2498 ) 2499 2500 def _parse_from( 2501 self, joins: bool = False, skip_from_token: bool = False 2502 ) -> t.Optional[exp.From]: 2503 if not skip_from_token and not self._match(TokenType.FROM): 2504 return None 2505 2506 return self.expression( 2507 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2508 ) 2509 2510 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2511 if not self._match(TokenType.MATCH_RECOGNIZE): 2512 return None 2513 2514 self._match_l_paren() 2515 2516 partition = self._parse_partition_by() 2517 order = self._parse_order() 2518 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2519 2520 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2521 rows = exp.var("ONE ROW PER MATCH") 2522 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2523 text = "ALL ROWS PER MATCH" 2524 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2525 text += " SHOW EMPTY MATCHES" 2526 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2527 text += " OMIT EMPTY MATCHES" 2528 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2529 text += " WITH UNMATCHED ROWS" 2530 rows = exp.var(text) 2531 else: 2532 rows = None 2533 2534 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2535 text = "AFTER MATCH SKIP" 2536 if self._match_text_seq("PAST", "LAST", "ROW"): 2537 text += " PAST LAST ROW" 2538 elif self._match_text_seq("TO", "NEXT", "ROW"): 2539 text += " TO NEXT ROW" 2540 elif self._match_text_seq("TO", "FIRST"): 2541 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2542 elif self._match_text_seq("TO", "LAST"): 2543 text += f" TO LAST {self._advance_any().text}" # type: ignore 2544 after = exp.var(text) 2545 else: 2546 after = None 2547 2548 if self._match_text_seq("PATTERN"): 2549 self._match_l_paren() 2550 2551 if not self._curr: 2552 self.raise_error("Expecting )", self._curr) 2553 2554 paren = 1 2555 start = self._curr 2556 2557 while self._curr and paren > 0: 2558 if self._curr.token_type == TokenType.L_PAREN: 2559 paren += 1 2560 if self._curr.token_type == TokenType.R_PAREN: 2561 paren -= 1 2562 2563 end = self._prev 2564 self._advance() 2565 2566 if paren > 0: 2567 self.raise_error("Expecting )", self._curr) 2568 2569 pattern = exp.var(self._find_sql(start, end)) 2570 else: 2571 pattern = None 2572 2573 define = ( 2574 self._parse_csv(self._parse_name_as_expression) 2575 if self._match_text_seq("DEFINE") 2576 else None 2577 ) 2578 2579 self._match_r_paren() 2580 2581 return self.expression( 2582 exp.MatchRecognize, 2583 partition_by=partition, 2584 order=order, 2585 measures=measures, 2586 rows=rows, 2587 after=after, 2588 pattern=pattern, 2589 define=define, 2590 alias=self._parse_table_alias(), 2591 ) 2592 2593 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2594 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2595 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 2596 cross_apply = False 2597 2598 if cross_apply is not None: 2599 this = self._parse_select(table=True) 2600 view = None 2601 outer = None 2602 elif self._match(TokenType.LATERAL): 2603 this = self._parse_select(table=True) 2604 view = self._match(TokenType.VIEW) 2605 outer = self._match(TokenType.OUTER) 2606 else: 2607 return None 2608 2609 if not this: 2610 this = ( 2611 self._parse_unnest() 2612 or self._parse_function() 2613 or self._parse_id_var(any_token=False) 2614 ) 2615 2616 while self._match(TokenType.DOT): 2617 this = exp.Dot( 2618 this=this, 2619 expression=self._parse_function() or self._parse_id_var(any_token=False), 2620 ) 2621 2622 if view: 2623 table = self._parse_id_var(any_token=False) 2624 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2625 table_alias: t.Optional[exp.TableAlias] = self.expression( 2626 exp.TableAlias, this=table, columns=columns 2627 ) 2628 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 2629 # We move the alias from the lateral's child node to the lateral itself 2630 table_alias = this.args["alias"].pop() 2631 else: 2632 table_alias = self._parse_table_alias() 2633 2634 return self.expression( 2635 exp.Lateral, 2636 this=this, 2637 view=view, 2638 outer=outer, 2639 alias=table_alias, 2640 cross_apply=cross_apply, 2641 ) 2642 2643 def _parse_join_parts( 2644 self, 2645 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2646 return ( 2647 self._match_set(self.JOIN_METHODS) and self._prev, 2648 self._match_set(self.JOIN_SIDES) and self._prev, 2649 self._match_set(self.JOIN_KINDS) and self._prev, 2650 ) 2651 2652 def _parse_join( 2653 self, skip_join_token: bool = False, parse_bracket: bool = False 2654 ) -> t.Optional[exp.Join]: 2655 if self._match(TokenType.COMMA): 2656 return self.expression(exp.Join, this=self._parse_table()) 2657 2658 index = self._index 2659 method, side, kind = self._parse_join_parts() 2660 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2661 join = self._match(TokenType.JOIN) 2662 2663 if not skip_join_token and not join: 2664 self._retreat(index) 2665 kind = None 2666 method = None 2667 side = None 2668 2669 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2670 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2671 2672 if not skip_join_token and not join and not outer_apply and not cross_apply: 2673 return None 2674 2675 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2676 2677 if method: 2678 kwargs["method"] = method.text 2679 if side: 2680 kwargs["side"] = side.text 2681 if kind: 2682 kwargs["kind"] = kind.text 2683 if hint: 2684 kwargs["hint"] = hint 2685 2686 if self._match(TokenType.ON): 2687 kwargs["on"] = self._parse_conjunction() 2688 elif self._match(TokenType.USING): 2689 kwargs["using"] = self._parse_wrapped_id_vars() 2690 elif not (kind and kind.token_type == TokenType.CROSS): 2691 index = self._index 2692 join = self._parse_join() 2693 2694 if join and self._match(TokenType.ON): 2695 kwargs["on"] = self._parse_conjunction() 2696 elif join and self._match(TokenType.USING): 2697 kwargs["using"] = self._parse_wrapped_id_vars() 2698 else: 2699 join = None 2700 self._retreat(index) 2701 2702 kwargs["this"].set("joins", [join] if join else None) 2703 2704 comments = [c for token in (method, side, kind) if token for c in token.comments] 2705 return self.expression(exp.Join, comments=comments, **kwargs) 2706 2707 def _parse_opclass(self) -> t.Optional[exp.Expression]: 2708 this = self._parse_conjunction() 2709 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 2710 return this 2711 2712 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 2713 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 2714 2715 return this 2716 2717 def _parse_index( 2718 self, 2719 index: t.Optional[exp.Expression] = None, 2720 ) -> t.Optional[exp.Index]: 2721 if index: 2722 unique = None 2723 primary = None 2724 amp = None 2725 2726 self._match(TokenType.ON) 2727 self._match(TokenType.TABLE) # hive 2728 table = self._parse_table_parts(schema=True) 2729 else: 2730 unique = self._match(TokenType.UNIQUE) 2731 primary = self._match_text_seq("PRIMARY") 2732 amp = self._match_text_seq("AMP") 2733 2734 if not self._match(TokenType.INDEX): 2735 return None 2736 2737 index = self._parse_id_var() 2738 table = None 2739 2740 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 2741 2742 if self._match(TokenType.L_PAREN, advance=False): 2743 columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass)) 2744 else: 2745 columns = None 2746 2747 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 2748 2749 return self.expression( 2750 exp.Index, 2751 this=index, 2752 table=table, 2753 using=using, 2754 columns=columns, 2755 unique=unique, 2756 primary=primary, 2757 amp=amp, 2758 include=include, 2759 partition_by=self._parse_partition_by(), 2760 where=self._parse_where(), 2761 ) 2762 2763 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2764 hints: t.List[exp.Expression] = [] 2765 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2766 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2767 hints.append( 2768 self.expression( 2769 exp.WithTableHint, 2770 expressions=self._parse_csv( 2771 lambda: self._parse_function() or self._parse_var(any_token=True) 2772 ), 2773 ) 2774 ) 2775 self._match_r_paren() 2776 else: 2777 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2778 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2779 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2780 2781 self._match_texts(("INDEX", "KEY")) 2782 if self._match(TokenType.FOR): 2783 hint.set("target", self._advance_any() and self._prev.text.upper()) 2784 2785 hint.set("expressions", self._parse_wrapped_id_vars()) 2786 hints.append(hint) 2787 2788 return hints or None 2789 2790 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2791 return ( 2792 (not schema and self._parse_function(optional_parens=False)) 2793 or self._parse_id_var(any_token=False) 2794 or self._parse_string_as_identifier() 2795 or self._parse_placeholder() 2796 ) 2797 2798 def _parse_table_parts(self, schema: bool = False, is_db_reference: bool = False) -> exp.Table: 2799 catalog = None 2800 db = None 2801 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 2802 2803 while self._match(TokenType.DOT): 2804 if catalog: 2805 # This allows nesting the table in arbitrarily many dot expressions if needed 2806 table = self.expression( 2807 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2808 ) 2809 else: 2810 catalog = db 2811 db = table 2812 table = self._parse_table_part(schema=schema) or "" 2813 2814 if is_db_reference: 2815 catalog = db 2816 db = table 2817 table = None 2818 2819 if not table and not is_db_reference: 2820 self.raise_error(f"Expected table name but got {self._curr}") 2821 if not db and is_db_reference: 2822 self.raise_error(f"Expected database name but got {self._curr}") 2823 2824 return self.expression( 2825 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2826 ) 2827 2828 def _parse_table( 2829 self, 2830 schema: bool = False, 2831 joins: bool = False, 2832 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 2833 parse_bracket: bool = False, 2834 is_db_reference: bool = False, 2835 ) -> t.Optional[exp.Expression]: 2836 lateral = self._parse_lateral() 2837 if lateral: 2838 return lateral 2839 2840 unnest = self._parse_unnest() 2841 if unnest: 2842 return unnest 2843 2844 values = self._parse_derived_table_values() 2845 if values: 2846 return values 2847 2848 subquery = self._parse_select(table=True) 2849 if subquery: 2850 if not subquery.args.get("pivots"): 2851 subquery.set("pivots", self._parse_pivots()) 2852 return subquery 2853 2854 bracket = parse_bracket and self._parse_bracket(None) 2855 bracket = self.expression(exp.Table, this=bracket) if bracket else None 2856 this = t.cast( 2857 exp.Expression, 2858 bracket 2859 or self._parse_bracket( 2860 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 2861 ), 2862 ) 2863 2864 if schema: 2865 return self._parse_schema(this=this) 2866 2867 version = self._parse_version() 2868 2869 if version: 2870 this.set("version", version) 2871 2872 if self.dialect.ALIAS_POST_TABLESAMPLE: 2873 table_sample = self._parse_table_sample() 2874 2875 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2876 if alias: 2877 this.set("alias", alias) 2878 2879 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 2880 return self.expression( 2881 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 2882 ) 2883 2884 this.set("hints", self._parse_table_hints()) 2885 2886 if not this.args.get("pivots"): 2887 this.set("pivots", self._parse_pivots()) 2888 2889 if not self.dialect.ALIAS_POST_TABLESAMPLE: 2890 table_sample = self._parse_table_sample() 2891 2892 if table_sample: 2893 table_sample.set("this", this) 2894 this = table_sample 2895 2896 if joins: 2897 for join in iter(self._parse_join, None): 2898 this.append("joins", join) 2899 2900 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 2901 this.set("ordinality", True) 2902 this.set("alias", self._parse_table_alias()) 2903 2904 return this 2905 2906 def _parse_version(self) -> t.Optional[exp.Version]: 2907 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 2908 this = "TIMESTAMP" 2909 elif self._match(TokenType.VERSION_SNAPSHOT): 2910 this = "VERSION" 2911 else: 2912 return None 2913 2914 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 2915 kind = self._prev.text.upper() 2916 start = self._parse_bitwise() 2917 self._match_texts(("TO", "AND")) 2918 end = self._parse_bitwise() 2919 expression: t.Optional[exp.Expression] = self.expression( 2920 exp.Tuple, expressions=[start, end] 2921 ) 2922 elif self._match_text_seq("CONTAINED", "IN"): 2923 kind = "CONTAINED IN" 2924 expression = self.expression( 2925 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 2926 ) 2927 elif self._match(TokenType.ALL): 2928 kind = "ALL" 2929 expression = None 2930 else: 2931 self._match_text_seq("AS", "OF") 2932 kind = "AS OF" 2933 expression = self._parse_type() 2934 2935 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 2936 2937 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2938 if not self._match(TokenType.UNNEST): 2939 return None 2940 2941 expressions = self._parse_wrapped_csv(self._parse_equality) 2942 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2943 2944 alias = self._parse_table_alias() if with_alias else None 2945 2946 if alias: 2947 if self.dialect.UNNEST_COLUMN_ONLY: 2948 if alias.args.get("columns"): 2949 self.raise_error("Unexpected extra column alias in unnest.") 2950 2951 alias.set("columns", [alias.this]) 2952 alias.set("this", None) 2953 2954 columns = alias.args.get("columns") or [] 2955 if offset and len(expressions) < len(columns): 2956 offset = columns.pop() 2957 2958 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 2959 self._match(TokenType.ALIAS) 2960 offset = self._parse_id_var( 2961 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 2962 ) or exp.to_identifier("offset") 2963 2964 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 2965 2966 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2967 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2968 if not is_derived and not self._match(TokenType.VALUES): 2969 return None 2970 2971 expressions = self._parse_csv(self._parse_value) 2972 alias = self._parse_table_alias() 2973 2974 if is_derived: 2975 self._match_r_paren() 2976 2977 return self.expression( 2978 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2979 ) 2980 2981 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2982 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2983 as_modifier and self._match_text_seq("USING", "SAMPLE") 2984 ): 2985 return None 2986 2987 bucket_numerator = None 2988 bucket_denominator = None 2989 bucket_field = None 2990 percent = None 2991 size = None 2992 seed = None 2993 2994 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 2995 matched_l_paren = self._match(TokenType.L_PAREN) 2996 2997 if self.TABLESAMPLE_CSV: 2998 num = None 2999 expressions = self._parse_csv(self._parse_primary) 3000 else: 3001 expressions = None 3002 num = ( 3003 self._parse_factor() 3004 if self._match(TokenType.NUMBER, advance=False) 3005 else self._parse_primary() or self._parse_placeholder() 3006 ) 3007 3008 if self._match_text_seq("BUCKET"): 3009 bucket_numerator = self._parse_number() 3010 self._match_text_seq("OUT", "OF") 3011 bucket_denominator = bucket_denominator = self._parse_number() 3012 self._match(TokenType.ON) 3013 bucket_field = self._parse_field() 3014 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3015 percent = num 3016 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3017 size = num 3018 else: 3019 percent = num 3020 3021 if matched_l_paren: 3022 self._match_r_paren() 3023 3024 if self._match(TokenType.L_PAREN): 3025 method = self._parse_var(upper=True) 3026 seed = self._match(TokenType.COMMA) and self._parse_number() 3027 self._match_r_paren() 3028 elif self._match_texts(("SEED", "REPEATABLE")): 3029 seed = self._parse_wrapped(self._parse_number) 3030 3031 return self.expression( 3032 exp.TableSample, 3033 expressions=expressions, 3034 method=method, 3035 bucket_numerator=bucket_numerator, 3036 bucket_denominator=bucket_denominator, 3037 bucket_field=bucket_field, 3038 percent=percent, 3039 size=size, 3040 seed=seed, 3041 ) 3042 3043 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3044 return list(iter(self._parse_pivot, None)) or None 3045 3046 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 3047 return list(iter(self._parse_join, None)) or None 3048 3049 # https://duckdb.org/docs/sql/statements/pivot 3050 def _parse_simplified_pivot(self) -> exp.Pivot: 3051 def _parse_on() -> t.Optional[exp.Expression]: 3052 this = self._parse_bitwise() 3053 return self._parse_in(this) if self._match(TokenType.IN) else this 3054 3055 this = self._parse_table() 3056 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3057 using = self._match(TokenType.USING) and self._parse_csv( 3058 lambda: self._parse_alias(self._parse_function()) 3059 ) 3060 group = self._parse_group() 3061 return self.expression( 3062 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3063 ) 3064 3065 def _parse_pivot_in(self) -> exp.In: 3066 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3067 this = self._parse_conjunction() 3068 3069 self._match(TokenType.ALIAS) 3070 alias = self._parse_field() 3071 if alias: 3072 return self.expression(exp.PivotAlias, this=this, alias=alias) 3073 3074 return this 3075 3076 value = self._parse_column() 3077 3078 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3079 self.raise_error("Expecting IN (") 3080 3081 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3082 3083 self._match_r_paren() 3084 return self.expression(exp.In, this=value, expressions=aliased_expressions) 3085 3086 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3087 index = self._index 3088 include_nulls = None 3089 3090 if self._match(TokenType.PIVOT): 3091 unpivot = False 3092 elif self._match(TokenType.UNPIVOT): 3093 unpivot = True 3094 3095 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3096 if self._match_text_seq("INCLUDE", "NULLS"): 3097 include_nulls = True 3098 elif self._match_text_seq("EXCLUDE", "NULLS"): 3099 include_nulls = False 3100 else: 3101 return None 3102 3103 expressions = [] 3104 3105 if not self._match(TokenType.L_PAREN): 3106 self._retreat(index) 3107 return None 3108 3109 if unpivot: 3110 expressions = self._parse_csv(self._parse_column) 3111 else: 3112 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3113 3114 if not expressions: 3115 self.raise_error("Failed to parse PIVOT's aggregation list") 3116 3117 if not self._match(TokenType.FOR): 3118 self.raise_error("Expecting FOR") 3119 3120 field = self._parse_pivot_in() 3121 3122 self._match_r_paren() 3123 3124 pivot = self.expression( 3125 exp.Pivot, 3126 expressions=expressions, 3127 field=field, 3128 unpivot=unpivot, 3129 include_nulls=include_nulls, 3130 ) 3131 3132 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3133 pivot.set("alias", self._parse_table_alias()) 3134 3135 if not unpivot: 3136 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3137 3138 columns: t.List[exp.Expression] = [] 3139 for fld in pivot.args["field"].expressions: 3140 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3141 for name in names: 3142 if self.PREFIXED_PIVOT_COLUMNS: 3143 name = f"{name}_{field_name}" if name else field_name 3144 else: 3145 name = f"{field_name}_{name}" if name else field_name 3146 3147 columns.append(exp.to_identifier(name)) 3148 3149 pivot.set("columns", columns) 3150 3151 return pivot 3152 3153 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3154 return [agg.alias for agg in aggregations] 3155 3156 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3157 if not skip_where_token and not self._match(TokenType.WHERE): 3158 return None 3159 3160 return self.expression( 3161 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 3162 ) 3163 3164 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3165 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3166 return None 3167 3168 elements = defaultdict(list) 3169 3170 if self._match(TokenType.ALL): 3171 return self.expression(exp.Group, all=True) 3172 3173 while True: 3174 expressions = self._parse_csv(self._parse_conjunction) 3175 if expressions: 3176 elements["expressions"].extend(expressions) 3177 3178 grouping_sets = self._parse_grouping_sets() 3179 if grouping_sets: 3180 elements["grouping_sets"].extend(grouping_sets) 3181 3182 rollup = None 3183 cube = None 3184 totals = None 3185 3186 index = self._index 3187 with_ = self._match(TokenType.WITH) 3188 if self._match(TokenType.ROLLUP): 3189 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3190 elements["rollup"].extend(ensure_list(rollup)) 3191 3192 if self._match(TokenType.CUBE): 3193 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3194 elements["cube"].extend(ensure_list(cube)) 3195 3196 if self._match_text_seq("TOTALS"): 3197 totals = True 3198 elements["totals"] = True # type: ignore 3199 3200 if not (grouping_sets or rollup or cube or totals): 3201 if with_: 3202 self._retreat(index) 3203 break 3204 3205 return self.expression(exp.Group, **elements) # type: ignore 3206 3207 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3208 if not self._match(TokenType.GROUPING_SETS): 3209 return None 3210 3211 return self._parse_wrapped_csv(self._parse_grouping_set) 3212 3213 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3214 if self._match(TokenType.L_PAREN): 3215 grouping_set = self._parse_csv(self._parse_column) 3216 self._match_r_paren() 3217 return self.expression(exp.Tuple, expressions=grouping_set) 3218 3219 return self._parse_column() 3220 3221 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3222 if not skip_having_token and not self._match(TokenType.HAVING): 3223 return None 3224 return self.expression(exp.Having, this=self._parse_conjunction()) 3225 3226 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3227 if not self._match(TokenType.QUALIFY): 3228 return None 3229 return self.expression(exp.Qualify, this=self._parse_conjunction()) 3230 3231 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3232 if skip_start_token: 3233 start = None 3234 elif self._match(TokenType.START_WITH): 3235 start = self._parse_conjunction() 3236 else: 3237 return None 3238 3239 self._match(TokenType.CONNECT_BY) 3240 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3241 exp.Prior, this=self._parse_bitwise() 3242 ) 3243 connect = self._parse_conjunction() 3244 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3245 3246 if not start and self._match(TokenType.START_WITH): 3247 start = self._parse_conjunction() 3248 3249 return self.expression(exp.Connect, start=start, connect=connect) 3250 3251 def _parse_name_as_expression(self) -> exp.Alias: 3252 return self.expression( 3253 exp.Alias, 3254 alias=self._parse_id_var(any_token=True), 3255 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 3256 ) 3257 3258 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 3259 if self._match_text_seq("INTERPOLATE"): 3260 return self._parse_wrapped_csv(self._parse_name_as_expression) 3261 return None 3262 3263 def _parse_order( 3264 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3265 ) -> t.Optional[exp.Expression]: 3266 siblings = None 3267 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3268 if not self._match(TokenType.ORDER_SIBLINGS_BY): 3269 return this 3270 3271 siblings = True 3272 3273 return self.expression( 3274 exp.Order, 3275 this=this, 3276 expressions=self._parse_csv(self._parse_ordered), 3277 interpolate=self._parse_interpolate(), 3278 siblings=siblings, 3279 ) 3280 3281 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3282 if not self._match(token): 3283 return None 3284 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3285 3286 def _parse_ordered(self, parse_method: t.Optional[t.Callable] = None) -> exp.Ordered: 3287 this = parse_method() if parse_method else self._parse_conjunction() 3288 3289 asc = self._match(TokenType.ASC) 3290 desc = self._match(TokenType.DESC) or (asc and False) 3291 3292 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3293 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3294 3295 nulls_first = is_nulls_first or False 3296 explicitly_null_ordered = is_nulls_first or is_nulls_last 3297 3298 if ( 3299 not explicitly_null_ordered 3300 and ( 3301 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 3302 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 3303 ) 3304 and self.dialect.NULL_ORDERING != "nulls_are_last" 3305 ): 3306 nulls_first = True 3307 3308 if self._match_text_seq("WITH", "FILL"): 3309 with_fill = self.expression( 3310 exp.WithFill, 3311 **{ # type: ignore 3312 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 3313 "to": self._match_text_seq("TO") and self._parse_bitwise(), 3314 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 3315 }, 3316 ) 3317 else: 3318 with_fill = None 3319 3320 return self.expression( 3321 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 3322 ) 3323 3324 def _parse_limit( 3325 self, this: t.Optional[exp.Expression] = None, top: bool = False 3326 ) -> t.Optional[exp.Expression]: 3327 if self._match(TokenType.TOP if top else TokenType.LIMIT): 3328 comments = self._prev_comments 3329 if top: 3330 limit_paren = self._match(TokenType.L_PAREN) 3331 expression = self._parse_term() if limit_paren else self._parse_number() 3332 3333 if limit_paren: 3334 self._match_r_paren() 3335 else: 3336 expression = self._parse_term() 3337 3338 if self._match(TokenType.COMMA): 3339 offset = expression 3340 expression = self._parse_term() 3341 else: 3342 offset = None 3343 3344 limit_exp = self.expression( 3345 exp.Limit, this=this, expression=expression, offset=offset, comments=comments 3346 ) 3347 3348 return limit_exp 3349 3350 if self._match(TokenType.FETCH): 3351 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3352 direction = self._prev.text.upper() if direction else "FIRST" 3353 3354 count = self._parse_field(tokens=self.FETCH_TOKENS) 3355 percent = self._match(TokenType.PERCENT) 3356 3357 self._match_set((TokenType.ROW, TokenType.ROWS)) 3358 3359 only = self._match_text_seq("ONLY") 3360 with_ties = self._match_text_seq("WITH", "TIES") 3361 3362 if only and with_ties: 3363 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3364 3365 return self.expression( 3366 exp.Fetch, 3367 direction=direction, 3368 count=count, 3369 percent=percent, 3370 with_ties=with_ties, 3371 ) 3372 3373 return this 3374 3375 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3376 if not self._match(TokenType.OFFSET): 3377 return this 3378 3379 count = self._parse_term() 3380 self._match_set((TokenType.ROW, TokenType.ROWS)) 3381 return self.expression(exp.Offset, this=this, expression=count) 3382 3383 def _parse_locks(self) -> t.List[exp.Lock]: 3384 locks = [] 3385 while True: 3386 if self._match_text_seq("FOR", "UPDATE"): 3387 update = True 3388 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3389 "LOCK", "IN", "SHARE", "MODE" 3390 ): 3391 update = False 3392 else: 3393 break 3394 3395 expressions = None 3396 if self._match_text_seq("OF"): 3397 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3398 3399 wait: t.Optional[bool | exp.Expression] = None 3400 if self._match_text_seq("NOWAIT"): 3401 wait = True 3402 elif self._match_text_seq("WAIT"): 3403 wait = self._parse_primary() 3404 elif self._match_text_seq("SKIP", "LOCKED"): 3405 wait = False 3406 3407 locks.append( 3408 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3409 ) 3410 3411 return locks 3412 3413 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3414 while this and self._match_set(self.SET_OPERATIONS): 3415 token_type = self._prev.token_type 3416 3417 if token_type == TokenType.UNION: 3418 operation = exp.Union 3419 elif token_type == TokenType.EXCEPT: 3420 operation = exp.Except 3421 else: 3422 operation = exp.Intersect 3423 3424 comments = self._prev.comments 3425 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 3426 by_name = self._match_text_seq("BY", "NAME") 3427 expression = self._parse_select(nested=True, parse_set_operation=False) 3428 3429 this = self.expression( 3430 operation, 3431 comments=comments, 3432 this=this, 3433 distinct=distinct, 3434 by_name=by_name, 3435 expression=expression, 3436 ) 3437 3438 if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION: 3439 expression = this.expression 3440 3441 if expression: 3442 for arg in self.UNION_MODIFIERS: 3443 expr = expression.args.get(arg) 3444 if expr: 3445 this.set(arg, expr.pop()) 3446 3447 return this 3448 3449 def _parse_expression(self) -> t.Optional[exp.Expression]: 3450 return self._parse_alias(self._parse_conjunction()) 3451 3452 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3453 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 3454 3455 def _parse_equality(self) -> t.Optional[exp.Expression]: 3456 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3457 3458 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3459 return self._parse_tokens(self._parse_range, self.COMPARISON) 3460 3461 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3462 this = this or self._parse_bitwise() 3463 negate = self._match(TokenType.NOT) 3464 3465 if self._match_set(self.RANGE_PARSERS): 3466 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 3467 if not expression: 3468 return this 3469 3470 this = expression 3471 elif self._match(TokenType.ISNULL): 3472 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3473 3474 # Postgres supports ISNULL and NOTNULL for conditions. 3475 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 3476 if self._match(TokenType.NOTNULL): 3477 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3478 this = self.expression(exp.Not, this=this) 3479 3480 if negate: 3481 this = self.expression(exp.Not, this=this) 3482 3483 if self._match(TokenType.IS): 3484 this = self._parse_is(this) 3485 3486 return this 3487 3488 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3489 index = self._index - 1 3490 negate = self._match(TokenType.NOT) 3491 3492 if self._match_text_seq("DISTINCT", "FROM"): 3493 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 3494 return self.expression(klass, this=this, expression=self._parse_conjunction()) 3495 3496 expression = self._parse_null() or self._parse_boolean() 3497 if not expression: 3498 self._retreat(index) 3499 return None 3500 3501 this = self.expression(exp.Is, this=this, expression=expression) 3502 return self.expression(exp.Not, this=this) if negate else this 3503 3504 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 3505 unnest = self._parse_unnest(with_alias=False) 3506 if unnest: 3507 this = self.expression(exp.In, this=this, unnest=unnest) 3508 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 3509 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 3510 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 3511 3512 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 3513 this = self.expression(exp.In, this=this, query=expressions[0]) 3514 else: 3515 this = self.expression(exp.In, this=this, expressions=expressions) 3516 3517 if matched_l_paren: 3518 self._match_r_paren(this) 3519 elif not self._match(TokenType.R_BRACKET, expression=this): 3520 self.raise_error("Expecting ]") 3521 else: 3522 this = self.expression(exp.In, this=this, field=self._parse_field()) 3523 3524 return this 3525 3526 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 3527 low = self._parse_bitwise() 3528 self._match(TokenType.AND) 3529 high = self._parse_bitwise() 3530 return self.expression(exp.Between, this=this, low=low, high=high) 3531 3532 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3533 if not self._match(TokenType.ESCAPE): 3534 return this 3535 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3536 3537 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Interval]: 3538 index = self._index 3539 3540 if not self._match(TokenType.INTERVAL) and match_interval: 3541 return None 3542 3543 if self._match(TokenType.STRING, advance=False): 3544 this = self._parse_primary() 3545 else: 3546 this = self._parse_term() 3547 3548 if not this or ( 3549 isinstance(this, exp.Column) 3550 and not this.table 3551 and not this.this.quoted 3552 and this.name.upper() == "IS" 3553 ): 3554 self._retreat(index) 3555 return None 3556 3557 unit = self._parse_function() or ( 3558 not self._match(TokenType.ALIAS, advance=False) 3559 and self._parse_var(any_token=True, upper=True) 3560 ) 3561 3562 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 3563 # each INTERVAL expression into this canonical form so it's easy to transpile 3564 if this and this.is_number: 3565 this = exp.Literal.string(this.name) 3566 elif this and this.is_string: 3567 parts = this.name.split() 3568 3569 if len(parts) == 2: 3570 if unit: 3571 # This is not actually a unit, it's something else (e.g. a "window side") 3572 unit = None 3573 self._retreat(self._index - 1) 3574 3575 this = exp.Literal.string(parts[0]) 3576 unit = self.expression(exp.Var, this=parts[1].upper()) 3577 3578 return self.expression(exp.Interval, this=this, unit=unit) 3579 3580 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3581 this = self._parse_term() 3582 3583 while True: 3584 if self._match_set(self.BITWISE): 3585 this = self.expression( 3586 self.BITWISE[self._prev.token_type], 3587 this=this, 3588 expression=self._parse_term(), 3589 ) 3590 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 3591 this = self.expression( 3592 exp.DPipe, 3593 this=this, 3594 expression=self._parse_term(), 3595 safe=not self.dialect.STRICT_STRING_CONCAT, 3596 ) 3597 elif self._match(TokenType.DQMARK): 3598 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3599 elif self._match_pair(TokenType.LT, TokenType.LT): 3600 this = self.expression( 3601 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3602 ) 3603 elif self._match_pair(TokenType.GT, TokenType.GT): 3604 this = self.expression( 3605 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3606 ) 3607 else: 3608 break 3609 3610 return this 3611 3612 def _parse_term(self) -> t.Optional[exp.Expression]: 3613 return self._parse_tokens(self._parse_factor, self.TERM) 3614 3615 def _parse_factor(self) -> t.Optional[exp.Expression]: 3616 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 3617 this = parse_method() 3618 3619 while self._match_set(self.FACTOR): 3620 this = self.expression( 3621 self.FACTOR[self._prev.token_type], 3622 this=this, 3623 comments=self._prev_comments, 3624 expression=parse_method(), 3625 ) 3626 if isinstance(this, exp.Div): 3627 this.args["typed"] = self.dialect.TYPED_DIVISION 3628 this.args["safe"] = self.dialect.SAFE_DIVISION 3629 3630 return this 3631 3632 def _parse_exponent(self) -> t.Optional[exp.Expression]: 3633 return self._parse_tokens(self._parse_unary, self.EXPONENT) 3634 3635 def _parse_unary(self) -> t.Optional[exp.Expression]: 3636 if self._match_set(self.UNARY_PARSERS): 3637 return self.UNARY_PARSERS[self._prev.token_type](self) 3638 return self._parse_at_time_zone(self._parse_type()) 3639 3640 def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]: 3641 interval = parse_interval and self._parse_interval() 3642 if interval: 3643 # Convert INTERVAL 'val_1' unit_1 ... 'val_n' unit_n into a sum of intervals 3644 while self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 3645 interval = self.expression( # type: ignore 3646 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 3647 ) 3648 3649 return interval 3650 3651 index = self._index 3652 data_type = self._parse_types(check_func=True, allow_identifiers=False) 3653 this = self._parse_column() 3654 3655 if data_type: 3656 if isinstance(this, exp.Literal): 3657 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3658 if parser: 3659 return parser(self, this, data_type) 3660 return self.expression(exp.Cast, this=this, to=data_type) 3661 if not data_type.expressions: 3662 self._retreat(index) 3663 return self._parse_column() 3664 return self._parse_column_ops(data_type) 3665 3666 return this and self._parse_column_ops(this) 3667 3668 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 3669 this = self._parse_type() 3670 if not this: 3671 return None 3672 3673 return self.expression( 3674 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 3675 ) 3676 3677 def _parse_types( 3678 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 3679 ) -> t.Optional[exp.Expression]: 3680 index = self._index 3681 3682 prefix = self._match_text_seq("SYSUDTLIB", ".") 3683 3684 if not self._match_set(self.TYPE_TOKENS): 3685 identifier = allow_identifiers and self._parse_id_var( 3686 any_token=False, tokens=(TokenType.VAR,) 3687 ) 3688 if identifier: 3689 tokens = self.dialect.tokenize(identifier.name) 3690 3691 if len(tokens) != 1: 3692 self.raise_error("Unexpected identifier", self._prev) 3693 3694 if tokens[0].token_type in self.TYPE_TOKENS: 3695 self._prev = tokens[0] 3696 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 3697 type_name = identifier.name 3698 3699 while self._match(TokenType.DOT): 3700 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 3701 3702 return exp.DataType.build(type_name, udt=True) 3703 else: 3704 self._retreat(self._index - 1) 3705 return None 3706 else: 3707 return None 3708 3709 type_token = self._prev.token_type 3710 3711 if type_token == TokenType.PSEUDO_TYPE: 3712 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 3713 3714 if type_token == TokenType.OBJECT_IDENTIFIER: 3715 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 3716 3717 nested = type_token in self.NESTED_TYPE_TOKENS 3718 is_struct = type_token in self.STRUCT_TYPE_TOKENS 3719 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 3720 expressions = None 3721 maybe_func = False 3722 3723 if self._match(TokenType.L_PAREN): 3724 if is_struct: 3725 expressions = self._parse_csv(self._parse_struct_types) 3726 elif nested: 3727 expressions = self._parse_csv( 3728 lambda: self._parse_types( 3729 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3730 ) 3731 ) 3732 elif type_token in self.ENUM_TYPE_TOKENS: 3733 expressions = self._parse_csv(self._parse_equality) 3734 elif is_aggregate: 3735 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 3736 any_token=False, tokens=(TokenType.VAR,) 3737 ) 3738 if not func_or_ident or not self._match(TokenType.COMMA): 3739 return None 3740 expressions = self._parse_csv( 3741 lambda: self._parse_types( 3742 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3743 ) 3744 ) 3745 expressions.insert(0, func_or_ident) 3746 else: 3747 expressions = self._parse_csv(self._parse_type_size) 3748 3749 if not expressions or not self._match(TokenType.R_PAREN): 3750 self._retreat(index) 3751 return None 3752 3753 maybe_func = True 3754 3755 this: t.Optional[exp.Expression] = None 3756 values: t.Optional[t.List[exp.Expression]] = None 3757 3758 if nested and self._match(TokenType.LT): 3759 if is_struct: 3760 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 3761 else: 3762 expressions = self._parse_csv( 3763 lambda: self._parse_types( 3764 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3765 ) 3766 ) 3767 3768 if not self._match(TokenType.GT): 3769 self.raise_error("Expecting >") 3770 3771 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 3772 values = self._parse_csv(self._parse_conjunction) 3773 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 3774 3775 if type_token in self.TIMESTAMPS: 3776 if self._match_text_seq("WITH", "TIME", "ZONE"): 3777 maybe_func = False 3778 tz_type = ( 3779 exp.DataType.Type.TIMETZ 3780 if type_token in self.TIMES 3781 else exp.DataType.Type.TIMESTAMPTZ 3782 ) 3783 this = exp.DataType(this=tz_type, expressions=expressions) 3784 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 3785 maybe_func = False 3786 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 3787 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 3788 maybe_func = False 3789 elif type_token == TokenType.INTERVAL: 3790 unit = self._parse_var() 3791 3792 if self._match_text_seq("TO"): 3793 span = [exp.IntervalSpan(this=unit, expression=self._parse_var())] 3794 else: 3795 span = None 3796 3797 if span or not unit: 3798 this = self.expression( 3799 exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span 3800 ) 3801 else: 3802 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 3803 3804 if maybe_func and check_func: 3805 index2 = self._index 3806 peek = self._parse_string() 3807 3808 if not peek: 3809 self._retreat(index) 3810 return None 3811 3812 self._retreat(index2) 3813 3814 if not this: 3815 if self._match_text_seq("UNSIGNED"): 3816 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 3817 if not unsigned_type_token: 3818 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 3819 3820 type_token = unsigned_type_token or type_token 3821 3822 this = exp.DataType( 3823 this=exp.DataType.Type[type_token.value], 3824 expressions=expressions, 3825 nested=nested, 3826 values=values, 3827 prefix=prefix, 3828 ) 3829 3830 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3831 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 3832 3833 return this 3834 3835 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 3836 index = self._index 3837 this = self._parse_type(parse_interval=False) or self._parse_id_var() 3838 self._match(TokenType.COLON) 3839 column_def = self._parse_column_def(this) 3840 3841 if type_required and ( 3842 (isinstance(this, exp.Column) and this.this is column_def) or this is column_def 3843 ): 3844 self._retreat(index) 3845 return self._parse_types() 3846 3847 return column_def 3848 3849 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3850 if not self._match_text_seq("AT", "TIME", "ZONE"): 3851 return this 3852 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 3853 3854 def _parse_column(self) -> t.Optional[exp.Expression]: 3855 this = self._parse_field() 3856 if isinstance(this, exp.Identifier): 3857 this = self.expression(exp.Column, this=this) 3858 elif not this: 3859 return self._parse_bracket(this) 3860 return self._parse_column_ops(this) 3861 3862 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3863 this = self._parse_bracket(this) 3864 3865 while self._match_set(self.COLUMN_OPERATORS): 3866 op_token = self._prev.token_type 3867 op = self.COLUMN_OPERATORS.get(op_token) 3868 3869 if op_token == TokenType.DCOLON: 3870 field = self._parse_types() 3871 if not field: 3872 self.raise_error("Expected type") 3873 elif op and self._curr: 3874 self._advance() 3875 value = self._prev.text 3876 field = ( 3877 exp.Literal.number(value) 3878 if self._prev.token_type == TokenType.NUMBER 3879 else exp.Literal.string(value) 3880 ) 3881 else: 3882 field = self._parse_field(anonymous_func=True, any_token=True) 3883 3884 if isinstance(field, exp.Func): 3885 # bigquery allows function calls like x.y.count(...) 3886 # SAFE.SUBSTR(...) 3887 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3888 this = self._replace_columns_with_dots(this) 3889 3890 if op: 3891 this = op(self, this, field) 3892 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3893 this = self.expression( 3894 exp.Column, 3895 this=field, 3896 table=this.this, 3897 db=this.args.get("table"), 3898 catalog=this.args.get("db"), 3899 ) 3900 else: 3901 this = self.expression(exp.Dot, this=this, expression=field) 3902 this = self._parse_bracket(this) 3903 return this 3904 3905 def _parse_primary(self) -> t.Optional[exp.Expression]: 3906 if self._match_set(self.PRIMARY_PARSERS): 3907 token_type = self._prev.token_type 3908 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3909 3910 if token_type == TokenType.STRING: 3911 expressions = [primary] 3912 while self._match(TokenType.STRING): 3913 expressions.append(exp.Literal.string(self._prev.text)) 3914 3915 if len(expressions) > 1: 3916 return self.expression(exp.Concat, expressions=expressions) 3917 3918 return primary 3919 3920 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3921 return exp.Literal.number(f"0.{self._prev.text}") 3922 3923 if self._match(TokenType.L_PAREN): 3924 comments = self._prev_comments 3925 query = self._parse_select() 3926 3927 if query: 3928 expressions = [query] 3929 else: 3930 expressions = self._parse_expressions() 3931 3932 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3933 3934 if isinstance(this, exp.Subqueryable): 3935 this = self._parse_set_operations( 3936 self._parse_subquery(this=this, parse_alias=False) 3937 ) 3938 elif len(expressions) > 1: 3939 this = self.expression(exp.Tuple, expressions=expressions) 3940 else: 3941 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 3942 3943 if this: 3944 this.add_comments(comments) 3945 3946 self._match_r_paren(expression=this) 3947 return this 3948 3949 return None 3950 3951 def _parse_field( 3952 self, 3953 any_token: bool = False, 3954 tokens: t.Optional[t.Collection[TokenType]] = None, 3955 anonymous_func: bool = False, 3956 ) -> t.Optional[exp.Expression]: 3957 return ( 3958 self._parse_primary() 3959 or self._parse_function(anonymous=anonymous_func) 3960 or self._parse_id_var(any_token=any_token, tokens=tokens) 3961 ) 3962 3963 def _parse_function( 3964 self, 3965 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3966 anonymous: bool = False, 3967 optional_parens: bool = True, 3968 ) -> t.Optional[exp.Expression]: 3969 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 3970 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 3971 fn_syntax = False 3972 if ( 3973 self._match(TokenType.L_BRACE, advance=False) 3974 and self._next 3975 and self._next.text.upper() == "FN" 3976 ): 3977 self._advance(2) 3978 fn_syntax = True 3979 3980 func = self._parse_function_call( 3981 functions=functions, anonymous=anonymous, optional_parens=optional_parens 3982 ) 3983 3984 if fn_syntax: 3985 self._match(TokenType.R_BRACE) 3986 3987 return func 3988 3989 def _parse_function_call( 3990 self, 3991 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3992 anonymous: bool = False, 3993 optional_parens: bool = True, 3994 ) -> t.Optional[exp.Expression]: 3995 if not self._curr: 3996 return None 3997 3998 comments = self._curr.comments 3999 token_type = self._curr.token_type 4000 this = self._curr.text 4001 upper = this.upper() 4002 4003 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 4004 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 4005 self._advance() 4006 return parser(self) 4007 4008 if not self._next or self._next.token_type != TokenType.L_PAREN: 4009 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 4010 self._advance() 4011 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 4012 4013 return None 4014 4015 if token_type not in self.FUNC_TOKENS: 4016 return None 4017 4018 self._advance(2) 4019 4020 parser = self.FUNCTION_PARSERS.get(upper) 4021 if parser and not anonymous: 4022 this = parser(self) 4023 else: 4024 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 4025 4026 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 4027 this = self.expression(subquery_predicate, this=self._parse_select()) 4028 self._match_r_paren() 4029 return this 4030 4031 if functions is None: 4032 functions = self.FUNCTIONS 4033 4034 function = functions.get(upper) 4035 4036 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 4037 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 4038 4039 if function and not anonymous: 4040 if "dialect" in function.__code__.co_varnames: 4041 func = function(args, dialect=self.dialect) 4042 else: 4043 func = function(args) 4044 4045 func = self.validate_expression(func, args) 4046 if not self.dialect.NORMALIZE_FUNCTIONS: 4047 func.meta["name"] = this 4048 4049 this = func 4050 else: 4051 this = self.expression(exp.Anonymous, this=this, expressions=args) 4052 4053 if isinstance(this, exp.Expression): 4054 this.add_comments(comments) 4055 4056 self._match_r_paren(this) 4057 return self._parse_window(this) 4058 4059 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 4060 return self._parse_column_def(self._parse_id_var()) 4061 4062 def _parse_user_defined_function( 4063 self, kind: t.Optional[TokenType] = None 4064 ) -> t.Optional[exp.Expression]: 4065 this = self._parse_id_var() 4066 4067 while self._match(TokenType.DOT): 4068 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 4069 4070 if not self._match(TokenType.L_PAREN): 4071 return this 4072 4073 expressions = self._parse_csv(self._parse_function_parameter) 4074 self._match_r_paren() 4075 return self.expression( 4076 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 4077 ) 4078 4079 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 4080 literal = self._parse_primary() 4081 if literal: 4082 return self.expression(exp.Introducer, this=token.text, expression=literal) 4083 4084 return self.expression(exp.Identifier, this=token.text) 4085 4086 def _parse_session_parameter(self) -> exp.SessionParameter: 4087 kind = None 4088 this = self._parse_id_var() or self._parse_primary() 4089 4090 if this and self._match(TokenType.DOT): 4091 kind = this.name 4092 this = self._parse_var() or self._parse_primary() 4093 4094 return self.expression(exp.SessionParameter, this=this, kind=kind) 4095 4096 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 4097 index = self._index 4098 4099 if self._match(TokenType.L_PAREN): 4100 expressions = t.cast( 4101 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var) 4102 ) 4103 4104 if not self._match(TokenType.R_PAREN): 4105 self._retreat(index) 4106 else: 4107 expressions = [self._parse_id_var()] 4108 4109 if self._match_set(self.LAMBDAS): 4110 return self.LAMBDAS[self._prev.token_type](self, expressions) 4111 4112 self._retreat(index) 4113 4114 this: t.Optional[exp.Expression] 4115 4116 if self._match(TokenType.DISTINCT): 4117 this = self.expression( 4118 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 4119 ) 4120 else: 4121 this = self._parse_select_or_expression(alias=alias) 4122 4123 return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this))) 4124 4125 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4126 index = self._index 4127 4128 if not self.errors: 4129 try: 4130 if self._parse_select(nested=True): 4131 return this 4132 except ParseError: 4133 pass 4134 finally: 4135 self.errors.clear() 4136 self._retreat(index) 4137 4138 if not self._match(TokenType.L_PAREN): 4139 return this 4140 4141 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 4142 4143 self._match_r_paren() 4144 return self.expression(exp.Schema, this=this, expressions=args) 4145 4146 def _parse_field_def(self) -> t.Optional[exp.Expression]: 4147 return self._parse_column_def(self._parse_field(any_token=True)) 4148 4149 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4150 # column defs are not really columns, they're identifiers 4151 if isinstance(this, exp.Column): 4152 this = this.this 4153 4154 kind = self._parse_types(schema=True) 4155 4156 if self._match_text_seq("FOR", "ORDINALITY"): 4157 return self.expression(exp.ColumnDef, this=this, ordinality=True) 4158 4159 constraints: t.List[exp.Expression] = [] 4160 4161 if not kind and self._match(TokenType.ALIAS): 4162 constraints.append( 4163 self.expression( 4164 exp.ComputedColumnConstraint, 4165 this=self._parse_conjunction(), 4166 persisted=self._match_text_seq("PERSISTED"), 4167 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 4168 ) 4169 ) 4170 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 4171 self._match(TokenType.ALIAS) 4172 constraints.append( 4173 self.expression(exp.TransformColumnConstraint, this=self._parse_field()) 4174 ) 4175 4176 while True: 4177 constraint = self._parse_column_constraint() 4178 if not constraint: 4179 break 4180 constraints.append(constraint) 4181 4182 if not kind and not constraints: 4183 return this 4184 4185 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 4186 4187 def _parse_auto_increment( 4188 self, 4189 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 4190 start = None 4191 increment = None 4192 4193 if self._match(TokenType.L_PAREN, advance=False): 4194 args = self._parse_wrapped_csv(self._parse_bitwise) 4195 start = seq_get(args, 0) 4196 increment = seq_get(args, 1) 4197 elif self._match_text_seq("START"): 4198 start = self._parse_bitwise() 4199 self._match_text_seq("INCREMENT") 4200 increment = self._parse_bitwise() 4201 4202 if start and increment: 4203 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 4204 4205 return exp.AutoIncrementColumnConstraint() 4206 4207 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 4208 if not self._match_text_seq("REFRESH"): 4209 self._retreat(self._index - 1) 4210 return None 4211 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 4212 4213 def _parse_compress(self) -> exp.CompressColumnConstraint: 4214 if self._match(TokenType.L_PAREN, advance=False): 4215 return self.expression( 4216 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 4217 ) 4218 4219 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 4220 4221 def _parse_generated_as_identity( 4222 self, 4223 ) -> ( 4224 exp.GeneratedAsIdentityColumnConstraint 4225 | exp.ComputedColumnConstraint 4226 | exp.GeneratedAsRowColumnConstraint 4227 ): 4228 if self._match_text_seq("BY", "DEFAULT"): 4229 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 4230 this = self.expression( 4231 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 4232 ) 4233 else: 4234 self._match_text_seq("ALWAYS") 4235 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 4236 4237 self._match(TokenType.ALIAS) 4238 4239 if self._match_text_seq("ROW"): 4240 start = self._match_text_seq("START") 4241 if not start: 4242 self._match(TokenType.END) 4243 hidden = self._match_text_seq("HIDDEN") 4244 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 4245 4246 identity = self._match_text_seq("IDENTITY") 4247 4248 if self._match(TokenType.L_PAREN): 4249 if self._match(TokenType.START_WITH): 4250 this.set("start", self._parse_bitwise()) 4251 if self._match_text_seq("INCREMENT", "BY"): 4252 this.set("increment", self._parse_bitwise()) 4253 if self._match_text_seq("MINVALUE"): 4254 this.set("minvalue", self._parse_bitwise()) 4255 if self._match_text_seq("MAXVALUE"): 4256 this.set("maxvalue", self._parse_bitwise()) 4257 4258 if self._match_text_seq("CYCLE"): 4259 this.set("cycle", True) 4260 elif self._match_text_seq("NO", "CYCLE"): 4261 this.set("cycle", False) 4262 4263 if not identity: 4264 this.set("expression", self._parse_bitwise()) 4265 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 4266 args = self._parse_csv(self._parse_bitwise) 4267 this.set("start", seq_get(args, 0)) 4268 this.set("increment", seq_get(args, 1)) 4269 4270 self._match_r_paren() 4271 4272 return this 4273 4274 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 4275 self._match_text_seq("LENGTH") 4276 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 4277 4278 def _parse_not_constraint( 4279 self, 4280 ) -> t.Optional[exp.Expression]: 4281 if self._match_text_seq("NULL"): 4282 return self.expression(exp.NotNullColumnConstraint) 4283 if self._match_text_seq("CASESPECIFIC"): 4284 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 4285 if self._match_text_seq("FOR", "REPLICATION"): 4286 return self.expression(exp.NotForReplicationColumnConstraint) 4287 return None 4288 4289 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 4290 if self._match(TokenType.CONSTRAINT): 4291 this = self._parse_id_var() 4292 else: 4293 this = None 4294 4295 if self._match_texts(self.CONSTRAINT_PARSERS): 4296 return self.expression( 4297 exp.ColumnConstraint, 4298 this=this, 4299 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 4300 ) 4301 4302 return this 4303 4304 def _parse_constraint(self) -> t.Optional[exp.Expression]: 4305 if not self._match(TokenType.CONSTRAINT): 4306 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 4307 4308 this = self._parse_id_var() 4309 expressions = [] 4310 4311 while True: 4312 constraint = self._parse_unnamed_constraint() or self._parse_function() 4313 if not constraint: 4314 break 4315 expressions.append(constraint) 4316 4317 return self.expression(exp.Constraint, this=this, expressions=expressions) 4318 4319 def _parse_unnamed_constraint( 4320 self, constraints: t.Optional[t.Collection[str]] = None 4321 ) -> t.Optional[exp.Expression]: 4322 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 4323 constraints or self.CONSTRAINT_PARSERS 4324 ): 4325 return None 4326 4327 constraint = self._prev.text.upper() 4328 if constraint not in self.CONSTRAINT_PARSERS: 4329 self.raise_error(f"No parser found for schema constraint {constraint}.") 4330 4331 return self.CONSTRAINT_PARSERS[constraint](self) 4332 4333 def _parse_unique(self) -> exp.UniqueColumnConstraint: 4334 self._match_text_seq("KEY") 4335 return self.expression( 4336 exp.UniqueColumnConstraint, 4337 this=self._parse_schema(self._parse_id_var(any_token=False)), 4338 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 4339 ) 4340 4341 def _parse_key_constraint_options(self) -> t.List[str]: 4342 options = [] 4343 while True: 4344 if not self._curr: 4345 break 4346 4347 if self._match(TokenType.ON): 4348 action = None 4349 on = self._advance_any() and self._prev.text 4350 4351 if self._match_text_seq("NO", "ACTION"): 4352 action = "NO ACTION" 4353 elif self._match_text_seq("CASCADE"): 4354 action = "CASCADE" 4355 elif self._match_text_seq("RESTRICT"): 4356 action = "RESTRICT" 4357 elif self._match_pair(TokenType.SET, TokenType.NULL): 4358 action = "SET NULL" 4359 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 4360 action = "SET DEFAULT" 4361 else: 4362 self.raise_error("Invalid key constraint") 4363 4364 options.append(f"ON {on} {action}") 4365 elif self._match_text_seq("NOT", "ENFORCED"): 4366 options.append("NOT ENFORCED") 4367 elif self._match_text_seq("DEFERRABLE"): 4368 options.append("DEFERRABLE") 4369 elif self._match_text_seq("INITIALLY", "DEFERRED"): 4370 options.append("INITIALLY DEFERRED") 4371 elif self._match_text_seq("NORELY"): 4372 options.append("NORELY") 4373 elif self._match_text_seq("MATCH", "FULL"): 4374 options.append("MATCH FULL") 4375 else: 4376 break 4377 4378 return options 4379 4380 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 4381 if match and not self._match(TokenType.REFERENCES): 4382 return None 4383 4384 expressions = None 4385 this = self._parse_table(schema=True) 4386 options = self._parse_key_constraint_options() 4387 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 4388 4389 def _parse_foreign_key(self) -> exp.ForeignKey: 4390 expressions = self._parse_wrapped_id_vars() 4391 reference = self._parse_references() 4392 options = {} 4393 4394 while self._match(TokenType.ON): 4395 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 4396 self.raise_error("Expected DELETE or UPDATE") 4397 4398 kind = self._prev.text.lower() 4399 4400 if self._match_text_seq("NO", "ACTION"): 4401 action = "NO ACTION" 4402 elif self._match(TokenType.SET): 4403 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 4404 action = "SET " + self._prev.text.upper() 4405 else: 4406 self._advance() 4407 action = self._prev.text.upper() 4408 4409 options[kind] = action 4410 4411 return self.expression( 4412 exp.ForeignKey, 4413 expressions=expressions, 4414 reference=reference, 4415 **options, # type: ignore 4416 ) 4417 4418 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 4419 return self._parse_field() 4420 4421 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 4422 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 4423 self._retreat(self._index - 1) 4424 return None 4425 4426 id_vars = self._parse_wrapped_id_vars() 4427 return self.expression( 4428 exp.PeriodForSystemTimeConstraint, 4429 this=seq_get(id_vars, 0), 4430 expression=seq_get(id_vars, 1), 4431 ) 4432 4433 def _parse_primary_key( 4434 self, wrapped_optional: bool = False, in_props: bool = False 4435 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 4436 desc = ( 4437 self._match_set((TokenType.ASC, TokenType.DESC)) 4438 and self._prev.token_type == TokenType.DESC 4439 ) 4440 4441 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 4442 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 4443 4444 expressions = self._parse_wrapped_csv( 4445 self._parse_primary_key_part, optional=wrapped_optional 4446 ) 4447 options = self._parse_key_constraint_options() 4448 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 4449 4450 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 4451 return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True)) 4452 4453 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4454 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 4455 return this 4456 4457 bracket_kind = self._prev.token_type 4458 expressions = self._parse_csv( 4459 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 4460 ) 4461 4462 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 4463 self.raise_error("Expected ]") 4464 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 4465 self.raise_error("Expected }") 4466 4467 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 4468 if bracket_kind == TokenType.L_BRACE: 4469 this = self.expression(exp.Struct, expressions=expressions) 4470 elif not this or this.name.upper() == "ARRAY": 4471 this = self.expression(exp.Array, expressions=expressions) 4472 else: 4473 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 4474 this = self.expression(exp.Bracket, this=this, expressions=expressions) 4475 4476 self._add_comments(this) 4477 return self._parse_bracket(this) 4478 4479 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4480 if self._match(TokenType.COLON): 4481 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 4482 return this 4483 4484 def _parse_case(self) -> t.Optional[exp.Expression]: 4485 ifs = [] 4486 default = None 4487 4488 comments = self._prev_comments 4489 expression = self._parse_conjunction() 4490 4491 while self._match(TokenType.WHEN): 4492 this = self._parse_conjunction() 4493 self._match(TokenType.THEN) 4494 then = self._parse_conjunction() 4495 ifs.append(self.expression(exp.If, this=this, true=then)) 4496 4497 if self._match(TokenType.ELSE): 4498 default = self._parse_conjunction() 4499 4500 if not self._match(TokenType.END): 4501 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 4502 default = exp.column("interval") 4503 else: 4504 self.raise_error("Expected END after CASE", self._prev) 4505 4506 return self._parse_window( 4507 self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default) 4508 ) 4509 4510 def _parse_if(self) -> t.Optional[exp.Expression]: 4511 if self._match(TokenType.L_PAREN): 4512 args = self._parse_csv(self._parse_conjunction) 4513 this = self.validate_expression(exp.If.from_arg_list(args), args) 4514 self._match_r_paren() 4515 else: 4516 index = self._index - 1 4517 4518 if self.NO_PAREN_IF_COMMANDS and index == 0: 4519 return self._parse_as_command(self._prev) 4520 4521 condition = self._parse_conjunction() 4522 4523 if not condition: 4524 self._retreat(index) 4525 return None 4526 4527 self._match(TokenType.THEN) 4528 true = self._parse_conjunction() 4529 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 4530 self._match(TokenType.END) 4531 this = self.expression(exp.If, this=condition, true=true, false=false) 4532 4533 return self._parse_window(this) 4534 4535 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 4536 if not self._match_text_seq("VALUE", "FOR"): 4537 self._retreat(self._index - 1) 4538 return None 4539 4540 return self.expression( 4541 exp.NextValueFor, 4542 this=self._parse_column(), 4543 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 4544 ) 4545 4546 def _parse_extract(self) -> exp.Extract: 4547 this = self._parse_function() or self._parse_var() or self._parse_type() 4548 4549 if self._match(TokenType.FROM): 4550 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4551 4552 if not self._match(TokenType.COMMA): 4553 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 4554 4555 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4556 4557 def _parse_any_value(self) -> exp.AnyValue: 4558 this = self._parse_lambda() 4559 is_max = None 4560 having = None 4561 4562 if self._match(TokenType.HAVING): 4563 self._match_texts(("MAX", "MIN")) 4564 is_max = self._prev.text == "MAX" 4565 having = self._parse_column() 4566 4567 return self.expression(exp.AnyValue, this=this, having=having, max=is_max) 4568 4569 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 4570 this = self._parse_conjunction() 4571 4572 if not self._match(TokenType.ALIAS): 4573 if self._match(TokenType.COMMA): 4574 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 4575 4576 self.raise_error("Expected AS after CAST") 4577 4578 fmt = None 4579 to = self._parse_types() 4580 4581 if self._match(TokenType.FORMAT): 4582 fmt_string = self._parse_string() 4583 fmt = self._parse_at_time_zone(fmt_string) 4584 4585 if not to: 4586 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 4587 if to.this in exp.DataType.TEMPORAL_TYPES: 4588 this = self.expression( 4589 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 4590 this=this, 4591 format=exp.Literal.string( 4592 format_time( 4593 fmt_string.this if fmt_string else "", 4594 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 4595 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 4596 ) 4597 ), 4598 ) 4599 4600 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 4601 this.set("zone", fmt.args["zone"]) 4602 return this 4603 elif not to: 4604 self.raise_error("Expected TYPE after CAST") 4605 elif isinstance(to, exp.Identifier): 4606 to = exp.DataType.build(to.name, udt=True) 4607 elif to.this == exp.DataType.Type.CHAR: 4608 if self._match(TokenType.CHARACTER_SET): 4609 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 4610 4611 return self.expression( 4612 exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt, safe=safe 4613 ) 4614 4615 def _parse_string_agg(self) -> exp.Expression: 4616 if self._match(TokenType.DISTINCT): 4617 args: t.List[t.Optional[exp.Expression]] = [ 4618 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 4619 ] 4620 if self._match(TokenType.COMMA): 4621 args.extend(self._parse_csv(self._parse_conjunction)) 4622 else: 4623 args = self._parse_csv(self._parse_conjunction) # type: ignore 4624 4625 index = self._index 4626 if not self._match(TokenType.R_PAREN) and args: 4627 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 4628 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 4629 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 4630 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 4631 4632 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 4633 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 4634 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 4635 if not self._match_text_seq("WITHIN", "GROUP"): 4636 self._retreat(index) 4637 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 4638 4639 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 4640 order = self._parse_order(this=seq_get(args, 0)) 4641 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 4642 4643 def _parse_convert( 4644 self, strict: bool, safe: t.Optional[bool] = None 4645 ) -> t.Optional[exp.Expression]: 4646 this = self._parse_bitwise() 4647 4648 if self._match(TokenType.USING): 4649 to: t.Optional[exp.Expression] = self.expression( 4650 exp.CharacterSet, this=self._parse_var() 4651 ) 4652 elif self._match(TokenType.COMMA): 4653 to = self._parse_types() 4654 else: 4655 to = None 4656 4657 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 4658 4659 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 4660 """ 4661 There are generally two variants of the DECODE function: 4662 4663 - DECODE(bin, charset) 4664 - DECODE(expression, search, result [, search, result] ... [, default]) 4665 4666 The second variant will always be parsed into a CASE expression. Note that NULL 4667 needs special treatment, since we need to explicitly check for it with `IS NULL`, 4668 instead of relying on pattern matching. 4669 """ 4670 args = self._parse_csv(self._parse_conjunction) 4671 4672 if len(args) < 3: 4673 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 4674 4675 expression, *expressions = args 4676 if not expression: 4677 return None 4678 4679 ifs = [] 4680 for search, result in zip(expressions[::2], expressions[1::2]): 4681 if not search or not result: 4682 return None 4683 4684 if isinstance(search, exp.Literal): 4685 ifs.append( 4686 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 4687 ) 4688 elif isinstance(search, exp.Null): 4689 ifs.append( 4690 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 4691 ) 4692 else: 4693 cond = exp.or_( 4694 exp.EQ(this=expression.copy(), expression=search), 4695 exp.and_( 4696 exp.Is(this=expression.copy(), expression=exp.Null()), 4697 exp.Is(this=search.copy(), expression=exp.Null()), 4698 copy=False, 4699 ), 4700 copy=False, 4701 ) 4702 ifs.append(exp.If(this=cond, true=result)) 4703 4704 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 4705 4706 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 4707 self._match_text_seq("KEY") 4708 key = self._parse_column() 4709 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 4710 self._match_text_seq("VALUE") 4711 value = self._parse_bitwise() 4712 4713 if not key and not value: 4714 return None 4715 return self.expression(exp.JSONKeyValue, this=key, expression=value) 4716 4717 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4718 if not this or not self._match_text_seq("FORMAT", "JSON"): 4719 return this 4720 4721 return self.expression(exp.FormatJson, this=this) 4722 4723 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 4724 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 4725 for value in values: 4726 if self._match_text_seq(value, "ON", on): 4727 return f"{value} ON {on}" 4728 4729 return None 4730 4731 @t.overload 4732 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: 4733 ... 4734 4735 @t.overload 4736 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: 4737 ... 4738 4739 def _parse_json_object(self, agg=False): 4740 star = self._parse_star() 4741 expressions = ( 4742 [star] 4743 if star 4744 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 4745 ) 4746 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 4747 4748 unique_keys = None 4749 if self._match_text_seq("WITH", "UNIQUE"): 4750 unique_keys = True 4751 elif self._match_text_seq("WITHOUT", "UNIQUE"): 4752 unique_keys = False 4753 4754 self._match_text_seq("KEYS") 4755 4756 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 4757 self._parse_type() 4758 ) 4759 encoding = self._match_text_seq("ENCODING") and self._parse_var() 4760 4761 return self.expression( 4762 exp.JSONObjectAgg if agg else exp.JSONObject, 4763 expressions=expressions, 4764 null_handling=null_handling, 4765 unique_keys=unique_keys, 4766 return_type=return_type, 4767 encoding=encoding, 4768 ) 4769 4770 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 4771 def _parse_json_column_def(self) -> exp.JSONColumnDef: 4772 if not self._match_text_seq("NESTED"): 4773 this = self._parse_id_var() 4774 kind = self._parse_types(allow_identifiers=False) 4775 nested = None 4776 else: 4777 this = None 4778 kind = None 4779 nested = True 4780 4781 path = self._match_text_seq("PATH") and self._parse_string() 4782 nested_schema = nested and self._parse_json_schema() 4783 4784 return self.expression( 4785 exp.JSONColumnDef, 4786 this=this, 4787 kind=kind, 4788 path=path, 4789 nested_schema=nested_schema, 4790 ) 4791 4792 def _parse_json_schema(self) -> exp.JSONSchema: 4793 self._match_text_seq("COLUMNS") 4794 return self.expression( 4795 exp.JSONSchema, 4796 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 4797 ) 4798 4799 def _parse_json_table(self) -> exp.JSONTable: 4800 this = self._parse_format_json(self._parse_bitwise()) 4801 path = self._match(TokenType.COMMA) and self._parse_string() 4802 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 4803 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 4804 schema = self._parse_json_schema() 4805 4806 return exp.JSONTable( 4807 this=this, 4808 schema=schema, 4809 path=path, 4810 error_handling=error_handling, 4811 empty_handling=empty_handling, 4812 ) 4813 4814 def _parse_match_against(self) -> exp.MatchAgainst: 4815 expressions = self._parse_csv(self._parse_column) 4816 4817 self._match_text_seq(")", "AGAINST", "(") 4818 4819 this = self._parse_string() 4820 4821 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 4822 modifier = "IN NATURAL LANGUAGE MODE" 4823 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4824 modifier = f"{modifier} WITH QUERY EXPANSION" 4825 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 4826 modifier = "IN BOOLEAN MODE" 4827 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4828 modifier = "WITH QUERY EXPANSION" 4829 else: 4830 modifier = None 4831 4832 return self.expression( 4833 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 4834 ) 4835 4836 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 4837 def _parse_open_json(self) -> exp.OpenJSON: 4838 this = self._parse_bitwise() 4839 path = self._match(TokenType.COMMA) and self._parse_string() 4840 4841 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 4842 this = self._parse_field(any_token=True) 4843 kind = self._parse_types() 4844 path = self._parse_string() 4845 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 4846 4847 return self.expression( 4848 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 4849 ) 4850 4851 expressions = None 4852 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 4853 self._match_l_paren() 4854 expressions = self._parse_csv(_parse_open_json_column_def) 4855 4856 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 4857 4858 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 4859 args = self._parse_csv(self._parse_bitwise) 4860 4861 if self._match(TokenType.IN): 4862 return self.expression( 4863 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 4864 ) 4865 4866 if haystack_first: 4867 haystack = seq_get(args, 0) 4868 needle = seq_get(args, 1) 4869 else: 4870 needle = seq_get(args, 0) 4871 haystack = seq_get(args, 1) 4872 4873 return self.expression( 4874 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 4875 ) 4876 4877 def _parse_predict(self) -> exp.Predict: 4878 self._match_text_seq("MODEL") 4879 this = self._parse_table() 4880 4881 self._match(TokenType.COMMA) 4882 self._match_text_seq("TABLE") 4883 4884 return self.expression( 4885 exp.Predict, 4886 this=this, 4887 expression=self._parse_table(), 4888 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 4889 ) 4890 4891 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 4892 args = self._parse_csv(self._parse_table) 4893 return exp.JoinHint(this=func_name.upper(), expressions=args) 4894 4895 def _parse_substring(self) -> exp.Substring: 4896 # Postgres supports the form: substring(string [from int] [for int]) 4897 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 4898 4899 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 4900 4901 if self._match(TokenType.FROM): 4902 args.append(self._parse_bitwise()) 4903 if self._match(TokenType.FOR): 4904 args.append(self._parse_bitwise()) 4905 4906 return self.validate_expression(exp.Substring.from_arg_list(args), args) 4907 4908 def _parse_trim(self) -> exp.Trim: 4909 # https://www.w3resource.com/sql/character-functions/trim.php 4910 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 4911 4912 position = None 4913 collation = None 4914 expression = None 4915 4916 if self._match_texts(self.TRIM_TYPES): 4917 position = self._prev.text.upper() 4918 4919 this = self._parse_bitwise() 4920 if self._match_set((TokenType.FROM, TokenType.COMMA)): 4921 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 4922 expression = self._parse_bitwise() 4923 4924 if invert_order: 4925 this, expression = expression, this 4926 4927 if self._match(TokenType.COLLATE): 4928 collation = self._parse_bitwise() 4929 4930 return self.expression( 4931 exp.Trim, this=this, position=position, expression=expression, collation=collation 4932 ) 4933 4934 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 4935 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 4936 4937 def _parse_named_window(self) -> t.Optional[exp.Expression]: 4938 return self._parse_window(self._parse_id_var(), alias=True) 4939 4940 def _parse_respect_or_ignore_nulls( 4941 self, this: t.Optional[exp.Expression] 4942 ) -> t.Optional[exp.Expression]: 4943 if self._match_text_seq("IGNORE", "NULLS"): 4944 return self.expression(exp.IgnoreNulls, this=this) 4945 if self._match_text_seq("RESPECT", "NULLS"): 4946 return self.expression(exp.RespectNulls, this=this) 4947 return this 4948 4949 def _parse_window( 4950 self, this: t.Optional[exp.Expression], alias: bool = False 4951 ) -> t.Optional[exp.Expression]: 4952 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 4953 self._match(TokenType.WHERE) 4954 this = self.expression( 4955 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 4956 ) 4957 self._match_r_paren() 4958 4959 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 4960 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 4961 if self._match_text_seq("WITHIN", "GROUP"): 4962 order = self._parse_wrapped(self._parse_order) 4963 this = self.expression(exp.WithinGroup, this=this, expression=order) 4964 4965 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 4966 # Some dialects choose to implement and some do not. 4967 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 4968 4969 # There is some code above in _parse_lambda that handles 4970 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 4971 4972 # The below changes handle 4973 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 4974 4975 # Oracle allows both formats 4976 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 4977 # and Snowflake chose to do the same for familiarity 4978 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 4979 if isinstance(this, exp.AggFunc): 4980 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 4981 4982 if ignore_respect and ignore_respect is not this: 4983 ignore_respect.replace(ignore_respect.this) 4984 this = self.expression(ignore_respect.__class__, this=this) 4985 4986 this = self._parse_respect_or_ignore_nulls(this) 4987 4988 # bigquery select from window x AS (partition by ...) 4989 if alias: 4990 over = None 4991 self._match(TokenType.ALIAS) 4992 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 4993 return this 4994 else: 4995 over = self._prev.text.upper() 4996 4997 if not self._match(TokenType.L_PAREN): 4998 return self.expression( 4999 exp.Window, this=this, alias=self._parse_id_var(False), over=over 5000 ) 5001 5002 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 5003 5004 first = self._match(TokenType.FIRST) 5005 if self._match_text_seq("LAST"): 5006 first = False 5007 5008 partition, order = self._parse_partition_and_order() 5009 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 5010 5011 if kind: 5012 self._match(TokenType.BETWEEN) 5013 start = self._parse_window_spec() 5014 self._match(TokenType.AND) 5015 end = self._parse_window_spec() 5016 5017 spec = self.expression( 5018 exp.WindowSpec, 5019 kind=kind, 5020 start=start["value"], 5021 start_side=start["side"], 5022 end=end["value"], 5023 end_side=end["side"], 5024 ) 5025 else: 5026 spec = None 5027 5028 self._match_r_paren() 5029 5030 window = self.expression( 5031 exp.Window, 5032 this=this, 5033 partition_by=partition, 5034 order=order, 5035 spec=spec, 5036 alias=window_alias, 5037 over=over, 5038 first=first, 5039 ) 5040 5041 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 5042 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 5043 return self._parse_window(window, alias=alias) 5044 5045 return window 5046 5047 def _parse_partition_and_order( 5048 self, 5049 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 5050 return self._parse_partition_by(), self._parse_order() 5051 5052 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 5053 self._match(TokenType.BETWEEN) 5054 5055 return { 5056 "value": ( 5057 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 5058 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 5059 or self._parse_bitwise() 5060 ), 5061 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 5062 } 5063 5064 def _parse_alias( 5065 self, this: t.Optional[exp.Expression], explicit: bool = False 5066 ) -> t.Optional[exp.Expression]: 5067 any_token = self._match(TokenType.ALIAS) 5068 comments = self._prev_comments 5069 5070 if explicit and not any_token: 5071 return this 5072 5073 if self._match(TokenType.L_PAREN): 5074 aliases = self.expression( 5075 exp.Aliases, 5076 comments=comments, 5077 this=this, 5078 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 5079 ) 5080 self._match_r_paren(aliases) 5081 return aliases 5082 5083 alias = self._parse_id_var(any_token) or ( 5084 self.STRING_ALIASES and self._parse_string_as_identifier() 5085 ) 5086 5087 if alias: 5088 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 5089 column = this.this 5090 5091 # Moves the comment next to the alias in `expr /* comment */ AS alias` 5092 if not this.comments and column and column.comments: 5093 this.comments = column.comments 5094 column.comments = None 5095 5096 return this 5097 5098 def _parse_id_var( 5099 self, 5100 any_token: bool = True, 5101 tokens: t.Optional[t.Collection[TokenType]] = None, 5102 ) -> t.Optional[exp.Expression]: 5103 identifier = self._parse_identifier() 5104 5105 if identifier: 5106 return identifier 5107 5108 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 5109 quoted = self._prev.token_type == TokenType.STRING 5110 return exp.Identifier(this=self._prev.text, quoted=quoted) 5111 5112 return None 5113 5114 def _parse_string(self) -> t.Optional[exp.Expression]: 5115 if self._match_set((TokenType.STRING, TokenType.RAW_STRING)): 5116 return self.PRIMARY_PARSERS[self._prev.token_type](self, self._prev) 5117 return self._parse_placeholder() 5118 5119 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 5120 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 5121 5122 def _parse_number(self) -> t.Optional[exp.Expression]: 5123 if self._match(TokenType.NUMBER): 5124 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 5125 return self._parse_placeholder() 5126 5127 def _parse_identifier(self) -> t.Optional[exp.Expression]: 5128 if self._match(TokenType.IDENTIFIER): 5129 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 5130 return self._parse_placeholder() 5131 5132 def _parse_var( 5133 self, 5134 any_token: bool = False, 5135 tokens: t.Optional[t.Collection[TokenType]] = None, 5136 upper: bool = False, 5137 ) -> t.Optional[exp.Expression]: 5138 if ( 5139 (any_token and self._advance_any()) 5140 or self._match(TokenType.VAR) 5141 or (self._match_set(tokens) if tokens else False) 5142 ): 5143 return self.expression( 5144 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 5145 ) 5146 return self._parse_placeholder() 5147 5148 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 5149 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 5150 self._advance() 5151 return self._prev 5152 return None 5153 5154 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 5155 return self._parse_var() or self._parse_string() 5156 5157 def _parse_null(self) -> t.Optional[exp.Expression]: 5158 if self._match_set(self.NULL_TOKENS): 5159 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 5160 return self._parse_placeholder() 5161 5162 def _parse_boolean(self) -> t.Optional[exp.Expression]: 5163 if self._match(TokenType.TRUE): 5164 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 5165 if self._match(TokenType.FALSE): 5166 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 5167 return self._parse_placeholder() 5168 5169 def _parse_star(self) -> t.Optional[exp.Expression]: 5170 if self._match(TokenType.STAR): 5171 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 5172 return self._parse_placeholder() 5173 5174 def _parse_parameter(self) -> exp.Parameter: 5175 def _parse_parameter_part() -> t.Optional[exp.Expression]: 5176 return ( 5177 self._parse_identifier() or self._parse_primary() or self._parse_var(any_token=True) 5178 ) 5179 5180 self._match(TokenType.L_BRACE) 5181 this = _parse_parameter_part() 5182 expression = self._match(TokenType.COLON) and _parse_parameter_part() 5183 self._match(TokenType.R_BRACE) 5184 5185 return self.expression(exp.Parameter, this=this, expression=expression) 5186 5187 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 5188 if self._match_set(self.PLACEHOLDER_PARSERS): 5189 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 5190 if placeholder: 5191 return placeholder 5192 self._advance(-1) 5193 return None 5194 5195 def _parse_except(self) -> t.Optional[t.List[exp.Expression]]: 5196 if not self._match(TokenType.EXCEPT): 5197 return None 5198 if self._match(TokenType.L_PAREN, advance=False): 5199 return self._parse_wrapped_csv(self._parse_column) 5200 5201 except_column = self._parse_column() 5202 return [except_column] if except_column else None 5203 5204 def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]: 5205 if not self._match(TokenType.REPLACE): 5206 return None 5207 if self._match(TokenType.L_PAREN, advance=False): 5208 return self._parse_wrapped_csv(self._parse_expression) 5209 5210 replace_expression = self._parse_expression() 5211 return [replace_expression] if replace_expression else None 5212 5213 def _parse_csv( 5214 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 5215 ) -> t.List[exp.Expression]: 5216 parse_result = parse_method() 5217 items = [parse_result] if parse_result is not None else [] 5218 5219 while self._match(sep): 5220 self._add_comments(parse_result) 5221 parse_result = parse_method() 5222 if parse_result is not None: 5223 items.append(parse_result) 5224 5225 return items 5226 5227 def _parse_tokens( 5228 self, parse_method: t.Callable, expressions: t.Dict 5229 ) -> t.Optional[exp.Expression]: 5230 this = parse_method() 5231 5232 while self._match_set(expressions): 5233 this = self.expression( 5234 expressions[self._prev.token_type], 5235 this=this, 5236 comments=self._prev_comments, 5237 expression=parse_method(), 5238 ) 5239 5240 return this 5241 5242 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 5243 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 5244 5245 def _parse_wrapped_csv( 5246 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 5247 ) -> t.List[exp.Expression]: 5248 return self._parse_wrapped( 5249 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 5250 ) 5251 5252 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 5253 wrapped = self._match(TokenType.L_PAREN) 5254 if not wrapped and not optional: 5255 self.raise_error("Expecting (") 5256 parse_result = parse_method() 5257 if wrapped: 5258 self._match_r_paren() 5259 return parse_result 5260 5261 def _parse_expressions(self) -> t.List[exp.Expression]: 5262 return self._parse_csv(self._parse_expression) 5263 5264 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 5265 return self._parse_select() or self._parse_set_operations( 5266 self._parse_expression() if alias else self._parse_conjunction() 5267 ) 5268 5269 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 5270 return self._parse_query_modifiers( 5271 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 5272 ) 5273 5274 def _parse_transaction(self) -> exp.Transaction | exp.Command: 5275 this = None 5276 if self._match_texts(self.TRANSACTION_KIND): 5277 this = self._prev.text 5278 5279 self._match_texts(("TRANSACTION", "WORK")) 5280 5281 modes = [] 5282 while True: 5283 mode = [] 5284 while self._match(TokenType.VAR): 5285 mode.append(self._prev.text) 5286 5287 if mode: 5288 modes.append(" ".join(mode)) 5289 if not self._match(TokenType.COMMA): 5290 break 5291 5292 return self.expression(exp.Transaction, this=this, modes=modes) 5293 5294 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 5295 chain = None 5296 savepoint = None 5297 is_rollback = self._prev.token_type == TokenType.ROLLBACK 5298 5299 self._match_texts(("TRANSACTION", "WORK")) 5300 5301 if self._match_text_seq("TO"): 5302 self._match_text_seq("SAVEPOINT") 5303 savepoint = self._parse_id_var() 5304 5305 if self._match(TokenType.AND): 5306 chain = not self._match_text_seq("NO") 5307 self._match_text_seq("CHAIN") 5308 5309 if is_rollback: 5310 return self.expression(exp.Rollback, savepoint=savepoint) 5311 5312 return self.expression(exp.Commit, chain=chain) 5313 5314 def _parse_refresh(self) -> exp.Refresh: 5315 self._match(TokenType.TABLE) 5316 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 5317 5318 def _parse_add_column(self) -> t.Optional[exp.Expression]: 5319 if not self._match_text_seq("ADD"): 5320 return None 5321 5322 self._match(TokenType.COLUMN) 5323 exists_column = self._parse_exists(not_=True) 5324 expression = self._parse_field_def() 5325 5326 if expression: 5327 expression.set("exists", exists_column) 5328 5329 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 5330 if self._match_texts(("FIRST", "AFTER")): 5331 position = self._prev.text 5332 column_position = self.expression( 5333 exp.ColumnPosition, this=self._parse_column(), position=position 5334 ) 5335 expression.set("position", column_position) 5336 5337 return expression 5338 5339 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 5340 drop = self._match(TokenType.DROP) and self._parse_drop() 5341 if drop and not isinstance(drop, exp.Command): 5342 drop.set("kind", drop.args.get("kind", "COLUMN")) 5343 return drop 5344 5345 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 5346 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 5347 return self.expression( 5348 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 5349 ) 5350 5351 def _parse_add_constraint(self) -> exp.AddConstraint: 5352 this = None 5353 kind = self._prev.token_type 5354 5355 if kind == TokenType.CONSTRAINT: 5356 this = self._parse_id_var() 5357 5358 if self._match_text_seq("CHECK"): 5359 expression = self._parse_wrapped(self._parse_conjunction) 5360 enforced = self._match_text_seq("ENFORCED") or False 5361 5362 return self.expression( 5363 exp.AddConstraint, this=this, expression=expression, enforced=enforced 5364 ) 5365 5366 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 5367 expression = self._parse_foreign_key() 5368 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 5369 expression = self._parse_primary_key() 5370 else: 5371 expression = None 5372 5373 return self.expression(exp.AddConstraint, this=this, expression=expression) 5374 5375 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 5376 index = self._index - 1 5377 5378 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 5379 return self._parse_csv(self._parse_add_constraint) 5380 5381 self._retreat(index) 5382 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 5383 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 5384 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 5385 5386 def _parse_alter_table_alter(self) -> exp.AlterColumn: 5387 self._match(TokenType.COLUMN) 5388 column = self._parse_field(any_token=True) 5389 5390 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 5391 return self.expression(exp.AlterColumn, this=column, drop=True) 5392 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 5393 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 5394 if self._match(TokenType.COMMENT): 5395 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 5396 5397 self._match_text_seq("SET", "DATA") 5398 return self.expression( 5399 exp.AlterColumn, 5400 this=column, 5401 dtype=self._match_text_seq("TYPE") and self._parse_types(), 5402 collate=self._match(TokenType.COLLATE) and self._parse_term(), 5403 using=self._match(TokenType.USING) and self._parse_conjunction(), 5404 ) 5405 5406 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 5407 index = self._index - 1 5408 5409 partition_exists = self._parse_exists() 5410 if self._match(TokenType.PARTITION, advance=False): 5411 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 5412 5413 self._retreat(index) 5414 return self._parse_csv(self._parse_drop_column) 5415 5416 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 5417 if self._match(TokenType.COLUMN): 5418 exists = self._parse_exists() 5419 old_column = self._parse_column() 5420 to = self._match_text_seq("TO") 5421 new_column = self._parse_column() 5422 5423 if old_column is None or to is None or new_column is None: 5424 return None 5425 5426 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 5427 5428 self._match_text_seq("TO") 5429 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 5430 5431 def _parse_alter(self) -> exp.AlterTable | exp.Command: 5432 start = self._prev 5433 5434 if not self._match(TokenType.TABLE): 5435 return self._parse_as_command(start) 5436 5437 exists = self._parse_exists() 5438 only = self._match_text_seq("ONLY") 5439 this = self._parse_table(schema=True) 5440 5441 if self._next: 5442 self._advance() 5443 5444 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 5445 if parser: 5446 actions = ensure_list(parser(self)) 5447 5448 if not self._curr and actions: 5449 return self.expression( 5450 exp.AlterTable, 5451 this=this, 5452 exists=exists, 5453 actions=actions, 5454 only=only, 5455 ) 5456 5457 return self._parse_as_command(start) 5458 5459 def _parse_merge(self) -> exp.Merge: 5460 self._match(TokenType.INTO) 5461 target = self._parse_table() 5462 5463 if target and self._match(TokenType.ALIAS, advance=False): 5464 target.set("alias", self._parse_table_alias()) 5465 5466 self._match(TokenType.USING) 5467 using = self._parse_table() 5468 5469 self._match(TokenType.ON) 5470 on = self._parse_conjunction() 5471 5472 return self.expression( 5473 exp.Merge, 5474 this=target, 5475 using=using, 5476 on=on, 5477 expressions=self._parse_when_matched(), 5478 ) 5479 5480 def _parse_when_matched(self) -> t.List[exp.When]: 5481 whens = [] 5482 5483 while self._match(TokenType.WHEN): 5484 matched = not self._match(TokenType.NOT) 5485 self._match_text_seq("MATCHED") 5486 source = ( 5487 False 5488 if self._match_text_seq("BY", "TARGET") 5489 else self._match_text_seq("BY", "SOURCE") 5490 ) 5491 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 5492 5493 self._match(TokenType.THEN) 5494 5495 if self._match(TokenType.INSERT): 5496 _this = self._parse_star() 5497 if _this: 5498 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 5499 else: 5500 then = self.expression( 5501 exp.Insert, 5502 this=self._parse_value(), 5503 expression=self._match(TokenType.VALUES) and self._parse_value(), 5504 ) 5505 elif self._match(TokenType.UPDATE): 5506 expressions = self._parse_star() 5507 if expressions: 5508 then = self.expression(exp.Update, expressions=expressions) 5509 else: 5510 then = self.expression( 5511 exp.Update, 5512 expressions=self._match(TokenType.SET) 5513 and self._parse_csv(self._parse_equality), 5514 ) 5515 elif self._match(TokenType.DELETE): 5516 then = self.expression(exp.Var, this=self._prev.text) 5517 else: 5518 then = None 5519 5520 whens.append( 5521 self.expression( 5522 exp.When, 5523 matched=matched, 5524 source=source, 5525 condition=condition, 5526 then=then, 5527 ) 5528 ) 5529 return whens 5530 5531 def _parse_show(self) -> t.Optional[exp.Expression]: 5532 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 5533 if parser: 5534 return parser(self) 5535 return self._parse_as_command(self._prev) 5536 5537 def _parse_set_item_assignment( 5538 self, kind: t.Optional[str] = None 5539 ) -> t.Optional[exp.Expression]: 5540 index = self._index 5541 5542 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 5543 return self._parse_set_transaction(global_=kind == "GLOBAL") 5544 5545 left = self._parse_primary() or self._parse_id_var() 5546 assignment_delimiter = self._match_texts(("=", "TO")) 5547 5548 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 5549 self._retreat(index) 5550 return None 5551 5552 right = self._parse_statement() or self._parse_id_var() 5553 this = self.expression(exp.EQ, this=left, expression=right) 5554 5555 return self.expression(exp.SetItem, this=this, kind=kind) 5556 5557 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 5558 self._match_text_seq("TRANSACTION") 5559 characteristics = self._parse_csv( 5560 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 5561 ) 5562 return self.expression( 5563 exp.SetItem, 5564 expressions=characteristics, 5565 kind="TRANSACTION", 5566 **{"global": global_}, # type: ignore 5567 ) 5568 5569 def _parse_set_item(self) -> t.Optional[exp.Expression]: 5570 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 5571 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 5572 5573 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 5574 index = self._index 5575 set_ = self.expression( 5576 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 5577 ) 5578 5579 if self._curr: 5580 self._retreat(index) 5581 return self._parse_as_command(self._prev) 5582 5583 return set_ 5584 5585 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 5586 for option in options: 5587 if self._match_text_seq(*option.split(" ")): 5588 return exp.var(option) 5589 return None 5590 5591 def _parse_as_command(self, start: Token) -> exp.Command: 5592 while self._curr: 5593 self._advance() 5594 text = self._find_sql(start, self._prev) 5595 size = len(start.text) 5596 self._warn_unsupported() 5597 return exp.Command(this=text[:size], expression=text[size:]) 5598 5599 def _parse_dict_property(self, this: str) -> exp.DictProperty: 5600 settings = [] 5601 5602 self._match_l_paren() 5603 kind = self._parse_id_var() 5604 5605 if self._match(TokenType.L_PAREN): 5606 while True: 5607 key = self._parse_id_var() 5608 value = self._parse_primary() 5609 5610 if not key and value is None: 5611 break 5612 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 5613 self._match(TokenType.R_PAREN) 5614 5615 self._match_r_paren() 5616 5617 return self.expression( 5618 exp.DictProperty, 5619 this=this, 5620 kind=kind.this if kind else None, 5621 settings=settings, 5622 ) 5623 5624 def _parse_dict_range(self, this: str) -> exp.DictRange: 5625 self._match_l_paren() 5626 has_min = self._match_text_seq("MIN") 5627 if has_min: 5628 min = self._parse_var() or self._parse_primary() 5629 self._match_text_seq("MAX") 5630 max = self._parse_var() or self._parse_primary() 5631 else: 5632 max = self._parse_var() or self._parse_primary() 5633 min = exp.Literal.number(0) 5634 self._match_r_paren() 5635 return self.expression(exp.DictRange, this=this, min=min, max=max) 5636 5637 def _parse_comprehension( 5638 self, this: t.Optional[exp.Expression] 5639 ) -> t.Optional[exp.Comprehension]: 5640 index = self._index 5641 expression = self._parse_column() 5642 if not self._match(TokenType.IN): 5643 self._retreat(index - 1) 5644 return None 5645 iterator = self._parse_column() 5646 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 5647 return self.expression( 5648 exp.Comprehension, 5649 this=this, 5650 expression=expression, 5651 iterator=iterator, 5652 condition=condition, 5653 ) 5654 5655 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 5656 if self._match(TokenType.HEREDOC_STRING): 5657 return self.expression(exp.Heredoc, this=self._prev.text) 5658 5659 if not self._match_text_seq("$"): 5660 return None 5661 5662 tags = ["$"] 5663 tag_text = None 5664 5665 if self._is_connected(): 5666 self._advance() 5667 tags.append(self._prev.text.upper()) 5668 else: 5669 self.raise_error("No closing $ found") 5670 5671 if tags[-1] != "$": 5672 if self._is_connected() and self._match_text_seq("$"): 5673 tag_text = tags[-1] 5674 tags.append("$") 5675 else: 5676 self.raise_error("No closing $ found") 5677 5678 heredoc_start = self._curr 5679 5680 while self._curr: 5681 if self._match_text_seq(*tags, advance=False): 5682 this = self._find_sql(heredoc_start, self._prev) 5683 self._advance(len(tags)) 5684 return self.expression(exp.Heredoc, this=this, tag=tag_text) 5685 5686 self._advance() 5687 5688 self.raise_error(f"No closing {''.join(tags)} found") 5689 return None 5690 5691 def _find_parser( 5692 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 5693 ) -> t.Optional[t.Callable]: 5694 if not self._curr: 5695 return None 5696 5697 index = self._index 5698 this = [] 5699 while True: 5700 # The current token might be multiple words 5701 curr = self._curr.text.upper() 5702 key = curr.split(" ") 5703 this.append(curr) 5704 5705 self._advance() 5706 result, trie = in_trie(trie, key) 5707 if result == TrieResult.FAILED: 5708 break 5709 5710 if result == TrieResult.EXISTS: 5711 subparser = parsers[" ".join(this)] 5712 return subparser 5713 5714 self._retreat(index) 5715 return None 5716 5717 def _match(self, token_type, advance=True, expression=None): 5718 if not self._curr: 5719 return None 5720 5721 if self._curr.token_type == token_type: 5722 if advance: 5723 self._advance() 5724 self._add_comments(expression) 5725 return True 5726 5727 return None 5728 5729 def _match_set(self, types, advance=True): 5730 if not self._curr: 5731 return None 5732 5733 if self._curr.token_type in types: 5734 if advance: 5735 self._advance() 5736 return True 5737 5738 return None 5739 5740 def _match_pair(self, token_type_a, token_type_b, advance=True): 5741 if not self._curr or not self._next: 5742 return None 5743 5744 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 5745 if advance: 5746 self._advance(2) 5747 return True 5748 5749 return None 5750 5751 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5752 if not self._match(TokenType.L_PAREN, expression=expression): 5753 self.raise_error("Expecting (") 5754 5755 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5756 if not self._match(TokenType.R_PAREN, expression=expression): 5757 self.raise_error("Expecting )") 5758 5759 def _match_texts(self, texts, advance=True): 5760 if self._curr and self._curr.text.upper() in texts: 5761 if advance: 5762 self._advance() 5763 return True 5764 return None 5765 5766 def _match_text_seq(self, *texts, advance=True): 5767 index = self._index 5768 for text in texts: 5769 if self._curr and self._curr.text.upper() == text: 5770 self._advance() 5771 else: 5772 self._retreat(index) 5773 return None 5774 5775 if not advance: 5776 self._retreat(index) 5777 5778 return True 5779 5780 @t.overload 5781 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 5782 ... 5783 5784 @t.overload 5785 def _replace_columns_with_dots( 5786 self, this: t.Optional[exp.Expression] 5787 ) -> t.Optional[exp.Expression]: 5788 ... 5789 5790 def _replace_columns_with_dots(self, this): 5791 if isinstance(this, exp.Dot): 5792 exp.replace_children(this, self._replace_columns_with_dots) 5793 elif isinstance(this, exp.Column): 5794 exp.replace_children(this, self._replace_columns_with_dots) 5795 table = this.args.get("table") 5796 this = ( 5797 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 5798 ) 5799 5800 return this 5801 5802 def _replace_lambda( 5803 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 5804 ) -> t.Optional[exp.Expression]: 5805 if not node: 5806 return node 5807 5808 for column in node.find_all(exp.Column): 5809 if column.parts[0].name in lambda_variables: 5810 dot_or_id = column.to_dot() if column.table else column.this 5811 parent = column.parent 5812 5813 while isinstance(parent, exp.Dot): 5814 if not isinstance(parent.parent, exp.Dot): 5815 parent.replace(dot_or_id) 5816 break 5817 parent = parent.parent 5818 else: 5819 if column is node: 5820 node = dot_or_id 5821 else: 5822 column.replace(dot_or_id) 5823 return node
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
1027 def __init__( 1028 self, 1029 error_level: t.Optional[ErrorLevel] = None, 1030 error_message_context: int = 100, 1031 max_errors: int = 3, 1032 dialect: DialectType = None, 1033 ): 1034 from sqlglot.dialects import Dialect 1035 1036 self.error_level = error_level or ErrorLevel.IMMEDIATE 1037 self.error_message_context = error_message_context 1038 self.max_errors = max_errors 1039 self.dialect = Dialect.get_or_raise(dialect) 1040 self.reset()
1052 def parse( 1053 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1054 ) -> t.List[t.Optional[exp.Expression]]: 1055 """ 1056 Parses a list of tokens and returns a list of syntax trees, one tree 1057 per parsed SQL statement. 1058 1059 Args: 1060 raw_tokens: The list of tokens. 1061 sql: The original SQL string, used to produce helpful debug messages. 1062 1063 Returns: 1064 The list of the produced syntax trees. 1065 """ 1066 return self._parse( 1067 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1068 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
1070 def parse_into( 1071 self, 1072 expression_types: exp.IntoType, 1073 raw_tokens: t.List[Token], 1074 sql: t.Optional[str] = None, 1075 ) -> t.List[t.Optional[exp.Expression]]: 1076 """ 1077 Parses a list of tokens into a given Expression type. If a collection of Expression 1078 types is given instead, this method will try to parse the token list into each one 1079 of them, stopping at the first for which the parsing succeeds. 1080 1081 Args: 1082 expression_types: The expression type(s) to try and parse the token list into. 1083 raw_tokens: The list of tokens. 1084 sql: The original SQL string, used to produce helpful debug messages. 1085 1086 Returns: 1087 The target Expression. 1088 """ 1089 errors = [] 1090 for expression_type in ensure_list(expression_types): 1091 parser = self.EXPRESSION_PARSERS.get(expression_type) 1092 if not parser: 1093 raise TypeError(f"No parser registered for {expression_type}") 1094 1095 try: 1096 return self._parse(parser, raw_tokens, sql) 1097 except ParseError as e: 1098 e.errors[0]["into_expression"] = expression_type 1099 errors.append(e) 1100 1101 raise ParseError( 1102 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1103 errors=merge_errors(errors), 1104 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
1141 def check_errors(self) -> None: 1142 """Logs or raises any found errors, depending on the chosen error level setting.""" 1143 if self.error_level == ErrorLevel.WARN: 1144 for error in self.errors: 1145 logger.error(str(error)) 1146 elif self.error_level == ErrorLevel.RAISE and self.errors: 1147 raise ParseError( 1148 concat_messages(self.errors, self.max_errors), 1149 errors=merge_errors(self.errors), 1150 )
Logs or raises any found errors, depending on the chosen error level setting.
1152 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1153 """ 1154 Appends an error in the list of recorded errors or raises it, depending on the chosen 1155 error level setting. 1156 """ 1157 token = token or self._curr or self._prev or Token.string("") 1158 start = token.start 1159 end = token.end + 1 1160 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1161 highlight = self.sql[start:end] 1162 end_context = self.sql[end : end + self.error_message_context] 1163 1164 error = ParseError.new( 1165 f"{message}. Line {token.line}, Col: {token.col}.\n" 1166 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1167 description=message, 1168 line=token.line, 1169 col=token.col, 1170 start_context=start_context, 1171 highlight=highlight, 1172 end_context=end_context, 1173 ) 1174 1175 if self.error_level == ErrorLevel.IMMEDIATE: 1176 raise error 1177 1178 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
1180 def expression( 1181 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1182 ) -> E: 1183 """ 1184 Creates a new, validated Expression. 1185 1186 Args: 1187 exp_class: The expression class to instantiate. 1188 comments: An optional list of comments to attach to the expression. 1189 kwargs: The arguments to set for the expression along with their respective values. 1190 1191 Returns: 1192 The target expression. 1193 """ 1194 instance = exp_class(**kwargs) 1195 instance.add_comments(comments) if comments else self._add_comments(instance) 1196 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
1203 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1204 """ 1205 Validates an Expression, making sure that all its mandatory arguments are set. 1206 1207 Args: 1208 expression: The expression to validate. 1209 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1210 1211 Returns: 1212 The validated expression. 1213 """ 1214 if self.error_level != ErrorLevel.IGNORE: 1215 for error_message in expression.error_messages(args): 1216 self.raise_error(error_message) 1217 1218 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.